xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 82094794e2b9d059cc8370a7dbd4702a5e945ede)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/seq/aij.h"
4 #include "../src/inline/dot.h"
5 #include "../src/inline/spops.h"
6 #include "petscbt.h"
7 #include "../src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   const PetscInt *c,*r,*ic;
64   PetscInt       i,n = A->rmap->n,*cc,*cr;
65   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
66   PetscInt       *ordcol,*iwk,*iperm,*jw;
67   PetscInt       jmax,lfill,job,*o_i,*o_j;
68   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
69   PetscReal      af,dt,dtcount,dtcol,fill;
70 
71   PetscFunctionBegin;
72 
73   if (info->dt == PETSC_DEFAULT)      dt      = .005; else dt = info->dt;
74   if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);  else dtcount = info->dtcount;
75   if (info->dtcol == PETSC_DEFAULT)   dtcol   = .01; else dtcol = info->dtcol;
76   if (info->fill == PETSC_DEFAULT)    fill    = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill;
77   lfill   = (PetscInt)(dtcount/2.0);
78   jmax    = (PetscInt)(fill*a->nz);
79 
80 
81   /* ------------------------------------------------------------
82      If reorder=.TRUE., then the original matrix has to be
83      reordered to reflect the user selected ordering scheme, and
84      then de-reordered so it is in it's original format.
85      Because Saad's dperm() is NOT in place, we have to copy
86      the original matrix and allocate more storage. . .
87      ------------------------------------------------------------
88   */
89 
90   /* set reorder to true if either isrow or iscol is not identity */
91   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
92   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
93   reorder = PetscNot(reorder);
94 
95 
96   /* storage for ilu factor */
97   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr);
100   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
101 
102   /* ------------------------------------------------------------
103      Make sure that everything is Fortran formatted (1-Based)
104      ------------------------------------------------------------
105   */
106   for (i=old_i[0];i<old_i[n];i++) {
107     old_j[i]++;
108   }
109   for(i=0;i<n+1;i++) {
110     old_i[i]++;
111   };
112 
113 
114   if (reorder) {
115     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
116     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
117     ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       cr[i]  = r[i]+1;
120       cc[i]  = c[i]+1;
121     }
122     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
123     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
124     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
125     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
126     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr);
127     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job);
128     ierr = PetscFree2(cc,cr);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
222   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = MAT_FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   b->free_a        = PETSC_TRUE;
230   b->free_ij       = PETSC_TRUE;
231   b->singlemalloc  = PETSC_FALSE;
232   b->a             = new_a;
233   b->j             = new_j;
234   b->i             = new_i;
235   b->ilen          = 0;
236   b->imax          = 0;
237   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238   b->row           = isirow;
239   b->col           = iscolf;
240   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
241   b->maxnz = b->nz = new_i[n];
242   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
243   (*fact)->info.factor_mallocs = 0;
244 
245   af = ((double)b->nz)/((double)a->nz) + .001;
246   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr);
247   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
248   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
249   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
250 
251   if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ;
252   else         (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
253   (*fact)->ops->solveadd           = MatSolveAdd_SeqAIJ;
254   (*fact)->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
255   (*fact)->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
256 
257   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
258 
259   PetscFunctionReturn(0);
260 #endif
261 }
262 
263 EXTERN_C_BEGIN
264 #undef __FUNCT__
265 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
266 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
267 {
268   PetscFunctionBegin;
269   *flg = PETSC_TRUE;
270   PetscFunctionReturn(0);
271 }
272 EXTERN_C_END
273 
274 EXTERN_C_BEGIN
275 #undef __FUNCT__
276 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
277 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
278 {
279   PetscInt           n = A->rmap->n;
280   PetscErrorCode     ierr;
281 
282   PetscFunctionBegin;
283   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
284   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
285   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
286     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
287     if (ftype == MAT_FACTOR_ILU) {
288       (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ;
289     } else {
290       (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
291     }
292   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
293     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
294     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
295     if (ftype == MAT_FACTOR_ICC) {
296       (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
297     } else {
298       (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
299     }
300   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
301   (*B)->factor = ftype;
302   PetscFunctionReturn(0);
303 }
304 EXTERN_C_END
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
308 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
309 {
310   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
311   IS                 isicol;
312   PetscErrorCode     ierr;
313   const PetscInt     *r,*ic;
314   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
315   PetscInt           *bi,*bj,*ajtmp;
316   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
317   PetscReal          f;
318   PetscInt           nlnk,*lnk,k,**bi_ptr;
319   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
320   PetscBT            lnkbt;
321 
322   PetscFunctionBegin;
323   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
324   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
325   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
326   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
327 
328   /* get new row pointers */
329   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
330   bi[0] = 0;
331 
332   /* bdiag is location of diagonal in factor */
333   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
334   bdiag[0] = 0;
335 
336   /* linked list for storing column indices of the active row */
337   nlnk = n + 1;
338   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
339 
340   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
341 
342   /* initial FreeSpace size is f*(ai[n]+1) */
343   f = info->fill;
344   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
345   current_space = free_space;
346 
347   for (i=0; i<n; i++) {
348     /* copy previous fill into linked list */
349     nzi = 0;
350     nnz = ai[r[i]+1] - ai[r[i]];
351     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
352     ajtmp = aj + ai[r[i]];
353     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
354     nzi += nlnk;
355 
356     /* add pivot rows into linked list */
357     row = lnk[n];
358     while (row < i) {
359       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
360       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
361       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
362       nzi += nlnk;
363       row  = lnk[row];
364     }
365     bi[i+1] = bi[i] + nzi;
366     im[i]   = nzi;
367 
368     /* mark bdiag */
369     nzbd = 0;
370     nnz  = nzi;
371     k    = lnk[n];
372     while (nnz-- && k < i){
373       nzbd++;
374       k = lnk[k];
375     }
376     bdiag[i] = bi[i] + nzbd;
377 
378     /* if free space is not available, make more free space */
379     if (current_space->local_remaining<nzi) {
380       nnz = (n - i)*nzi; /* estimated and max additional space needed */
381       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
382       reallocs++;
383     }
384 
385     /* copy data into free space, then initialize lnk */
386     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
387     bi_ptr[i] = current_space->array;
388     current_space->array           += nzi;
389     current_space->local_used      += nzi;
390     current_space->local_remaining -= nzi;
391   }
392 #if defined(PETSC_USE_INFO)
393   if (ai[n] != 0) {
394     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
395     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
396     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
397     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
398     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
399   } else {
400     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
401   }
402 #endif
403 
404   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
405   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
406 
407   /* destroy list of free space and other temporary array(s) */
408   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
409   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
410   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
411   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
412 
413   /* put together the new matrix */
414   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
415   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
416   b    = (Mat_SeqAIJ*)(B)->data;
417   b->free_a       = PETSC_TRUE;
418   b->free_ij      = PETSC_TRUE;
419   b->singlemalloc = PETSC_FALSE;
420   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
421   b->j          = bj;
422   b->i          = bi;
423   b->diag       = bdiag;
424   b->ilen       = 0;
425   b->imax       = 0;
426   b->row        = isrow;
427   b->col        = iscol;
428   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
429   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
430   b->icol       = isicol;
431   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
432 
433   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
434   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
435   b->maxnz = b->nz = bi[n] ;
436 
437   (B)->factor                 =  MAT_FACTOR_LU;
438   (B)->info.factor_mallocs    = reallocs;
439   (B)->info.fill_ratio_given  = f;
440 
441   if (ai[n] != 0) {
442     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
443   } else {
444     (B)->info.fill_ratio_needed = 0.0;
445   }
446   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
447   (B)->ops->solve            = MatSolve_SeqAIJ;
448   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
449   /* switch to inodes if appropriate */
450   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 /*
455     Trouble in factorization, should we dump the original matrix?
456 */
457 #undef __FUNCT__
458 #define __FUNCT__ "MatFactorDumpMatrix"
459 PetscErrorCode MatFactorDumpMatrix(Mat A)
460 {
461   PetscErrorCode ierr;
462   PetscTruth     flg;
463 
464   PetscFunctionBegin;
465   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
466   if (flg) {
467     PetscViewer viewer;
468     char        filename[PETSC_MAX_PATH_LEN];
469 
470     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
471     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
472     ierr = MatView(A,viewer);CHKERRQ(ierr);
473     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
474   }
475   PetscFunctionReturn(0);
476 }
477 
478 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
479 
480 /* ----------------------------------------------------------- */
481 #undef __FUNCT__
482 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
483 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
484 {
485   Mat            C=B;
486   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
487   IS             isrow = b->row,isicol = b->icol;
488   PetscErrorCode ierr;
489   const PetscInt  *r,*ic,*ics;
490   PetscInt       i,j,n=A->rmap->n,*bi=b->i,*bj=b->j;
491   PetscInt       *ajtmp,*bjtmp,nz,row;
492   PetscInt       *diag_offset = b->diag,diag,*pj;
493   PetscScalar    *rtmp,*pc,multiplier,*rtmps;
494   MatScalar      *v,*pv;
495   PetscScalar    d;
496   PetscReal      rs;
497   LUShift_Ctx    sctx;
498   PetscInt       newshift,*ddiag;
499 
500   PetscFunctionBegin;
501   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
502   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
503   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
504   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
505   rtmps = rtmp; ics = ic;
506 
507   sctx.shift_top      = 0;
508   sctx.nshift_max     = 0;
509   sctx.shift_lo       = 0;
510   sctx.shift_hi       = 0;
511   sctx.shift_fraction = 0;
512 
513   /* if both shift schemes are chosen by user, only use info->shiftpd */
514   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
515     PetscInt *aai = a->i;
516     ddiag          = a->diag;
517     sctx.shift_top = 0;
518     for (i=0; i<n; i++) {
519       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
520       d  = (a->a)[ddiag[i]];
521       rs = -PetscAbsScalar(d) - PetscRealPart(d);
522       v  = a->a+aai[i];
523       nz = aai[i+1] - aai[i];
524       for (j=0; j<nz; j++)
525 	rs += PetscAbsScalar(v[j]);
526       if (rs>sctx.shift_top) sctx.shift_top = rs;
527     }
528     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
529     sctx.shift_top    *= 1.1;
530     sctx.nshift_max   = 5;
531     sctx.shift_lo     = 0.;
532     sctx.shift_hi     = 1.;
533   }
534 
535   sctx.shift_amount = 0;
536   sctx.nshift       = 0;
537   do {
538     sctx.lushift = PETSC_FALSE;
539     for (i=0; i<n; i++){
540       nz    = bi[i+1] - bi[i];
541       bjtmp = bj + bi[i];
542       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
543 
544       /* load in initial (unfactored row) */
545       nz    = a->i[r[i]+1] - a->i[r[i]];
546       ajtmp = a->j + a->i[r[i]];
547       v     = a->a + a->i[r[i]];
548       for (j=0; j<nz; j++) {
549         rtmp[ics[ajtmp[j]]] = v[j];
550       }
551       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
552 
553       row = *bjtmp++;
554       while  (row < i) {
555         pc = rtmp + row;
556         if (*pc != 0.0) {
557           pv         = b->a + diag_offset[row];
558           pj         = b->j + diag_offset[row] + 1;
559           multiplier = *pc / *pv++;
560           *pc        = multiplier;
561           nz         = bi[row+1] - diag_offset[row] - 1;
562           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
563           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
564         }
565         row = *bjtmp++;
566       }
567       /* finished row so stick it into b->a */
568       pv   = b->a + bi[i] ;
569       pj   = b->j + bi[i] ;
570       nz   = bi[i+1] - bi[i];
571       diag = diag_offset[i] - bi[i];
572       rs   = 0.0;
573       for (j=0; j<nz; j++) {
574         pv[j] = rtmps[pj[j]];
575         if (j != diag) rs += PetscAbsScalar(pv[j]);
576       }
577 
578       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
579       sctx.rs  = rs;
580       sctx.pv  = pv[diag];
581       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
582       if (newshift == 1) break;
583     }
584 
585     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
586       /*
587        * if no shift in this attempt & shifting & started shifting & can refine,
588        * then try lower shift
589        */
590       sctx.shift_hi        = sctx.shift_fraction;
591       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
592       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
593       sctx.lushift         = PETSC_TRUE;
594       sctx.nshift++;
595     }
596   } while (sctx.lushift);
597 
598   /* invert diagonal entries for simplier triangular solves */
599   for (i=0; i<n; i++) {
600     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
601   }
602   ierr = PetscFree(rtmp);CHKERRQ(ierr);
603   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
604   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
605   if (b->inode.use) {
606     C->ops->solve   = MatSolve_Inode;
607   } else {
608     PetscTruth row_identity, col_identity;
609     ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
610     ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
611     if (row_identity && col_identity) {
612       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
613     } else {
614       C->ops->solve   = MatSolve_SeqAIJ;
615     }
616   }
617   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
618   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
619   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
620   C->ops->matsolve           = MatMatSolve_SeqAIJ;
621   C->assembled    = PETSC_TRUE;
622   C->preallocated = PETSC_TRUE;
623   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
624   if (sctx.nshift){
625      if (info->shiftpd) {
626       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
627     } else if (info->shiftnz) {
628       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 /*
635    This routine implements inplace ILU(0) with row or/and column permutations.
636    Input:
637      A - original matrix
638    Output;
639      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
640          a->j (col index) is permuted by the inverse of colperm, then sorted
641          a->a reordered accordingly with a->j
642          a->diag (ptr to diagonal elements) is updated.
643 */
644 #undef __FUNCT__
645 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
646 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
647 {
648   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
649   IS             isrow = a->row,isicol = a->icol;
650   PetscErrorCode ierr;
651   const PetscInt *r,*ic,*ics;
652   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
653   PetscInt       *ajtmp,nz,row;
654   PetscInt       *diag = a->diag,nbdiag,*pj;
655   PetscScalar    *rtmp,*pc,multiplier,d;
656   MatScalar      *v,*pv;
657   PetscReal      rs;
658   LUShift_Ctx    sctx;
659   PetscInt       newshift;
660 
661   PetscFunctionBegin;
662   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
663   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
664   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
665   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
666   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
667   ics = ic;
668 
669   sctx.shift_top      = 0;
670   sctx.nshift_max     = 0;
671   sctx.shift_lo       = 0;
672   sctx.shift_hi       = 0;
673   sctx.shift_fraction = 0;
674 
675   /* if both shift schemes are chosen by user, only use info->shiftpd */
676   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
677     sctx.shift_top = 0;
678     for (i=0; i<n; i++) {
679       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
680       d  = (a->a)[diag[i]];
681       rs = -PetscAbsScalar(d) - PetscRealPart(d);
682       v  = a->a+ai[i];
683       nz = ai[i+1] - ai[i];
684       for (j=0; j<nz; j++)
685 	rs += PetscAbsScalar(v[j]);
686       if (rs>sctx.shift_top) sctx.shift_top = rs;
687     }
688     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
689     sctx.shift_top    *= 1.1;
690     sctx.nshift_max   = 5;
691     sctx.shift_lo     = 0.;
692     sctx.shift_hi     = 1.;
693   }
694 
695   sctx.shift_amount = 0;
696   sctx.nshift       = 0;
697   do {
698     sctx.lushift = PETSC_FALSE;
699     for (i=0; i<n; i++){
700       /* load in initial unfactored row */
701       nz    = ai[r[i]+1] - ai[r[i]];
702       ajtmp = aj + ai[r[i]];
703       v     = a->a + ai[r[i]];
704       /* sort permuted ajtmp and values v accordingly */
705       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
706       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
707 
708       diag[r[i]] = ai[r[i]];
709       for (j=0; j<nz; j++) {
710         rtmp[ajtmp[j]] = v[j];
711         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
712       }
713       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
714 
715       row = *ajtmp++;
716       while  (row < i) {
717         pc = rtmp + row;
718         if (*pc != 0.0) {
719           pv         = a->a + diag[r[row]];
720           pj         = aj + diag[r[row]] + 1;
721 
722           multiplier = *pc / *pv++;
723           *pc        = multiplier;
724           nz         = ai[r[row]+1] - diag[r[row]] - 1;
725           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
726           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
727         }
728         row = *ajtmp++;
729       }
730       /* finished row so overwrite it onto a->a */
731       pv   = a->a + ai[r[i]] ;
732       pj   = aj + ai[r[i]] ;
733       nz   = ai[r[i]+1] - ai[r[i]];
734       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
735 
736       rs   = 0.0;
737       for (j=0; j<nz; j++) {
738         pv[j] = rtmp[pj[j]];
739         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
740       }
741 
742       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
743       sctx.rs  = rs;
744       sctx.pv  = pv[nbdiag];
745       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
746       if (newshift == 1) break;
747     }
748 
749     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
750       /*
751        * if no shift in this attempt & shifting & started shifting & can refine,
752        * then try lower shift
753        */
754       sctx.shift_hi        = sctx.shift_fraction;
755       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
756       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
757       sctx.lushift         = PETSC_TRUE;
758       sctx.nshift++;
759     }
760   } while (sctx.lushift);
761 
762   /* invert diagonal entries for simplier triangular solves */
763   for (i=0; i<n; i++) {
764     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
765   }
766 
767   ierr = PetscFree(rtmp);CHKERRQ(ierr);
768   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
769   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
770   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
771   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
772   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
773   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
774   A->assembled = PETSC_TRUE;
775   A->preallocated = PETSC_TRUE;
776   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
777   if (sctx.nshift){
778     if (info->shiftpd) {
779       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
780     } else if (info->shiftnz) {
781       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
782     }
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 /* ----------------------------------------------------------- */
788 #undef __FUNCT__
789 #define __FUNCT__ "MatLUFactor_SeqAIJ"
790 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
791 {
792   PetscErrorCode ierr;
793   Mat            C;
794 
795   PetscFunctionBegin;
796   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
797   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
798   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
799   A->ops->solve            = C->ops->solve;
800   A->ops->solvetranspose   = C->ops->solvetranspose;
801   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
802   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
803   PetscFunctionReturn(0);
804 }
805 /* ----------------------------------------------------------- */
806 #undef __FUNCT__
807 #define __FUNCT__ "MatSolve_SeqAIJ"
808 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
809 {
810   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
811   IS                iscol = a->col,isrow = a->row;
812   PetscErrorCode    ierr;
813   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
814   PetscInt          nz;
815   const PetscInt    *rout,*cout,*r,*c;
816   PetscScalar       *x,*tmp,*tmps,sum;
817   const PetscScalar *b;
818   const MatScalar   *aa = a->a,*v;
819 
820   PetscFunctionBegin;
821   if (!n) PetscFunctionReturn(0);
822 
823   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
824   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
825   tmp  = a->solve_work;
826 
827   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
828   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
829 
830   /* forward solve the lower triangular */
831   tmp[0] = b[*r++];
832   tmps   = tmp;
833   for (i=1; i<n; i++) {
834     v   = aa + ai[i] ;
835     vi  = aj + ai[i] ;
836     nz  = a->diag[i] - ai[i];
837     sum = b[*r++];
838     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
839     tmp[i] = sum;
840   }
841 
842   /* backward solve the upper triangular */
843   for (i=n-1; i>=0; i--){
844     v   = aa + a->diag[i] + 1;
845     vi  = aj + a->diag[i] + 1;
846     nz  = ai[i+1] - a->diag[i] - 1;
847     sum = tmp[i];
848     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
849     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
850   }
851 
852   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
853   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
854   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
855   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
856   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatMatSolve_SeqAIJ"
862 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
863 {
864   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
865   IS              iscol = a->col,isrow = a->row;
866   PetscErrorCode  ierr;
867   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
868   PetscInt        nz,neq;
869   const PetscInt  *rout,*cout,*r,*c;
870   PetscScalar     *x,*b,*tmp,*tmps,sum;
871   const MatScalar *aa = a->a,*v;
872   PetscTruth      bisdense,xisdense;
873 
874   PetscFunctionBegin;
875   if (!n) PetscFunctionReturn(0);
876 
877   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
878   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
879   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
880   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
881 
882   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
883   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
884 
885   tmp  = a->solve_work;
886   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
887   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
888 
889   for (neq=0; neq<B->cmap->n; neq++){
890     /* forward solve the lower triangular */
891     tmp[0] = b[r[0]];
892     tmps   = tmp;
893     for (i=1; i<n; i++) {
894       v   = aa + ai[i] ;
895       vi  = aj + ai[i] ;
896       nz  = a->diag[i] - ai[i];
897       sum = b[r[i]];
898       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
899       tmp[i] = sum;
900     }
901     /* backward solve the upper triangular */
902     for (i=n-1; i>=0; i--){
903       v   = aa + a->diag[i] + 1;
904       vi  = aj + a->diag[i] + 1;
905       nz  = ai[i+1] - a->diag[i] - 1;
906       sum = tmp[i];
907       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
908       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
909     }
910 
911     b += n;
912     x += n;
913   }
914   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
915   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
916   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
917   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
918   ierr = PetscLogFlops(B->cmap->n*(2*a->nz - n));CHKERRQ(ierr);
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
924 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
925 {
926   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
927   IS              iscol = a->col,isrow = a->row;
928   PetscErrorCode  ierr;
929   const PetscInt  *r,*c,*rout,*cout;
930   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
931   PetscInt        nz,row;
932   PetscScalar     *x,*b,*tmp,*tmps,sum;
933   const MatScalar *aa = a->a,*v;
934 
935   PetscFunctionBegin;
936   if (!n) PetscFunctionReturn(0);
937 
938   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
939   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
940   tmp  = a->solve_work;
941 
942   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
943   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
944 
945   /* forward solve the lower triangular */
946   tmp[0] = b[*r++];
947   tmps   = tmp;
948   for (row=1; row<n; row++) {
949     i   = rout[row]; /* permuted row */
950     v   = aa + ai[i] ;
951     vi  = aj + ai[i] ;
952     nz  = a->diag[i] - ai[i];
953     sum = b[*r++];
954     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
955     tmp[row] = sum;
956   }
957 
958   /* backward solve the upper triangular */
959   for (row=n-1; row>=0; row--){
960     i   = rout[row]; /* permuted row */
961     v   = aa + a->diag[i] + 1;
962     vi  = aj + a->diag[i] + 1;
963     nz  = ai[i+1] - a->diag[i] - 1;
964     sum = tmp[row];
965     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
966     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
967   }
968 
969   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
970   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
971   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
972   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
973   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 /* ----------------------------------------------------------- */
978 #undef __FUNCT__
979 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
980 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
981 {
982   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
983   PetscErrorCode    ierr;
984   PetscInt          n = A->rmap->n;
985   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
986   PetscScalar       *x;
987   const PetscScalar *b;
988   const MatScalar   *aa = a->a;
989 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
990   PetscInt          adiag_i,i,nz,ai_i;
991   const MatScalar   *v;
992   PetscScalar       sum;
993 #endif
994 
995   PetscFunctionBegin;
996   if (!n) PetscFunctionReturn(0);
997 
998   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
999   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1000 
1001 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1002   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1003 #else
1004   /* forward solve the lower triangular */
1005   x[0] = b[0];
1006   for (i=1; i<n; i++) {
1007     ai_i = ai[i];
1008     v    = aa + ai_i;
1009     vi   = aj + ai_i;
1010     nz   = adiag[i] - ai_i;
1011     sum  = b[i];
1012     while (nz--) sum -= *v++ * x[*vi++];
1013     x[i] = sum;
1014   }
1015 
1016   /* backward solve the upper triangular */
1017   for (i=n-1; i>=0; i--){
1018     adiag_i = adiag[i];
1019     v       = aa + adiag_i + 1;
1020     vi      = aj + adiag_i + 1;
1021     nz      = ai[i+1] - adiag_i - 1;
1022     sum     = x[i];
1023     while (nz--) sum -= *v++ * x[*vi++];
1024     x[i]    = sum*aa[adiag_i];
1025   }
1026 #endif
1027   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
1028   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1029   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1035 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1036 {
1037   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1038   IS              iscol = a->col,isrow = a->row;
1039   PetscErrorCode  ierr;
1040   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1041   PetscInt        nz;
1042   const PetscInt  *rout,*cout,*r,*c;
1043   PetscScalar     *x,*b,*tmp,sum;
1044   const MatScalar *aa = a->a,*v;
1045 
1046   PetscFunctionBegin;
1047   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1048 
1049   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1050   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1051   tmp  = a->solve_work;
1052 
1053   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1054   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1055 
1056   /* forward solve the lower triangular */
1057   tmp[0] = b[*r++];
1058   for (i=1; i<n; i++) {
1059     v   = aa + ai[i] ;
1060     vi  = aj + ai[i] ;
1061     nz  = a->diag[i] - ai[i];
1062     sum = b[*r++];
1063     while (nz--) sum -= *v++ * tmp[*vi++ ];
1064     tmp[i] = sum;
1065   }
1066 
1067   /* backward solve the upper triangular */
1068   for (i=n-1; i>=0; i--){
1069     v   = aa + a->diag[i] + 1;
1070     vi  = aj + a->diag[i] + 1;
1071     nz  = ai[i+1] - a->diag[i] - 1;
1072     sum = tmp[i];
1073     while (nz--) sum -= *v++ * tmp[*vi++ ];
1074     tmp[i] = sum*aa[a->diag[i]];
1075     x[*c--] += tmp[i];
1076   }
1077 
1078   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1079   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1080   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1081   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1082   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1083 
1084   PetscFunctionReturn(0);
1085 }
1086 /* -------------------------------------------------------------------*/
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1089 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1090 {
1091   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1092   IS              iscol = a->col,isrow = a->row;
1093   PetscErrorCode  ierr;
1094   const PetscInt  *rout,*cout,*r,*c;
1095   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1096   PetscInt        nz,*diag = a->diag;
1097   PetscScalar     *x,*b,*tmp,s1;
1098   const MatScalar *aa = a->a,*v;
1099 
1100   PetscFunctionBegin;
1101   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1102   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1103   tmp  = a->solve_work;
1104 
1105   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1106   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1107 
1108   /* copy the b into temp work space according to permutation */
1109   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1110 
1111   /* forward solve the U^T */
1112   for (i=0; i<n; i++) {
1113     v   = aa + diag[i] ;
1114     vi  = aj + diag[i] + 1;
1115     nz  = ai[i+1] - diag[i] - 1;
1116     s1  = tmp[i];
1117     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1118     while (nz--) {
1119       tmp[*vi++ ] -= (*v++)*s1;
1120     }
1121     tmp[i] = s1;
1122   }
1123 
1124   /* backward solve the L^T */
1125   for (i=n-1; i>=0; i--){
1126     v   = aa + diag[i] - 1 ;
1127     vi  = aj + diag[i] - 1 ;
1128     nz  = diag[i] - ai[i];
1129     s1  = tmp[i];
1130     while (nz--) {
1131       tmp[*vi-- ] -= (*v--)*s1;
1132     }
1133   }
1134 
1135   /* copy tmp into x according to permutation */
1136   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1137 
1138   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1139   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1140   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1141   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1142 
1143   ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1149 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1150 {
1151   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1152   IS              iscol = a->col,isrow = a->row;
1153   PetscErrorCode  ierr;
1154   const PetscInt  *r,*c,*rout,*cout;
1155   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1156   PetscInt        nz,*diag = a->diag;
1157   PetscScalar     *x,*b,*tmp;
1158   const MatScalar *aa = a->a,*v;
1159 
1160   PetscFunctionBegin;
1161   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1162 
1163   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1164   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1165   tmp = a->solve_work;
1166 
1167   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1168   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1169 
1170   /* copy the b into temp work space according to permutation */
1171   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1172 
1173   /* forward solve the U^T */
1174   for (i=0; i<n; i++) {
1175     v   = aa + diag[i] ;
1176     vi  = aj + diag[i] + 1;
1177     nz  = ai[i+1] - diag[i] - 1;
1178     tmp[i] *= *v++;
1179     while (nz--) {
1180       tmp[*vi++ ] -= (*v++)*tmp[i];
1181     }
1182   }
1183 
1184   /* backward solve the L^T */
1185   for (i=n-1; i>=0; i--){
1186     v   = aa + diag[i] - 1 ;
1187     vi  = aj + diag[i] - 1 ;
1188     nz  = diag[i] - ai[i];
1189     while (nz--) {
1190       tmp[*vi-- ] -= (*v--)*tmp[i];
1191     }
1192   }
1193 
1194   /* copy tmp into x according to permutation */
1195   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1196 
1197   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1198   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1199   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1200   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1201 
1202   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1203   PetscFunctionReturn(0);
1204 }
1205 /* ----------------------------------------------------------------*/
1206 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1207 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
1208 
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1211 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1212 {
1213   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1214   IS                 isicol;
1215   PetscErrorCode     ierr;
1216   const PetscInt     *r,*ic;
1217   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1218   PetscInt           *bi,*cols,nnz,*cols_lvl;
1219   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1220   PetscInt           i,levels,diagonal_fill;
1221   PetscTruth         col_identity,row_identity;
1222   PetscReal          f;
1223   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1224   PetscBT            lnkbt;
1225   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1226   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1227   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1228   PetscTruth         missing;
1229 
1230   PetscFunctionBegin;
1231   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1232   f             = info->fill;
1233   levels        = (PetscInt)info->levels;
1234   diagonal_fill = (PetscInt)info->diagonal_fill;
1235   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1236 
1237   /* special case that simply copies fill pattern */
1238   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1239   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1240   if (!levels && row_identity && col_identity) {
1241     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr);
1242     fact->factor = MAT_FACTOR_ILU;
1243     (fact)->info.factor_mallocs    = 0;
1244     (fact)->info.fill_ratio_given  = info->fill;
1245     (fact)->info.fill_ratio_needed = 1.0;
1246     b               = (Mat_SeqAIJ*)(fact)->data;
1247     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1248     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1249     b->row              = isrow;
1250     b->col              = iscol;
1251     b->icol             = isicol;
1252     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1253     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1254     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1255     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1256     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1257     PetscFunctionReturn(0);
1258   }
1259 
1260   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1261   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1262 
1263   /* get new row pointers */
1264   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1265   bi[0] = 0;
1266   /* bdiag is location of diagonal in factor */
1267   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1268   bdiag[0]  = 0;
1269 
1270   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1271   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1272 
1273   /* create a linked list for storing column indices of the active row */
1274   nlnk = n + 1;
1275   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1276 
1277   /* initial FreeSpace size is f*(ai[n]+1) */
1278   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1279   current_space = free_space;
1280   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1281   current_space_lvl = free_space_lvl;
1282 
1283   for (i=0; i<n; i++) {
1284     nzi = 0;
1285     /* copy current row into linked list */
1286     nnz  = ai[r[i]+1] - ai[r[i]];
1287     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1288     cols = aj + ai[r[i]];
1289     lnk[i] = -1; /* marker to indicate if diagonal exists */
1290     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1291     nzi += nlnk;
1292 
1293     /* make sure diagonal entry is included */
1294     if (diagonal_fill && lnk[i] == -1) {
1295       fm = n;
1296       while (lnk[fm] < i) fm = lnk[fm];
1297       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1298       lnk[fm]    = i;
1299       lnk_lvl[i] = 0;
1300       nzi++; dcount++;
1301     }
1302 
1303     /* add pivot rows into the active row */
1304     nzbd = 0;
1305     prow = lnk[n];
1306     while (prow < i) {
1307       nnz      = bdiag[prow];
1308       cols     = bj_ptr[prow] + nnz + 1;
1309       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1310       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1311       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1312       nzi += nlnk;
1313       prow = lnk[prow];
1314       nzbd++;
1315     }
1316     bdiag[i] = nzbd;
1317     bi[i+1]  = bi[i] + nzi;
1318 
1319     /* if free space is not available, make more free space */
1320     if (current_space->local_remaining<nzi) {
1321       nnz = nzi*(n - i); /* estimated and max additional space needed */
1322       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1323       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1324       reallocs++;
1325     }
1326 
1327     /* copy data into free_space and free_space_lvl, then initialize lnk */
1328     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1329     bj_ptr[i]    = current_space->array;
1330     bjlvl_ptr[i] = current_space_lvl->array;
1331 
1332     /* make sure the active row i has diagonal entry */
1333     if (*(bj_ptr[i]+bdiag[i]) != i) {
1334       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1335     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1336     }
1337 
1338     current_space->array           += nzi;
1339     current_space->local_used      += nzi;
1340     current_space->local_remaining -= nzi;
1341     current_space_lvl->array           += nzi;
1342     current_space_lvl->local_used      += nzi;
1343     current_space_lvl->local_remaining -= nzi;
1344   }
1345 
1346   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1347   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1348 
1349   /* destroy list of free space and other temporary arrays */
1350   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1351   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1352   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1353   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1354   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1355 
1356 #if defined(PETSC_USE_INFO)
1357   {
1358     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1359     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1360     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1361     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1362     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1363     if (diagonal_fill) {
1364       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1365     }
1366   }
1367 #endif
1368 
1369   /* put together the new matrix */
1370   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1371   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1372   b = (Mat_SeqAIJ*)(fact)->data;
1373   b->free_a       = PETSC_TRUE;
1374   b->free_ij      = PETSC_TRUE;
1375   b->singlemalloc = PETSC_FALSE;
1376   ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1377   b->j          = bj;
1378   b->i          = bi;
1379   for (i=0; i<n; i++) bdiag[i] += bi[i];
1380   b->diag       = bdiag;
1381   b->ilen       = 0;
1382   b->imax       = 0;
1383   b->row        = isrow;
1384   b->col        = iscol;
1385   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1386   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1387   b->icol       = isicol;
1388   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1389   /* In b structure:  Free imax, ilen, old a, old j.
1390      Allocate bdiag, solve_work, new a, new j */
1391   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1392   b->maxnz             = b->nz = bi[n] ;
1393   (fact)->info.factor_mallocs    = reallocs;
1394   (fact)->info.fill_ratio_given  = f;
1395   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1396   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1397   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #include "../src/mat/impls/sbaij/seq/sbaij.h"
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1404 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1405 {
1406   Mat            C = B;
1407   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1408   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1409   IS             ip=b->row,iip = b->icol;
1410   PetscErrorCode ierr;
1411   const PetscInt *rip,*riip;
1412   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1413   PetscInt       *ai=a->i,*aj=a->j;
1414   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1415   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1416   PetscReal      zeropivot,rs,shiftnz;
1417   PetscReal      shiftpd;
1418   ChShift_Ctx    sctx;
1419   PetscInt       newshift;
1420   PetscTruth     perm_identity;
1421 
1422   PetscFunctionBegin;
1423 
1424   shiftnz   = info->shiftnz;
1425   shiftpd   = info->shiftpd;
1426   zeropivot = info->zeropivot;
1427 
1428   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1429   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1430 
1431   /* initialization */
1432   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1433   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1434   jl   = il + mbs;
1435   rtmp = (MatScalar*)(jl + mbs);
1436 
1437   sctx.shift_amount = 0;
1438   sctx.nshift       = 0;
1439   do {
1440     sctx.chshift = PETSC_FALSE;
1441     for (i=0; i<mbs; i++) {
1442       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1443     }
1444 
1445     for (k = 0; k<mbs; k++){
1446       bval = ba + bi[k];
1447       /* initialize k-th row by the perm[k]-th row of A */
1448       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1449       for (j = jmin; j < jmax; j++){
1450         col = riip[aj[j]];
1451         if (col >= k){ /* only take upper triangular entry */
1452           rtmp[col] = aa[j];
1453           *bval++  = 0.0; /* for in-place factorization */
1454         }
1455       }
1456       /* shift the diagonal of the matrix */
1457       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1458 
1459       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1460       dk = rtmp[k];
1461       i = jl[k]; /* first row to be added to k_th row  */
1462 
1463       while (i < k){
1464         nexti = jl[i]; /* next row to be added to k_th row */
1465 
1466         /* compute multiplier, update diag(k) and U(i,k) */
1467         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1468         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1469         dk += uikdi*ba[ili];
1470         ba[ili] = uikdi; /* -U(i,k) */
1471 
1472         /* add multiple of row i to k-th row */
1473         jmin = ili + 1; jmax = bi[i+1];
1474         if (jmin < jmax){
1475           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1476           /* update il and jl for row i */
1477           il[i] = jmin;
1478           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1479         }
1480         i = nexti;
1481       }
1482 
1483       /* shift the diagonals when zero pivot is detected */
1484       /* compute rs=sum of abs(off-diagonal) */
1485       rs   = 0.0;
1486       jmin = bi[k]+1;
1487       nz   = bi[k+1] - jmin;
1488       bcol = bj + jmin;
1489       while (nz--){
1490         rs += PetscAbsScalar(rtmp[*bcol]);
1491         bcol++;
1492       }
1493 
1494       sctx.rs = rs;
1495       sctx.pv = dk;
1496       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1497 
1498       if (newshift == 1) {
1499         if (!sctx.shift_amount) {
1500           sctx.shift_amount = 1e-5;
1501         }
1502         break;
1503       }
1504 
1505       /* copy data into U(k,:) */
1506       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1507       jmin = bi[k]+1; jmax = bi[k+1];
1508       if (jmin < jmax) {
1509         for (j=jmin; j<jmax; j++){
1510           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1511         }
1512         /* add the k-th row into il and jl */
1513         il[k] = jmin;
1514         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1515       }
1516     }
1517   } while (sctx.chshift);
1518   ierr = PetscFree(il);CHKERRQ(ierr);
1519 
1520   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1521   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1522 
1523   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1524   if (perm_identity){
1525     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1526     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1527     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1528     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1529   } else {
1530     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1531     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1532     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1533     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1534   }
1535 
1536   C->assembled    = PETSC_TRUE;
1537   C->preallocated = PETSC_TRUE;
1538   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1539   if (sctx.nshift){
1540     if (shiftnz) {
1541       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1542     } else if (shiftpd) {
1543       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1544     }
1545   }
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1551 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1552 {
1553   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1554   Mat_SeqSBAIJ       *b;
1555   PetscErrorCode     ierr;
1556   PetscTruth         perm_identity,missing;
1557   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1558   const PetscInt     *rip,*riip;
1559   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1560   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1561   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1562   PetscReal          fill=info->fill,levels=info->levels;
1563   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1564   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1565   PetscBT            lnkbt;
1566   IS                 iperm;
1567 
1568   PetscFunctionBegin;
1569   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1570   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1571   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1572   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1573   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1574 
1575   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1576   ui[0] = 0;
1577 
1578   /* ICC(0) without matrix ordering: simply copies fill pattern */
1579   if (!levels && perm_identity) {
1580 
1581     for (i=0; i<am; i++) {
1582       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1583     }
1584     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1585     cols = uj;
1586     for (i=0; i<am; i++) {
1587       aj    = a->j + a->diag[i];
1588       ncols = ui[i+1] - ui[i];
1589       for (j=0; j<ncols; j++) *cols++ = *aj++;
1590     }
1591   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1592     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1593     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1594 
1595     /* initialization */
1596     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1597 
1598     /* jl: linked list for storing indices of the pivot rows
1599        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1600     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1601     il         = jl + am;
1602     uj_ptr     = (PetscInt**)(il + am);
1603     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1604     for (i=0; i<am; i++){
1605       jl[i] = am; il[i] = 0;
1606     }
1607 
1608     /* create and initialize a linked list for storing column indices of the active row k */
1609     nlnk = am + 1;
1610     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1611 
1612     /* initial FreeSpace size is fill*(ai[am]+1) */
1613     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1614     current_space = free_space;
1615     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1616     current_space_lvl = free_space_lvl;
1617 
1618     for (k=0; k<am; k++){  /* for each active row k */
1619       /* initialize lnk by the column indices of row rip[k] of A */
1620       nzk   = 0;
1621       ncols = ai[rip[k]+1] - ai[rip[k]];
1622       if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1623       ncols_upper = 0;
1624       for (j=0; j<ncols; j++){
1625         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1626         if (riip[i] >= k){ /* only take upper triangular entry */
1627           ajtmp[ncols_upper] = i;
1628           ncols_upper++;
1629         }
1630       }
1631       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1632       nzk += nlnk;
1633 
1634       /* update lnk by computing fill-in for each pivot row to be merged in */
1635       prow = jl[k]; /* 1st pivot row */
1636 
1637       while (prow < k){
1638         nextprow = jl[prow];
1639 
1640         /* merge prow into k-th row */
1641         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1642         jmax = ui[prow+1];
1643         ncols = jmax-jmin;
1644         i     = jmin - ui[prow];
1645         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1646         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1647         j     = *(uj - 1);
1648         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1649         nzk += nlnk;
1650 
1651         /* update il and jl for prow */
1652         if (jmin < jmax){
1653           il[prow] = jmin;
1654           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1655         }
1656         prow = nextprow;
1657       }
1658 
1659       /* if free space is not available, make more free space */
1660       if (current_space->local_remaining<nzk) {
1661         i = am - k + 1; /* num of unfactored rows */
1662         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1663         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1664         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1665         reallocs++;
1666       }
1667 
1668       /* copy data into free_space and free_space_lvl, then initialize lnk */
1669       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1670       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1671 
1672       /* add the k-th row into il and jl */
1673       if (nzk > 1){
1674         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1675         jl[k] = jl[i]; jl[i] = k;
1676         il[k] = ui[k] + 1;
1677       }
1678       uj_ptr[k]     = current_space->array;
1679       uj_lvl_ptr[k] = current_space_lvl->array;
1680 
1681       current_space->array           += nzk;
1682       current_space->local_used      += nzk;
1683       current_space->local_remaining -= nzk;
1684 
1685       current_space_lvl->array           += nzk;
1686       current_space_lvl->local_used      += nzk;
1687       current_space_lvl->local_remaining -= nzk;
1688 
1689       ui[k+1] = ui[k] + nzk;
1690     }
1691 
1692 #if defined(PETSC_USE_INFO)
1693     if (ai[am] != 0) {
1694       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1695       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1696       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1697       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1698     } else {
1699       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1700     }
1701 #endif
1702 
1703     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1704     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1705     ierr = PetscFree(jl);CHKERRQ(ierr);
1706     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1707 
1708     /* destroy list of free space and other temporary array(s) */
1709     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1710     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1711     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1712     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1713 
1714   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1715 
1716   /* put together the new matrix in MATSEQSBAIJ format */
1717 
1718   b    = (Mat_SeqSBAIJ*)(fact)->data;
1719   b->singlemalloc = PETSC_FALSE;
1720   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1721   b->j    = uj;
1722   b->i    = ui;
1723   b->diag = 0;
1724   b->ilen = 0;
1725   b->imax = 0;
1726   b->row  = perm;
1727   b->col  = perm;
1728   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1729   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1730   b->icol = iperm;
1731   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1732   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1733   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1734   b->maxnz   = b->nz = ui[am];
1735   b->free_a  = PETSC_TRUE;
1736   b->free_ij = PETSC_TRUE;
1737 
1738   (fact)->info.factor_mallocs    = reallocs;
1739   (fact)->info.fill_ratio_given  = fill;
1740   if (ai[am] != 0) {
1741     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1742   } else {
1743     (fact)->info.fill_ratio_needed = 0.0;
1744   }
1745   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 #undef __FUNCT__
1750 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1751 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1752 {
1753   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1754   Mat_SeqSBAIJ       *b;
1755   PetscErrorCode     ierr;
1756   PetscTruth         perm_identity;
1757   PetscReal          fill = info->fill;
1758   const PetscInt     *rip,*riip;
1759   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1760   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1761   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1762   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1763   PetscBT            lnkbt;
1764   IS                 iperm;
1765 
1766   PetscFunctionBegin;
1767   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1768   /* check whether perm is the identity mapping */
1769   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1770   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1771   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1772   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1773 
1774   /* initialization */
1775   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1776   ui[0] = 0;
1777 
1778   /* jl: linked list for storing indices of the pivot rows
1779      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1780   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1781   il     = jl + am;
1782   cols   = il + am;
1783   ui_ptr = (PetscInt**)(cols + am);
1784   for (i=0; i<am; i++){
1785     jl[i] = am; il[i] = 0;
1786   }
1787 
1788   /* create and initialize a linked list for storing column indices of the active row k */
1789   nlnk = am + 1;
1790   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1791 
1792   /* initial FreeSpace size is fill*(ai[am]+1) */
1793   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1794   current_space = free_space;
1795 
1796   for (k=0; k<am; k++){  /* for each active row k */
1797     /* initialize lnk by the column indices of row rip[k] of A */
1798     nzk   = 0;
1799     ncols = ai[rip[k]+1] - ai[rip[k]];
1800     if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1801     ncols_upper = 0;
1802     for (j=0; j<ncols; j++){
1803       i = riip[*(aj + ai[rip[k]] + j)];
1804       if (i >= k){ /* only take upper triangular entry */
1805         cols[ncols_upper] = i;
1806         ncols_upper++;
1807       }
1808     }
1809     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1810     nzk += nlnk;
1811 
1812     /* update lnk by computing fill-in for each pivot row to be merged in */
1813     prow = jl[k]; /* 1st pivot row */
1814 
1815     while (prow < k){
1816       nextprow = jl[prow];
1817       /* merge prow into k-th row */
1818       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1819       jmax = ui[prow+1];
1820       ncols = jmax-jmin;
1821       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1822       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1823       nzk += nlnk;
1824 
1825       /* update il and jl for prow */
1826       if (jmin < jmax){
1827         il[prow] = jmin;
1828         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1829       }
1830       prow = nextprow;
1831     }
1832 
1833     /* if free space is not available, make more free space */
1834     if (current_space->local_remaining<nzk) {
1835       i = am - k + 1; /* num of unfactored rows */
1836       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1837       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1838       reallocs++;
1839     }
1840 
1841     /* copy data into free space, then initialize lnk */
1842     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1843 
1844     /* add the k-th row into il and jl */
1845     if (nzk-1 > 0){
1846       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1847       jl[k] = jl[i]; jl[i] = k;
1848       il[k] = ui[k] + 1;
1849     }
1850     ui_ptr[k] = current_space->array;
1851     current_space->array           += nzk;
1852     current_space->local_used      += nzk;
1853     current_space->local_remaining -= nzk;
1854 
1855     ui[k+1] = ui[k] + nzk;
1856   }
1857 
1858 #if defined(PETSC_USE_INFO)
1859   if (ai[am] != 0) {
1860     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1861     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1862     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1863     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1864   } else {
1865      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1866   }
1867 #endif
1868 
1869   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1870   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1871   ierr = PetscFree(jl);CHKERRQ(ierr);
1872 
1873   /* destroy list of free space and other temporary array(s) */
1874   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1875   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1876   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1877 
1878   /* put together the new matrix in MATSEQSBAIJ format */
1879 
1880   b = (Mat_SeqSBAIJ*)(fact)->data;
1881   b->singlemalloc = PETSC_FALSE;
1882   b->free_a       = PETSC_TRUE;
1883   b->free_ij      = PETSC_TRUE;
1884   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1885   b->j    = uj;
1886   b->i    = ui;
1887   b->diag = 0;
1888   b->ilen = 0;
1889   b->imax = 0;
1890   b->row  = perm;
1891   b->col  = perm;
1892   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1893   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1894   b->icol = iperm;
1895   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1896   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1897   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1898   b->maxnz = b->nz = ui[am];
1899 
1900   (fact)->info.factor_mallocs    = reallocs;
1901   (fact)->info.fill_ratio_given  = fill;
1902   if (ai[am] != 0) {
1903     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1904   } else {
1905     (fact)->info.fill_ratio_needed = 0.0;
1906   }
1907   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1908   PetscFunctionReturn(0);
1909 }
1910