xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision ca4a67af8ca7cf2d1ac25cfe8f5aed45e6b9324f)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/seq/aij.h"
4 #include "../src/inline/dot.h"
5 #include "../src/inline/spops.h"
6 #include "petscbt.h"
7 #include "../src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   const PetscInt *c,*r,*ic;
64   PetscInt       i,n = A->rmap->n,*cc,*cr;
65   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
66   PetscInt       *ordcol,*iwk,*iperm,*jw;
67   PetscInt       jmax,lfill,job,*o_i,*o_j;
68   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
69   PetscReal      af,dt,dtcount,dtcol,fill;
70 
71   PetscFunctionBegin;
72 
73   if (info->dt == PETSC_DEFAULT)      dt      = .005; else dt = info->dt;
74   if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);  else dtcount = info->dtcount;
75   if (info->dtcol == PETSC_DEFAULT)   dtcol   = .01; else dtcol = info->dtcol;
76   if (info->fill == PETSC_DEFAULT)    fill    = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill;
77   lfill   = (PetscInt)(dtcount/2.0);
78   jmax    = (PetscInt)(fill*a->nz);
79 
80 
81   /* ------------------------------------------------------------
82      If reorder=.TRUE., then the original matrix has to be
83      reordered to reflect the user selected ordering scheme, and
84      then de-reordered so it is in it's original format.
85      Because Saad's dperm() is NOT in place, we have to copy
86      the original matrix and allocate more storage. . .
87      ------------------------------------------------------------
88   */
89 
90   /* set reorder to true if either isrow or iscol is not identity */
91   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
92   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
93   reorder = PetscNot(reorder);
94 
95 
96   /* storage for ilu factor */
97   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr);
100   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
101 
102   /* ------------------------------------------------------------
103      Make sure that everything is Fortran formatted (1-Based)
104      ------------------------------------------------------------
105   */
106   for (i=old_i[0];i<old_i[n];i++) {
107     old_j[i]++;
108   }
109   for(i=0;i<n+1;i++) {
110     old_i[i]++;
111   };
112 
113 
114   if (reorder) {
115     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
116     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
117     ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       cr[i]  = r[i]+1;
120       cc[i]  = c[i]+1;
121     }
122     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
123     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
124     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
125     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
126     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr);
127     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job);
128     ierr = PetscFree2(cc,cr);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
222   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = MAT_FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   b->free_a        = PETSC_TRUE;
230   b->free_ij       = PETSC_TRUE;
231   b->singlemalloc  = PETSC_FALSE;
232   b->a             = new_a;
233   b->j             = new_j;
234   b->i             = new_i;
235   b->ilen          = 0;
236   b->imax          = 0;
237   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238   b->row           = isirow;
239   b->col           = iscolf;
240   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
241   b->maxnz = b->nz = new_i[n];
242   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
243   (*fact)->info.factor_mallocs = 0;
244 
245   af = ((double)b->nz)/((double)a->nz) + .001;
246   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr);
247   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
248   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
249   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
250 
251   if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ;
252   else         (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
253   (*fact)->ops->solveadd           = MatSolveAdd_SeqAIJ;
254   (*fact)->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
255   (*fact)->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
256 
257   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
258 
259   PetscFunctionReturn(0);
260 #endif
261 }
262 
263 EXTERN_C_BEGIN
264 #undef __FUNCT__
265 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
266 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
267 {
268   PetscFunctionBegin;
269   *flg = PETSC_TRUE;
270   PetscFunctionReturn(0);
271 }
272 EXTERN_C_END
273 
274 EXTERN_C_BEGIN
275 #undef __FUNCT__
276 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
277 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
278 {
279   PetscInt           n = A->rmap->n;
280   PetscErrorCode     ierr;
281 
282   PetscFunctionBegin;
283   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
284   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
285   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
286     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
287     if (ftype == MAT_FACTOR_ILU) {
288       (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ;
289     } else if(MAT_FACTOR_LU) {
290       (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
291     } else {
292       (*B)->ops->iludtfactorsymbolic = MatILUDTFactorSymbolic_SeqAIJ;
293     }
294   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
295     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
296     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
297     if (ftype == MAT_FACTOR_ICC) {
298       (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
299     } else {
300       (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
301     }
302   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
303   (*B)->factor = ftype;
304   PetscFunctionReturn(0);
305 }
306 EXTERN_C_END
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
310 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
311 {
312   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
313   IS                 isicol;
314   PetscErrorCode     ierr;
315   const PetscInt     *r,*ic;
316   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
317   PetscInt           *bi,*bj,*ajtmp;
318   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
319   PetscReal          f;
320   PetscInt           nlnk,*lnk,k,**bi_ptr;
321   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
322   PetscBT            lnkbt;
323 
324   PetscFunctionBegin;
325   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
326   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
327   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
328   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
329 
330   /* get new row pointers */
331   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
332   bi[0] = 0;
333 
334   /* bdiag is location of diagonal in factor */
335   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
336   bdiag[0] = 0;
337 
338   /* linked list for storing column indices of the active row */
339   nlnk = n + 1;
340   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
341 
342   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
343 
344   /* initial FreeSpace size is f*(ai[n]+1) */
345   f = info->fill;
346   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
347   current_space = free_space;
348 
349   for (i=0; i<n; i++) {
350     /* copy previous fill into linked list */
351     nzi = 0;
352     nnz = ai[r[i]+1] - ai[r[i]];
353     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
354     ajtmp = aj + ai[r[i]];
355     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
356     nzi += nlnk;
357 
358     /* add pivot rows into linked list */
359     row = lnk[n];
360     while (row < i) {
361       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
362       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
363       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
364       nzi += nlnk;
365       row  = lnk[row];
366     }
367     bi[i+1] = bi[i] + nzi;
368     im[i]   = nzi;
369 
370     /* mark bdiag */
371     nzbd = 0;
372     nnz  = nzi;
373     k    = lnk[n];
374     while (nnz-- && k < i){
375       nzbd++;
376       k = lnk[k];
377     }
378     bdiag[i] = bi[i] + nzbd;
379 
380     /* if free space is not available, make more free space */
381     if (current_space->local_remaining<nzi) {
382       nnz = (n - i)*nzi; /* estimated and max additional space needed */
383       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
384       reallocs++;
385     }
386 
387     /* copy data into free space, then initialize lnk */
388     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
389     bi_ptr[i] = current_space->array;
390     current_space->array           += nzi;
391     current_space->local_used      += nzi;
392     current_space->local_remaining -= nzi;
393   }
394 #if defined(PETSC_USE_INFO)
395   if (ai[n] != 0) {
396     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
397     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
398     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
399     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
400     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
401   } else {
402     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
403   }
404 #endif
405 
406   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
407   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
408 
409   /* destroy list of free space and other temporary array(s) */
410   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
411   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
412   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
413   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
414 
415   /* put together the new matrix */
416   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
417   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
418   b    = (Mat_SeqAIJ*)(B)->data;
419   b->free_a       = PETSC_TRUE;
420   b->free_ij      = PETSC_TRUE;
421   b->singlemalloc = PETSC_FALSE;
422   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
423   b->j          = bj;
424   b->i          = bi;
425   b->diag       = bdiag;
426   b->ilen       = 0;
427   b->imax       = 0;
428   b->row        = isrow;
429   b->col        = iscol;
430   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
431   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
432   b->icol       = isicol;
433   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
434 
435   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
436   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
437   b->maxnz = b->nz = bi[n] ;
438 
439   (B)->factor                = MAT_FACTOR_LU;
440   (B)->info.factor_mallocs   = reallocs;
441   (B)->info.fill_ratio_given = f;
442 
443   if (ai[n]) {
444     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
445   } else {
446     (B)->info.fill_ratio_needed = 0.0;
447   }
448   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
449   (B)->ops->solve            = MatSolve_SeqAIJ;
450   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
451   /* switch to inodes if appropriate */
452   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
453   PetscFunctionReturn(0);
454 }
455 
456 /*
457     Trouble in factorization, should we dump the original matrix?
458 */
459 #undef __FUNCT__
460 #define __FUNCT__ "MatFactorDumpMatrix"
461 PetscErrorCode MatFactorDumpMatrix(Mat A)
462 {
463   PetscErrorCode ierr;
464   PetscTruth     flg = PETSC_FALSE;
465 
466   PetscFunctionBegin;
467   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
468   if (flg) {
469     PetscViewer viewer;
470     char        filename[PETSC_MAX_PATH_LEN];
471 
472     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
473     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
474     ierr = MatView(A,viewer);CHKERRQ(ierr);
475     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
476   }
477   PetscFunctionReturn(0);
478 }
479 
480 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
481 
482 /* ----------------------------------------------------------- */
483 #undef __FUNCT__
484 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
485 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
486 {
487   Mat            C=B;
488   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
489   IS             isrow = b->row,isicol = b->icol;
490   PetscErrorCode ierr;
491   const PetscInt  *r,*ic,*ics;
492   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
493   PetscInt       *ajtmp,*bjtmp,nz,row,*diag_offset = b->diag,diag,*pj;
494   MatScalar      *rtmp,*pc,multiplier,*v,*pv,d,*aa=a->a;
495   PetscReal      rs = 0.0;
496   LUShift_Ctx    sctx;
497   PetscInt       newshift,*ddiag;
498 
499   PetscFunctionBegin;
500   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
501   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
502   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
503   ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
504   ics  = ic;
505 
506   sctx.shift_top      = 0;
507   sctx.nshift_max     = 0;
508   sctx.shift_lo       = 0;
509   sctx.shift_hi       = 0;
510   sctx.shift_fraction = 0;
511 
512   /* if both shift schemes are chosen by user, only use info->shiftpd */
513   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
514     ddiag          = a->diag;
515     sctx.shift_top = info->zeropivot;
516     for (i=0; i<n; i++) {
517       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
518       d  = (aa)[ddiag[i]];
519       rs = -PetscAbsScalar(d) - PetscRealPart(d);
520       v  = aa+ai[i];
521       nz = ai[i+1] - ai[i];
522       for (j=0; j<nz; j++)
523 	rs += PetscAbsScalar(v[j]);
524       if (rs>sctx.shift_top) sctx.shift_top = rs;
525     }
526     sctx.shift_top   *= 1.1;
527     sctx.nshift_max   = 5;
528     sctx.shift_lo     = 0.;
529     sctx.shift_hi     = 1.;
530   }
531 
532   sctx.shift_amount = 0.0;
533   sctx.nshift       = 0;
534   do {
535     sctx.lushift = PETSC_FALSE;
536     for (i=0; i<n; i++){
537       nz    = bi[i+1] - bi[i];
538       bjtmp = bj + bi[i];
539       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
540 
541       /* load in initial (unfactored row) */
542       nz    = ai[r[i]+1] - ai[r[i]];
543       ajtmp = aj + ai[r[i]];
544       v     = aa + ai[r[i]];
545       for (j=0; j<nz; j++) {
546         rtmp[ics[ajtmp[j]]] = v[j];
547       }
548       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
549 
550       row = *bjtmp++;
551       while  (row < i) {
552         pc = rtmp + row;
553         if (*pc != 0.0) {
554           pv         = b->a + diag_offset[row];
555           pj         = b->j + diag_offset[row] + 1;
556           multiplier = *pc / *pv++;
557           *pc        = multiplier;
558           nz         = bi[row+1] - diag_offset[row] - 1;
559           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
560           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
561         }
562         row = *bjtmp++;
563       }
564       /* finished row so stick it into b->a */
565       pv   = b->a + bi[i] ;
566       pj   = b->j + bi[i] ;
567       nz   = bi[i+1] - bi[i];
568       diag = diag_offset[i] - bi[i];
569       rs   = -PetscAbsScalar(pv[diag]);
570       for (j=0; j<nz; j++) {
571         pv[j] = rtmp[pj[j]];
572         rs   += PetscAbsScalar(pv[j]);
573       }
574 
575       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
576       sctx.rs  = rs;
577       sctx.pv  = pv[diag];
578       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
579       if (newshift == 1) break;
580     }
581 
582     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
583       /*
584        * if no shift in this attempt & shifting & started shifting & can refine,
585        * then try lower shift
586        */
587       sctx.shift_hi       = sctx.shift_fraction;
588       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
589       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
590       sctx.lushift        = PETSC_TRUE;
591       sctx.nshift++;
592     }
593   } while (sctx.lushift);
594 
595   /* invert diagonal entries for simplier triangular solves */
596   for (i=0; i<n; i++) {
597     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
598   }
599   ierr = PetscFree(rtmp);CHKERRQ(ierr);
600   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
601   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
602   if (b->inode.use) {
603     C->ops->solve   = MatSolve_Inode;
604   } else {
605     PetscTruth row_identity, col_identity;
606     ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
607     ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
608     if (row_identity && col_identity) {
609       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
610     } else {
611       C->ops->solve   = MatSolve_SeqAIJ;
612     }
613   }
614   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
615   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
616   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
617   C->ops->matsolve           = MatMatSolve_SeqAIJ;
618   C->assembled    = PETSC_TRUE;
619   C->preallocated = PETSC_TRUE;
620   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
621   if (sctx.nshift){
622      if (info->shiftpd) {
623       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
624     } else if (info->shiftnz) {
625       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
626     }
627   }
628   PetscFunctionReturn(0);
629 }
630 
631 /*
632    This routine implements inplace ILU(0) with row or/and column permutations.
633    Input:
634      A - original matrix
635    Output;
636      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
637          a->j (col index) is permuted by the inverse of colperm, then sorted
638          a->a reordered accordingly with a->j
639          a->diag (ptr to diagonal elements) is updated.
640 */
641 #undef __FUNCT__
642 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
643 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
644 {
645   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
646   IS             isrow = a->row,isicol = a->icol;
647   PetscErrorCode ierr;
648   const PetscInt *r,*ic,*ics;
649   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
650   PetscInt       *ajtmp,nz,row;
651   PetscInt       *diag = a->diag,nbdiag,*pj;
652   PetscScalar    *rtmp,*pc,multiplier,d;
653   MatScalar      *v,*pv;
654   PetscReal      rs;
655   LUShift_Ctx    sctx;
656   PetscInt       newshift;
657 
658   PetscFunctionBegin;
659   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
660   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
661   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
662   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
663   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
664   ics = ic;
665 
666   sctx.shift_top      = 0;
667   sctx.nshift_max     = 0;
668   sctx.shift_lo       = 0;
669   sctx.shift_hi       = 0;
670   sctx.shift_fraction = 0;
671 
672   /* if both shift schemes are chosen by user, only use info->shiftpd */
673   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
674     sctx.shift_top = 0;
675     for (i=0; i<n; i++) {
676       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
677       d  = (a->a)[diag[i]];
678       rs = -PetscAbsScalar(d) - PetscRealPart(d);
679       v  = a->a+ai[i];
680       nz = ai[i+1] - ai[i];
681       for (j=0; j<nz; j++)
682 	rs += PetscAbsScalar(v[j]);
683       if (rs>sctx.shift_top) sctx.shift_top = rs;
684     }
685     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
686     sctx.shift_top    *= 1.1;
687     sctx.nshift_max   = 5;
688     sctx.shift_lo     = 0.;
689     sctx.shift_hi     = 1.;
690   }
691 
692   sctx.shift_amount = 0;
693   sctx.nshift       = 0;
694   do {
695     sctx.lushift = PETSC_FALSE;
696     for (i=0; i<n; i++){
697       /* load in initial unfactored row */
698       nz    = ai[r[i]+1] - ai[r[i]];
699       ajtmp = aj + ai[r[i]];
700       v     = a->a + ai[r[i]];
701       /* sort permuted ajtmp and values v accordingly */
702       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
703       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
704 
705       diag[r[i]] = ai[r[i]];
706       for (j=0; j<nz; j++) {
707         rtmp[ajtmp[j]] = v[j];
708         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
709       }
710       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
711 
712       row = *ajtmp++;
713       while  (row < i) {
714         pc = rtmp + row;
715         if (*pc != 0.0) {
716           pv         = a->a + diag[r[row]];
717           pj         = aj + diag[r[row]] + 1;
718 
719           multiplier = *pc / *pv++;
720           *pc        = multiplier;
721           nz         = ai[r[row]+1] - diag[r[row]] - 1;
722           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
723           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
724         }
725         row = *ajtmp++;
726       }
727       /* finished row so overwrite it onto a->a */
728       pv   = a->a + ai[r[i]] ;
729       pj   = aj + ai[r[i]] ;
730       nz   = ai[r[i]+1] - ai[r[i]];
731       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
732 
733       rs   = 0.0;
734       for (j=0; j<nz; j++) {
735         pv[j] = rtmp[pj[j]];
736         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
737       }
738 
739       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
740       sctx.rs  = rs;
741       sctx.pv  = pv[nbdiag];
742       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
743       if (newshift == 1) break;
744     }
745 
746     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
747       /*
748        * if no shift in this attempt & shifting & started shifting & can refine,
749        * then try lower shift
750        */
751       sctx.shift_hi        = sctx.shift_fraction;
752       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
753       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
754       sctx.lushift         = PETSC_TRUE;
755       sctx.nshift++;
756     }
757   } while (sctx.lushift);
758 
759   /* invert diagonal entries for simplier triangular solves */
760   for (i=0; i<n; i++) {
761     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
762   }
763 
764   ierr = PetscFree(rtmp);CHKERRQ(ierr);
765   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
766   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
767   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
768   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
769   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
770   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
771   A->assembled = PETSC_TRUE;
772   A->preallocated = PETSC_TRUE;
773   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
774   if (sctx.nshift){
775     if (info->shiftpd) {
776       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
777     } else if (info->shiftnz) {
778       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
779     }
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 /* ----------------------------------------------------------- */
785 #undef __FUNCT__
786 #define __FUNCT__ "MatLUFactor_SeqAIJ"
787 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
788 {
789   PetscErrorCode ierr;
790   Mat            C;
791 
792   PetscFunctionBegin;
793   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
794   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
795   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
796   A->ops->solve            = C->ops->solve;
797   A->ops->solvetranspose   = C->ops->solvetranspose;
798   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
799   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
800   PetscFunctionReturn(0);
801 }
802 /* ----------------------------------------------------------- */
803 #undef __FUNCT__
804 #define __FUNCT__ "MatSolve_SeqAIJ"
805 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
806 {
807   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
808   IS                iscol = a->col,isrow = a->row;
809   PetscErrorCode    ierr;
810   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
811   PetscInt          nz;
812   const PetscInt    *rout,*cout,*r,*c;
813   PetscScalar       *x,*tmp,*tmps,sum;
814   const PetscScalar *b;
815   const MatScalar   *aa = a->a,*v;
816 
817   PetscFunctionBegin;
818   if (!n) PetscFunctionReturn(0);
819 
820   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
821   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
822   tmp  = a->solve_work;
823 
824   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
825   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
826 
827   /* forward solve the lower triangular */
828   tmp[0] = b[*r++];
829   tmps   = tmp;
830   for (i=1; i<n; i++) {
831     v   = aa + ai[i] ;
832     vi  = aj + ai[i] ;
833     nz  = a->diag[i] - ai[i];
834     sum = b[*r++];
835     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
836     tmp[i] = sum;
837   }
838 
839   /* backward solve the upper triangular */
840   for (i=n-1; i>=0; i--){
841     v   = aa + a->diag[i] + 1;
842     vi  = aj + a->diag[i] + 1;
843     nz  = ai[i+1] - a->diag[i] - 1;
844     sum = tmp[i];
845     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
846     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
847   }
848 
849   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
850   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
851   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
852   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
853   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
854   PetscFunctionReturn(0);
855 }
856 
857 #undef __FUNCT__
858 #define __FUNCT__ "MatMatSolve_SeqAIJ"
859 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
860 {
861   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
862   IS              iscol = a->col,isrow = a->row;
863   PetscErrorCode  ierr;
864   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
865   PetscInt        nz,neq;
866   const PetscInt  *rout,*cout,*r,*c;
867   PetscScalar     *x,*b,*tmp,*tmps,sum;
868   const MatScalar *aa = a->a,*v;
869   PetscTruth      bisdense,xisdense;
870 
871   PetscFunctionBegin;
872   if (!n) PetscFunctionReturn(0);
873 
874   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
875   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
876   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
877   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
878 
879   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
880   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
881 
882   tmp  = a->solve_work;
883   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
884   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
885 
886   for (neq=0; neq<B->cmap->n; neq++){
887     /* forward solve the lower triangular */
888     tmp[0] = b[r[0]];
889     tmps   = tmp;
890     for (i=1; i<n; i++) {
891       v   = aa + ai[i] ;
892       vi  = aj + ai[i] ;
893       nz  = a->diag[i] - ai[i];
894       sum = b[r[i]];
895       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
896       tmp[i] = sum;
897     }
898     /* backward solve the upper triangular */
899     for (i=n-1; i>=0; i--){
900       v   = aa + a->diag[i] + 1;
901       vi  = aj + a->diag[i] + 1;
902       nz  = ai[i+1] - a->diag[i] - 1;
903       sum = tmp[i];
904       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
905       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
906     }
907 
908     b += n;
909     x += n;
910   }
911   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
912   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
913   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
914   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
915   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 #undef __FUNCT__
920 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
921 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
922 {
923   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
924   IS              iscol = a->col,isrow = a->row;
925   PetscErrorCode  ierr;
926   const PetscInt  *r,*c,*rout,*cout;
927   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
928   PetscInt        nz,row;
929   PetscScalar     *x,*b,*tmp,*tmps,sum;
930   const MatScalar *aa = a->a,*v;
931 
932   PetscFunctionBegin;
933   if (!n) PetscFunctionReturn(0);
934 
935   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
936   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
937   tmp  = a->solve_work;
938 
939   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
940   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
941 
942   /* forward solve the lower triangular */
943   tmp[0] = b[*r++];
944   tmps   = tmp;
945   for (row=1; row<n; row++) {
946     i   = rout[row]; /* permuted row */
947     v   = aa + ai[i] ;
948     vi  = aj + ai[i] ;
949     nz  = a->diag[i] - ai[i];
950     sum = b[*r++];
951     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
952     tmp[row] = sum;
953   }
954 
955   /* backward solve the upper triangular */
956   for (row=n-1; row>=0; row--){
957     i   = rout[row]; /* permuted row */
958     v   = aa + a->diag[i] + 1;
959     vi  = aj + a->diag[i] + 1;
960     nz  = ai[i+1] - a->diag[i] - 1;
961     sum = tmp[row];
962     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
963     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
964   }
965 
966   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
967   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
968   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
969   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
970   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
971   PetscFunctionReturn(0);
972 }
973 
974 /* ----------------------------------------------------------- */
975 #undef __FUNCT__
976 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
977 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
978 {
979   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
980   PetscErrorCode    ierr;
981   PetscInt          n = A->rmap->n;
982   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
983   PetscScalar       *x;
984   const PetscScalar *b;
985   const MatScalar   *aa = a->a;
986 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
987   PetscInt          adiag_i,i,nz,ai_i;
988   const MatScalar   *v;
989   PetscScalar       sum;
990 #endif
991 
992   PetscFunctionBegin;
993   if (!n) PetscFunctionReturn(0);
994 
995   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
996   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
997 
998 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
999   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1000 #else
1001   /* forward solve the lower triangular */
1002   x[0] = b[0];
1003   for (i=1; i<n; i++) {
1004     ai_i = ai[i];
1005     v    = aa + ai_i;
1006     vi   = aj + ai_i;
1007     nz   = adiag[i] - ai_i;
1008     sum  = b[i];
1009     while (nz--) sum -= *v++ * x[*vi++];
1010     x[i] = sum;
1011   }
1012 
1013   /* backward solve the upper triangular */
1014   for (i=n-1; i>=0; i--){
1015     adiag_i = adiag[i];
1016     v       = aa + adiag_i + 1;
1017     vi      = aj + adiag_i + 1;
1018     nz      = ai[i+1] - adiag_i - 1;
1019     sum     = x[i];
1020     while (nz--) sum -= *v++ * x[*vi++];
1021     x[i]    = sum*aa[adiag_i];
1022   }
1023 #endif
1024   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1025   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1026   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1032 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1033 {
1034   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1035   IS              iscol = a->col,isrow = a->row;
1036   PetscErrorCode  ierr;
1037   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1038   PetscInt        nz;
1039   const PetscInt  *rout,*cout,*r,*c;
1040   PetscScalar     *x,*b,*tmp,sum;
1041   const MatScalar *aa = a->a,*v;
1042 
1043   PetscFunctionBegin;
1044   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1045 
1046   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1047   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1048   tmp  = a->solve_work;
1049 
1050   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1051   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1052 
1053   /* forward solve the lower triangular */
1054   tmp[0] = b[*r++];
1055   for (i=1; i<n; i++) {
1056     v   = aa + ai[i] ;
1057     vi  = aj + ai[i] ;
1058     nz  = a->diag[i] - ai[i];
1059     sum = b[*r++];
1060     while (nz--) sum -= *v++ * tmp[*vi++ ];
1061     tmp[i] = sum;
1062   }
1063 
1064   /* backward solve the upper triangular */
1065   for (i=n-1; i>=0; i--){
1066     v   = aa + a->diag[i] + 1;
1067     vi  = aj + a->diag[i] + 1;
1068     nz  = ai[i+1] - a->diag[i] - 1;
1069     sum = tmp[i];
1070     while (nz--) sum -= *v++ * tmp[*vi++ ];
1071     tmp[i] = sum*aa[a->diag[i]];
1072     x[*c--] += tmp[i];
1073   }
1074 
1075   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1076   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1077   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1078   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1079   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1080 
1081   PetscFunctionReturn(0);
1082 }
1083 /* -------------------------------------------------------------------*/
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1086 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1087 {
1088   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1089   IS              iscol = a->col,isrow = a->row;
1090   PetscErrorCode  ierr;
1091   const PetscInt  *rout,*cout,*r,*c;
1092   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1093   PetscInt        nz,*diag = a->diag;
1094   PetscScalar     *x,*b,*tmp,s1;
1095   const MatScalar *aa = a->a,*v;
1096 
1097   PetscFunctionBegin;
1098   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1099   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1100   tmp  = a->solve_work;
1101 
1102   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1103   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1104 
1105   /* copy the b into temp work space according to permutation */
1106   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1107 
1108   /* forward solve the U^T */
1109   for (i=0; i<n; i++) {
1110     v   = aa + diag[i] ;
1111     vi  = aj + diag[i] + 1;
1112     nz  = ai[i+1] - diag[i] - 1;
1113     s1  = tmp[i];
1114     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1115     while (nz--) {
1116       tmp[*vi++ ] -= (*v++)*s1;
1117     }
1118     tmp[i] = s1;
1119   }
1120 
1121   /* backward solve the L^T */
1122   for (i=n-1; i>=0; i--){
1123     v   = aa + diag[i] - 1 ;
1124     vi  = aj + diag[i] - 1 ;
1125     nz  = diag[i] - ai[i];
1126     s1  = tmp[i];
1127     while (nz--) {
1128       tmp[*vi-- ] -= (*v--)*s1;
1129     }
1130   }
1131 
1132   /* copy tmp into x according to permutation */
1133   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1134 
1135   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1136   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1137   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1138   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1139 
1140   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1146 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1147 {
1148   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1149   IS              iscol = a->col,isrow = a->row;
1150   PetscErrorCode  ierr;
1151   const PetscInt  *r,*c,*rout,*cout;
1152   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1153   PetscInt        nz,*diag = a->diag;
1154   PetscScalar     *x,*b,*tmp;
1155   const MatScalar *aa = a->a,*v;
1156 
1157   PetscFunctionBegin;
1158   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1159 
1160   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1162   tmp = a->solve_work;
1163 
1164   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1165   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1166 
1167   /* copy the b into temp work space according to permutation */
1168   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1169 
1170   /* forward solve the U^T */
1171   for (i=0; i<n; i++) {
1172     v   = aa + diag[i] ;
1173     vi  = aj + diag[i] + 1;
1174     nz  = ai[i+1] - diag[i] - 1;
1175     tmp[i] *= *v++;
1176     while (nz--) {
1177       tmp[*vi++ ] -= (*v++)*tmp[i];
1178     }
1179   }
1180 
1181   /* backward solve the L^T */
1182   for (i=n-1; i>=0; i--){
1183     v   = aa + diag[i] - 1 ;
1184     vi  = aj + diag[i] - 1 ;
1185     nz  = diag[i] - ai[i];
1186     while (nz--) {
1187       tmp[*vi-- ] -= (*v--)*tmp[i];
1188     }
1189   }
1190 
1191   /* copy tmp into x according to permutation */
1192   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1193 
1194   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1195   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1196   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1197   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1198 
1199   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 /* ----------------------------------------------------------------*/
1203 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1204 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
1205 
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1208 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1209 {
1210   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1211   IS                 isicol;
1212   PetscErrorCode     ierr;
1213   const PetscInt     *r,*ic;
1214   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1215   PetscInt           *bi,*cols,nnz,*cols_lvl;
1216   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1217   PetscInt           i,levels,diagonal_fill;
1218   PetscTruth         col_identity,row_identity;
1219   PetscReal          f;
1220   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1221   PetscBT            lnkbt;
1222   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1223   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1224   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1225   PetscTruth         missing;
1226 
1227   PetscFunctionBegin;
1228   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1229   f             = info->fill;
1230   levels        = (PetscInt)info->levels;
1231   diagonal_fill = (PetscInt)info->diagonal_fill;
1232   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1233 
1234   /* special case that simply copies fill pattern */
1235   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1236   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1237   if (!levels && row_identity && col_identity) {
1238     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr);
1239     fact->factor = MAT_FACTOR_ILU;
1240     (fact)->info.factor_mallocs    = 0;
1241     (fact)->info.fill_ratio_given  = info->fill;
1242     (fact)->info.fill_ratio_needed = 1.0;
1243     b               = (Mat_SeqAIJ*)(fact)->data;
1244     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1245     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1246     b->row              = isrow;
1247     b->col              = iscol;
1248     b->icol             = isicol;
1249     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1250     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1251     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1252     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1253     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1254     PetscFunctionReturn(0);
1255   }
1256 
1257   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1258   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1259 
1260   /* get new row pointers */
1261   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1262   bi[0] = 0;
1263   /* bdiag is location of diagonal in factor */
1264   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1265   bdiag[0]  = 0;
1266 
1267   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1268   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1269 
1270   /* create a linked list for storing column indices of the active row */
1271   nlnk = n + 1;
1272   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1273 
1274   /* initial FreeSpace size is f*(ai[n]+1) */
1275   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1276   current_space = free_space;
1277   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1278   current_space_lvl = free_space_lvl;
1279 
1280   for (i=0; i<n; i++) {
1281     nzi = 0;
1282     /* copy current row into linked list */
1283     nnz  = ai[r[i]+1] - ai[r[i]];
1284     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1285     cols = aj + ai[r[i]];
1286     lnk[i] = -1; /* marker to indicate if diagonal exists */
1287     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1288     nzi += nlnk;
1289 
1290     /* make sure diagonal entry is included */
1291     if (diagonal_fill && lnk[i] == -1) {
1292       fm = n;
1293       while (lnk[fm] < i) fm = lnk[fm];
1294       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1295       lnk[fm]    = i;
1296       lnk_lvl[i] = 0;
1297       nzi++; dcount++;
1298     }
1299 
1300     /* add pivot rows into the active row */
1301     nzbd = 0;
1302     prow = lnk[n];
1303     while (prow < i) {
1304       nnz      = bdiag[prow];
1305       cols     = bj_ptr[prow] + nnz + 1;
1306       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1307       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1308       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1309       nzi += nlnk;
1310       prow = lnk[prow];
1311       nzbd++;
1312     }
1313     bdiag[i] = nzbd;
1314     bi[i+1]  = bi[i] + nzi;
1315 
1316     /* if free space is not available, make more free space */
1317     if (current_space->local_remaining<nzi) {
1318       nnz = nzi*(n - i); /* estimated and max additional space needed */
1319       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1320       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1321       reallocs++;
1322     }
1323 
1324     /* copy data into free_space and free_space_lvl, then initialize lnk */
1325     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1326     bj_ptr[i]    = current_space->array;
1327     bjlvl_ptr[i] = current_space_lvl->array;
1328 
1329     /* make sure the active row i has diagonal entry */
1330     if (*(bj_ptr[i]+bdiag[i]) != i) {
1331       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1332     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1333     }
1334 
1335     current_space->array           += nzi;
1336     current_space->local_used      += nzi;
1337     current_space->local_remaining -= nzi;
1338     current_space_lvl->array           += nzi;
1339     current_space_lvl->local_used      += nzi;
1340     current_space_lvl->local_remaining -= nzi;
1341   }
1342 
1343   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1344   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1345 
1346   /* destroy list of free space and other temporary arrays */
1347   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1348   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1349   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1350   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1351   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1352 
1353 #if defined(PETSC_USE_INFO)
1354   {
1355     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1356     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1357     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1358     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1359     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1360     if (diagonal_fill) {
1361       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1362     }
1363   }
1364 #endif
1365 
1366   /* put together the new matrix */
1367   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1368   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1369   b = (Mat_SeqAIJ*)(fact)->data;
1370   b->free_a       = PETSC_TRUE;
1371   b->free_ij      = PETSC_TRUE;
1372   b->singlemalloc = PETSC_FALSE;
1373   ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1374   b->j          = bj;
1375   b->i          = bi;
1376   for (i=0; i<n; i++) bdiag[i] += bi[i];
1377   b->diag       = bdiag;
1378   b->ilen       = 0;
1379   b->imax       = 0;
1380   b->row        = isrow;
1381   b->col        = iscol;
1382   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1383   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1384   b->icol       = isicol;
1385   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1386   /* In b structure:  Free imax, ilen, old a, old j.
1387      Allocate bdiag, solve_work, new a, new j */
1388   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1389   b->maxnz             = b->nz = bi[n] ;
1390   (fact)->info.factor_mallocs    = reallocs;
1391   (fact)->info.fill_ratio_given  = f;
1392   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1393   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1394   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 #include "../src/mat/impls/sbaij/seq/sbaij.h"
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1401 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1402 {
1403   Mat            C = B;
1404   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1405   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1406   IS             ip=b->row,iip = b->icol;
1407   PetscErrorCode ierr;
1408   const PetscInt *rip,*riip;
1409   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1410   PetscInt       *ai=a->i,*aj=a->j;
1411   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1412   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1413   PetscReal      zeropivot,rs,shiftnz;
1414   PetscReal      shiftpd;
1415   ChShift_Ctx    sctx;
1416   PetscInt       newshift;
1417   PetscTruth     perm_identity;
1418 
1419   PetscFunctionBegin;
1420 
1421   shiftnz   = info->shiftnz;
1422   shiftpd   = info->shiftpd;
1423   zeropivot = info->zeropivot;
1424 
1425   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1426   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1427 
1428   /* initialization */
1429   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1430   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1431   jl   = il + mbs;
1432   rtmp = (MatScalar*)(jl + mbs);
1433 
1434   sctx.shift_amount = 0;
1435   sctx.nshift       = 0;
1436   do {
1437     sctx.chshift = PETSC_FALSE;
1438     for (i=0; i<mbs; i++) {
1439       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1440     }
1441 
1442     for (k = 0; k<mbs; k++){
1443       bval = ba + bi[k];
1444       /* initialize k-th row by the perm[k]-th row of A */
1445       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1446       for (j = jmin; j < jmax; j++){
1447         col = riip[aj[j]];
1448         if (col >= k){ /* only take upper triangular entry */
1449           rtmp[col] = aa[j];
1450           *bval++  = 0.0; /* for in-place factorization */
1451         }
1452       }
1453       /* shift the diagonal of the matrix */
1454       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1455 
1456       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1457       dk = rtmp[k];
1458       i = jl[k]; /* first row to be added to k_th row  */
1459 
1460       while (i < k){
1461         nexti = jl[i]; /* next row to be added to k_th row */
1462 
1463         /* compute multiplier, update diag(k) and U(i,k) */
1464         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1465         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1466         dk += uikdi*ba[ili];
1467         ba[ili] = uikdi; /* -U(i,k) */
1468 
1469         /* add multiple of row i to k-th row */
1470         jmin = ili + 1; jmax = bi[i+1];
1471         if (jmin < jmax){
1472           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1473           /* update il and jl for row i */
1474           il[i] = jmin;
1475           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1476         }
1477         i = nexti;
1478       }
1479 
1480       /* shift the diagonals when zero pivot is detected */
1481       /* compute rs=sum of abs(off-diagonal) */
1482       rs   = 0.0;
1483       jmin = bi[k]+1;
1484       nz   = bi[k+1] - jmin;
1485       bcol = bj + jmin;
1486       while (nz--){
1487         rs += PetscAbsScalar(rtmp[*bcol]);
1488         bcol++;
1489       }
1490 
1491       sctx.rs = rs;
1492       sctx.pv = dk;
1493       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1494 
1495       if (newshift == 1) {
1496         if (!sctx.shift_amount) {
1497           sctx.shift_amount = 1e-5;
1498         }
1499         break;
1500       }
1501 
1502       /* copy data into U(k,:) */
1503       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1504       jmin = bi[k]+1; jmax = bi[k+1];
1505       if (jmin < jmax) {
1506         for (j=jmin; j<jmax; j++){
1507           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1508         }
1509         /* add the k-th row into il and jl */
1510         il[k] = jmin;
1511         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1512       }
1513     }
1514   } while (sctx.chshift);
1515   ierr = PetscFree(il);CHKERRQ(ierr);
1516 
1517   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1518   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1519 
1520   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1521   if (perm_identity){
1522     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1523     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1524     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1525     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1526   } else {
1527     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1528     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1529     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1530     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1531   }
1532 
1533   C->assembled    = PETSC_TRUE;
1534   C->preallocated = PETSC_TRUE;
1535   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1536   if (sctx.nshift){
1537     if (shiftnz) {
1538       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1539     } else if (shiftpd) {
1540       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1541     }
1542   }
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1548 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1549 {
1550   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1551   Mat_SeqSBAIJ       *b;
1552   PetscErrorCode     ierr;
1553   PetscTruth         perm_identity,missing;
1554   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1555   const PetscInt     *rip,*riip;
1556   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1557   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1558   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1559   PetscReal          fill=info->fill,levels=info->levels;
1560   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1561   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1562   PetscBT            lnkbt;
1563   IS                 iperm;
1564 
1565   PetscFunctionBegin;
1566   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1567   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1568   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1569   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1570   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1571 
1572   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1573   ui[0] = 0;
1574 
1575   /* ICC(0) without matrix ordering: simply copies fill pattern */
1576   if (!levels && perm_identity) {
1577 
1578     for (i=0; i<am; i++) {
1579       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1580     }
1581     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1582     cols = uj;
1583     for (i=0; i<am; i++) {
1584       aj    = a->j + a->diag[i];
1585       ncols = ui[i+1] - ui[i];
1586       for (j=0; j<ncols; j++) *cols++ = *aj++;
1587     }
1588   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1589     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1590     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1591 
1592     /* initialization */
1593     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1594 
1595     /* jl: linked list for storing indices of the pivot rows
1596        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1597     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1598     il         = jl + am;
1599     uj_ptr     = (PetscInt**)(il + am);
1600     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1601     for (i=0; i<am; i++){
1602       jl[i] = am; il[i] = 0;
1603     }
1604 
1605     /* create and initialize a linked list for storing column indices of the active row k */
1606     nlnk = am + 1;
1607     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1608 
1609     /* initial FreeSpace size is fill*(ai[am]+1) */
1610     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1611     current_space = free_space;
1612     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1613     current_space_lvl = free_space_lvl;
1614 
1615     for (k=0; k<am; k++){  /* for each active row k */
1616       /* initialize lnk by the column indices of row rip[k] of A */
1617       nzk   = 0;
1618       ncols = ai[rip[k]+1] - ai[rip[k]];
1619       if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1620       ncols_upper = 0;
1621       for (j=0; j<ncols; j++){
1622         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1623         if (riip[i] >= k){ /* only take upper triangular entry */
1624           ajtmp[ncols_upper] = i;
1625           ncols_upper++;
1626         }
1627       }
1628       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1629       nzk += nlnk;
1630 
1631       /* update lnk by computing fill-in for each pivot row to be merged in */
1632       prow = jl[k]; /* 1st pivot row */
1633 
1634       while (prow < k){
1635         nextprow = jl[prow];
1636 
1637         /* merge prow into k-th row */
1638         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1639         jmax = ui[prow+1];
1640         ncols = jmax-jmin;
1641         i     = jmin - ui[prow];
1642         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1643         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1644         j     = *(uj - 1);
1645         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1646         nzk += nlnk;
1647 
1648         /* update il and jl for prow */
1649         if (jmin < jmax){
1650           il[prow] = jmin;
1651           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1652         }
1653         prow = nextprow;
1654       }
1655 
1656       /* if free space is not available, make more free space */
1657       if (current_space->local_remaining<nzk) {
1658         i = am - k + 1; /* num of unfactored rows */
1659         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1660         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1661         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1662         reallocs++;
1663       }
1664 
1665       /* copy data into free_space and free_space_lvl, then initialize lnk */
1666       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1667       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1668 
1669       /* add the k-th row into il and jl */
1670       if (nzk > 1){
1671         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1672         jl[k] = jl[i]; jl[i] = k;
1673         il[k] = ui[k] + 1;
1674       }
1675       uj_ptr[k]     = current_space->array;
1676       uj_lvl_ptr[k] = current_space_lvl->array;
1677 
1678       current_space->array           += nzk;
1679       current_space->local_used      += nzk;
1680       current_space->local_remaining -= nzk;
1681 
1682       current_space_lvl->array           += nzk;
1683       current_space_lvl->local_used      += nzk;
1684       current_space_lvl->local_remaining -= nzk;
1685 
1686       ui[k+1] = ui[k] + nzk;
1687     }
1688 
1689 #if defined(PETSC_USE_INFO)
1690     if (ai[am] != 0) {
1691       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1692       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1693       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1694       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1695     } else {
1696       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1697     }
1698 #endif
1699 
1700     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1701     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1702     ierr = PetscFree(jl);CHKERRQ(ierr);
1703     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1704 
1705     /* destroy list of free space and other temporary array(s) */
1706     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1707     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1708     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1709     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1710 
1711   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1712 
1713   /* put together the new matrix in MATSEQSBAIJ format */
1714 
1715   b    = (Mat_SeqSBAIJ*)(fact)->data;
1716   b->singlemalloc = PETSC_FALSE;
1717   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1718   b->j    = uj;
1719   b->i    = ui;
1720   b->diag = 0;
1721   b->ilen = 0;
1722   b->imax = 0;
1723   b->row  = perm;
1724   b->col  = perm;
1725   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1726   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1727   b->icol = iperm;
1728   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1729   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1730   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1731   b->maxnz   = b->nz = ui[am];
1732   b->free_a  = PETSC_TRUE;
1733   b->free_ij = PETSC_TRUE;
1734 
1735   (fact)->info.factor_mallocs    = reallocs;
1736   (fact)->info.fill_ratio_given  = fill;
1737   if (ai[am] != 0) {
1738     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1739   } else {
1740     (fact)->info.fill_ratio_needed = 0.0;
1741   }
1742   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1748 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1749 {
1750   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1751   Mat_SeqSBAIJ       *b;
1752   PetscErrorCode     ierr;
1753   PetscTruth         perm_identity;
1754   PetscReal          fill = info->fill;
1755   const PetscInt     *rip,*riip;
1756   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1757   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1758   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1759   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1760   PetscBT            lnkbt;
1761   IS                 iperm;
1762 
1763   PetscFunctionBegin;
1764   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1765   /* check whether perm is the identity mapping */
1766   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1767   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1768   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1769   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1770 
1771   /* initialization */
1772   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1773   ui[0] = 0;
1774 
1775   /* jl: linked list for storing indices of the pivot rows
1776      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1777   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1778   il     = jl + am;
1779   cols   = il + am;
1780   ui_ptr = (PetscInt**)(cols + am);
1781   for (i=0; i<am; i++){
1782     jl[i] = am; il[i] = 0;
1783   }
1784 
1785   /* create and initialize a linked list for storing column indices of the active row k */
1786   nlnk = am + 1;
1787   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1788 
1789   /* initial FreeSpace size is fill*(ai[am]+1) */
1790   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1791   current_space = free_space;
1792 
1793   for (k=0; k<am; k++){  /* for each active row k */
1794     /* initialize lnk by the column indices of row rip[k] of A */
1795     nzk   = 0;
1796     ncols = ai[rip[k]+1] - ai[rip[k]];
1797     if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1798     ncols_upper = 0;
1799     for (j=0; j<ncols; j++){
1800       i = riip[*(aj + ai[rip[k]] + j)];
1801       if (i >= k){ /* only take upper triangular entry */
1802         cols[ncols_upper] = i;
1803         ncols_upper++;
1804       }
1805     }
1806     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1807     nzk += nlnk;
1808 
1809     /* update lnk by computing fill-in for each pivot row to be merged in */
1810     prow = jl[k]; /* 1st pivot row */
1811 
1812     while (prow < k){
1813       nextprow = jl[prow];
1814       /* merge prow into k-th row */
1815       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1816       jmax = ui[prow+1];
1817       ncols = jmax-jmin;
1818       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1819       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1820       nzk += nlnk;
1821 
1822       /* update il and jl for prow */
1823       if (jmin < jmax){
1824         il[prow] = jmin;
1825         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1826       }
1827       prow = nextprow;
1828     }
1829 
1830     /* if free space is not available, make more free space */
1831     if (current_space->local_remaining<nzk) {
1832       i = am - k + 1; /* num of unfactored rows */
1833       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1834       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1835       reallocs++;
1836     }
1837 
1838     /* copy data into free space, then initialize lnk */
1839     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1840 
1841     /* add the k-th row into il and jl */
1842     if (nzk-1 > 0){
1843       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1844       jl[k] = jl[i]; jl[i] = k;
1845       il[k] = ui[k] + 1;
1846     }
1847     ui_ptr[k] = current_space->array;
1848     current_space->array           += nzk;
1849     current_space->local_used      += nzk;
1850     current_space->local_remaining -= nzk;
1851 
1852     ui[k+1] = ui[k] + nzk;
1853   }
1854 
1855 #if defined(PETSC_USE_INFO)
1856   if (ai[am] != 0) {
1857     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1858     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1859     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1860     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1861   } else {
1862      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1863   }
1864 #endif
1865 
1866   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1867   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1868   ierr = PetscFree(jl);CHKERRQ(ierr);
1869 
1870   /* destroy list of free space and other temporary array(s) */
1871   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1872   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1873   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1874 
1875   /* put together the new matrix in MATSEQSBAIJ format */
1876 
1877   b = (Mat_SeqSBAIJ*)(fact)->data;
1878   b->singlemalloc = PETSC_FALSE;
1879   b->free_a       = PETSC_TRUE;
1880   b->free_ij      = PETSC_TRUE;
1881   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1882   b->j    = uj;
1883   b->i    = ui;
1884   b->diag = 0;
1885   b->ilen = 0;
1886   b->imax = 0;
1887   b->row  = perm;
1888   b->col  = perm;
1889   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1890   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1891   b->icol = iperm;
1892   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1893   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1894   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1895   b->maxnz = b->nz = ui[am];
1896 
1897   (fact)->info.factor_mallocs    = reallocs;
1898   (fact)->info.fill_ratio_given  = fill;
1899   if (ai[am] != 0) {
1900     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1901   } else {
1902     (fact)->info.fill_ratio_needed = 0.0;
1903   }
1904   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
1910 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1911 {
1912   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data,*b;
1913   IS               isicol;
1914   PetscErrorCode   ierr;
1915   PetscInt         d,n=A->rmap->n,*ai=a->i;
1916   PetscInt         *bi,*bj,*bdiag,nnz,dtcount;
1917   PetscScalar      *ba;
1918   PetscTruth       missing;
1919 
1920   PetscFunctionBegin;
1921   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
1922   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1923   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1924 
1925   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1926 
1927   /* get new row pointers */
1928   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1929 
1930   /* bdiag is location of diagonal in factor */
1931   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1932 
1933   /* max num of nonzero entries is (ai[n]+2*n*dtcount+1) */
1934   dtcount = (PetscInt)info->dtcount;
1935   nnz  = ai[n]+2*n*dtcount+1;
1936   ierr = PetscMalloc2(nnz,PetscInt,&bj,nnz,PetscScalar,&ba);CHKERRQ(ierr);
1937   /* printf("nnz %d\n",nnz); */
1938 
1939   /* put together the new matrix */
1940   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1941   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
1942   b    = (Mat_SeqAIJ*)(B)->data;
1943   b->free_a       = PETSC_TRUE;
1944   b->free_ij      = PETSC_TRUE;
1945   b->singlemalloc = PETSC_FALSE;
1946   b->a          = ba;
1947   b->j          = bj;
1948   b->i          = bi;
1949   b->diag       = bdiag;
1950   b->ilen       = 0;
1951   b->imax       = 0;
1952   b->row        = isrow;
1953   b->col        = iscol;
1954   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1955   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1956   b->icol       = isicol;
1957   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1958 
1959   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1960 
1961   b->maxnz = b->nz = nnz;
1962 
1963   (B)->factor                = MAT_FACTOR_ILUDT;
1964   (B)->info.factor_mallocs   = 0;
1965   (B)->info.fill_ratio_given = ((PetscReal)nnz)/((PetscReal)ai[n]);
1966   B->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
1972 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1973 {
1974   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1975   IS                 isrow=b->row,isicol=b->icol;
1976   PetscErrorCode     ierr;
1977   const PetscInt     *r,*ic;
1978   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag=a->diag;
1979   PetscInt           *bi=b->i,*bj=b->j,*bdiag=b->diag;
1980   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au;
1981   PetscInt           nlnk,*lnk;
1982   PetscBT            lnkbt;
1983   PetscTruth         row_identity,icol_identity,both_identity;
1984   MatScalar          *v,*pv,*batmp,*ba=b->a,*rtmp,*pc,multiplier,*vtmp;
1985   const PetscInt     *ics;
1986   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
1987   PetscReal          dt=info->dt;
1988 
1989   PetscFunctionBegin;
1990   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1991   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1992   ics  = ic;
1993 
1994   /* linked list for storing column indices of the active row */
1995   nlnk = n + 1;
1996   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1997   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
1998   ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1999   vtmp = rtmp + n;
2000   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&jtmp);CHKERRQ(ierr);
2001 
2002   dtcount = (PetscInt)info->dtcount;
2003 
2004   bi[0]    = 0;
2005   bdiag[0] = 0; /* location of diagonal in factor B */
2006   for (i=0; i<n; i++) {
2007     /* copy initial fill into linked list */
2008     nzi = 0; /* nonzeros for active row i */
2009     nzi = ai[r[i]+1] - ai[r[i]];
2010     if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
2011     nzi_al = adiag[r[i]] - ai[r[i]];
2012     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
2013     ajtmp = aj + ai[r[i]];
2014     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2015 
2016     /* load in initial (unfactored row) */
2017     ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2018     v = a->a + ai[r[i]];
2019     for (j=0; j<nzi; j++) {
2020       rtmp[ics[ajtmp[j]]] = v[j];
2021     }
2022 
2023     /* add pivot rows into linked list */
2024     row = lnk[n];
2025     while (row < i) {
2026       nzi_bl  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
2027       ajtmp = bj + bi[row] + nzi_bl; /* points to the column next to the diagonal */
2028       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
2029       nzi  += nlnk;
2030       row   = lnk[row];
2031     }
2032 
2033     /* copy data from lnk into jtmp, then initialize lnk */
2034     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
2035 
2036     /* numerical factorization */
2037     bjtmp = jtmp;
2038     row = *bjtmp++; /* 1st pivot row */
2039     while  (row < i) {
2040       pc = rtmp + row;
2041       /* apply tolerance dropping rule */
2042       if (PetscAbsScalar(*pc) > dt){
2043         pv         = ba + bdiag[row]; /* diag of the pivot row */
2044         pj         = bj + bdiag[row] + 1;
2045         multiplier = *pc / *pv++;
2046         *pc        = multiplier;
2047         nz         = bi[row+1] - bdiag[row] - 1;
2048         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
2049         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
2050       }
2051       row = *bjtmp++;
2052     }
2053 
2054     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
2055     /* printf("row %d:\n",i); */
2056     nzi_bl = 0; j = 0;
2057     while (jtmp[j] < i){ /* Note: jtmp is sorted */
2058       vtmp[j] = rtmp[jtmp[j]];
2059       nzi_bl++; j++;
2060     }
2061     nzi_bu = nzi - nzi_bl -1;
2062     while (j < nzi){
2063       vtmp[j] = rtmp[jtmp[j]];
2064       j++;
2065     }
2066 
2067     bjtmp = bj + bi[i];
2068     batmp = ba + bi[i];
2069     /* apply level dropping rule to L part */
2070     ncut = nzi_al + dtcount;
2071     if (ncut < nzi_bl){
2072       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
2073       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
2074     } else {
2075       ncut = nzi_bl;
2076     }
2077     for (j=0; j<ncut; j++){
2078       bjtmp[j] = jtmp[j];
2079       batmp[j] = vtmp[j];
2080       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
2081     }
2082 
2083     /* mark bdiagonal */
2084     bdiag[i] = bi[i] + ncut;
2085     bjtmp[ncut] = i;
2086     batmp[ncut] = rtmp[i];
2087     /* printf(" diag(%d,%g),",bjtmp[ncut],batmp[ncut]); */
2088     j++;
2089     nzi = ncut + 1;
2090 
2091     /* apply level dropping rule to U part */
2092     ncut = nzi_au + dtcount;
2093     if (ncut < nzi_bu){
2094       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
2095       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
2096     } else {
2097       ncut = nzi_bu;
2098     }
2099     nzi += ncut;
2100     /* printf(" \nU level dropping ..."); */
2101     for (k=0; k<ncut; k++){
2102       bjtmp[j] = jtmp[nzi_bl+1+k];
2103       batmp[j] = vtmp[nzi_bl+1+k];
2104       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
2105       j++;
2106     }
2107 
2108     bi[i+1] = bi[i] + nzi;
2109     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
2110     /*
2111       printf("bi[%d] = %d\n",i+1,bi[i+1]);
2112       printf(" ----------------------------\n");
2113     */
2114   } /* for (i=0; i<n; i++) */
2115 
2116   /* invert diagonal entries for simplier triangular solves */
2117   for (i=0; i<n; i++) {
2118     ba[bdiag[i]] = 1.0/ba[bdiag[i]];
2119   }
2120 
2121   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2122   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2123 
2124   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2125   ierr = PetscFree(im);CHKERRQ(ierr);
2126   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2127   ierr = PetscFree(jtmp);CHKERRQ(ierr);
2128 
2129   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
2130 
2131   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
2132   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
2133   both_identity = (PetscTruth) (row_identity && icol_identity);
2134 
2135   if (row_identity && icol_identity) {
2136     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
2137   } else {
2138     B->ops->solve = MatSolve_SeqAIJ;
2139   }
2140 
2141   B->ops->solveadd          = MatSolveAdd_SeqAIJ;
2142   B->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
2143   B->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
2144   B->ops->matsolve          = MatMatSolve_SeqAIJ;
2145   B->assembled              = PETSC_TRUE;
2146   B->preallocated           = PETSC_TRUE;
2147   PetscFunctionReturn(0);
2148 }
2149 
2150