xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 1d098868d584ebb02ecc9dede33e6e9ce022b7a5)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/seq/aij.h"
4 #include "../src/inline/dot.h"
5 #include "../src/inline/spops.h"
6 #include "petscbt.h"
7 #include "../src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ_saad"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ_saad(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   const PetscInt *c,*r,*ic;
64   PetscInt       i,n = A->rmap->n,*cc,*cr;
65   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
66   PetscInt       *ordcol,*iwk,*iperm,*jw;
67   PetscInt       jmax,lfill,job,*o_i,*o_j;
68   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
69   PetscReal      af,dt,dtcount,dtcol,fill;
70 
71   PetscFunctionBegin;
72 
73   if (info->dt == PETSC_DEFAULT)      dt      = .005; else dt = info->dt;
74   if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);  else dtcount = info->dtcount;
75   if (info->dtcol == PETSC_DEFAULT)   dtcol   = .01; else dtcol = info->dtcol;
76   if (info->fill == PETSC_DEFAULT)    fill    = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill;
77   lfill   = (PetscInt)(dtcount/2.0);
78   jmax    = (PetscInt)(fill*a->nz);
79 
80 
81   /* ------------------------------------------------------------
82      If reorder=.TRUE., then the original matrix has to be
83      reordered to reflect the user selected ordering scheme, and
84      then de-reordered so it is in it's original format.
85      Because Saad's dperm() is NOT in place, we have to copy
86      the original matrix and allocate more storage. . .
87      ------------------------------------------------------------
88   */
89 
90   /* set reorder to true if either isrow or iscol is not identity */
91   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
92   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
93   reorder = PetscNot(reorder);
94 
95 
96   /* storage for ilu factor */
97   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr);
100   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
101 
102   /* ------------------------------------------------------------
103      Make sure that everything is Fortran formatted (1-Based)
104      ------------------------------------------------------------
105   */
106   for (i=old_i[0];i<old_i[n];i++) {
107     old_j[i]++;
108   }
109   for(i=0;i<n+1;i++) {
110     old_i[i]++;
111   };
112 
113 
114   if (reorder) {
115     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
116     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
117     ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       cr[i]  = r[i]+1;
120       cc[i]  = c[i]+1;
121     }
122     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
123     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
124     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
125     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
126     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr);
127     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job);
128     ierr = PetscFree2(cc,cr);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
222   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = MAT_FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   b->free_a        = PETSC_TRUE;
230   b->free_ij       = PETSC_TRUE;
231   b->singlemalloc  = PETSC_FALSE;
232   b->a             = new_a;
233   b->j             = new_j;
234   b->i             = new_i;
235   b->ilen          = 0;
236   b->imax          = 0;
237   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238   b->row           = isirow;
239   b->col           = iscolf;
240   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
241   b->maxnz = b->nz = new_i[n];
242   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
243   (*fact)->info.factor_mallocs = 0;
244 
245   af = ((double)b->nz)/((double)a->nz) + .001;
246   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr);
247   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
248   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
249   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
250 
251   if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ;
252   else         (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
253   (*fact)->ops->solveadd           = MatSolveAdd_SeqAIJ;
254   (*fact)->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
255   (*fact)->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
256 
257   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
258 
259   PetscFunctionReturn(0);
260 #endif
261 }
262 
263 EXTERN_C_BEGIN
264 #undef __FUNCT__
265 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
266 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
267 {
268   PetscFunctionBegin;
269   *flg = PETSC_TRUE;
270   PetscFunctionReturn(0);
271 }
272 EXTERN_C_END
273 
274 EXTERN_C_BEGIN
275 #undef __FUNCT__
276 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
277 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
278 {
279   PetscInt           n = A->rmap->n;
280   PetscErrorCode     ierr;
281 
282   PetscFunctionBegin;
283   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
284   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
285   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
286     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
287     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
288     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
289     (*B)->ops->iludtfactor       = MatILUDTFactor_SeqAIJ;
290   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
291     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
292     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
293     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
294     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
295   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
296   (*B)->factor = ftype;
297   PetscFunctionReturn(0);
298 }
299 EXTERN_C_END
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
303 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
304 {
305   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
306   IS                 isicol;
307   PetscErrorCode     ierr;
308   const PetscInt     *r,*ic;
309   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
310   PetscInt           *bi,*bj,*ajtmp;
311   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
312   PetscReal          f;
313   PetscInt           nlnk,*lnk,k,**bi_ptr;
314   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
315   PetscBT            lnkbt;
316 
317   PetscFunctionBegin;
318   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
319   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
320   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
321   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
322 
323   /* get new row pointers */
324   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
325   bi[0] = 0;
326 
327   /* bdiag is location of diagonal in factor */
328   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
329   bdiag[0] = 0;
330 
331   /* linked list for storing column indices of the active row */
332   nlnk = n + 1;
333   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
334 
335   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
336 
337   /* initial FreeSpace size is f*(ai[n]+1) */
338   f = info->fill;
339   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
340   current_space = free_space;
341 
342   for (i=0; i<n; i++) {
343     /* copy previous fill into linked list */
344     nzi = 0;
345     nnz = ai[r[i]+1] - ai[r[i]];
346     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
347     ajtmp = aj + ai[r[i]];
348     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
349     nzi += nlnk;
350 
351     /* add pivot rows into linked list */
352     row = lnk[n];
353     while (row < i) {
354       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
355       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
356       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
357       nzi += nlnk;
358       row  = lnk[row];
359     }
360     bi[i+1] = bi[i] + nzi;
361     im[i]   = nzi;
362 
363     /* mark bdiag */
364     nzbd = 0;
365     nnz  = nzi;
366     k    = lnk[n];
367     while (nnz-- && k < i){
368       nzbd++;
369       k = lnk[k];
370     }
371     bdiag[i] = bi[i] + nzbd;
372 
373     /* if free space is not available, make more free space */
374     if (current_space->local_remaining<nzi) {
375       nnz = (n - i)*nzi; /* estimated and max additional space needed */
376       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
377       reallocs++;
378     }
379 
380     /* copy data into free space, then initialize lnk */
381     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
382     bi_ptr[i] = current_space->array;
383     current_space->array           += nzi;
384     current_space->local_used      += nzi;
385     current_space->local_remaining -= nzi;
386   }
387 #if defined(PETSC_USE_INFO)
388   if (ai[n] != 0) {
389     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
390     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
391     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
392     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
393     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
394   } else {
395     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
396   }
397 #endif
398 
399   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
400   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
401 
402   /* destroy list of free space and other temporary array(s) */
403   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
404   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
405   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
406   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
407 
408   /* put together the new matrix */
409   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
410   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
411   b    = (Mat_SeqAIJ*)(B)->data;
412   b->free_a       = PETSC_TRUE;
413   b->free_ij      = PETSC_TRUE;
414   b->singlemalloc = PETSC_FALSE;
415   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
416   b->j          = bj;
417   b->i          = bi;
418   b->diag       = bdiag;
419   b->ilen       = 0;
420   b->imax       = 0;
421   b->row        = isrow;
422   b->col        = iscol;
423   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
424   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
425   b->icol       = isicol;
426   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
427 
428   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
429   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
430   b->maxnz = b->nz = bi[n] ;
431 
432   (B)->factor                = MAT_FACTOR_LU;
433   (B)->info.factor_mallocs   = reallocs;
434   (B)->info.fill_ratio_given = f;
435 
436   if (ai[n]) {
437     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
438   } else {
439     (B)->info.fill_ratio_needed = 0.0;
440   }
441   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
442   (B)->ops->solve            = MatSolve_SeqAIJ;
443   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
444   /* switch to inodes if appropriate */
445   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 /*
450     Trouble in factorization, should we dump the original matrix?
451 */
452 #undef __FUNCT__
453 #define __FUNCT__ "MatFactorDumpMatrix"
454 PetscErrorCode MatFactorDumpMatrix(Mat A)
455 {
456   PetscErrorCode ierr;
457   PetscTruth     flg = PETSC_FALSE;
458 
459   PetscFunctionBegin;
460   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
461   if (flg) {
462     PetscViewer viewer;
463     char        filename[PETSC_MAX_PATH_LEN];
464 
465     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
466     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
467     ierr = MatView(A,viewer);CHKERRQ(ierr);
468     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
469   }
470   PetscFunctionReturn(0);
471 }
472 
473 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
474 
475 /* ----------------------------------------------------------- */
476 #undef __FUNCT__
477 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
478 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
479 {
480   Mat            C=B;
481   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
482   IS             isrow = b->row,isicol = b->icol;
483   PetscErrorCode ierr;
484   const PetscInt  *r,*ic,*ics;
485   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
486   PetscInt       *ajtmp,*bjtmp,nz,row,*diag_offset = b->diag,diag,*pj;
487   MatScalar      *rtmp,*pc,multiplier,*v,*pv,d,*aa=a->a;
488   PetscReal      rs = 0.0;
489   LUShift_Ctx    sctx;
490   PetscInt       newshift,*ddiag;
491 
492   PetscFunctionBegin;
493   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
494   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
495   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
496   ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
497   ics  = ic;
498 
499   sctx.shift_top      = 0;
500   sctx.nshift_max     = 0;
501   sctx.shift_lo       = 0;
502   sctx.shift_hi       = 0;
503   sctx.shift_fraction = 0;
504 
505   /* if both shift schemes are chosen by user, only use info->shiftpd */
506   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
507     ddiag          = a->diag;
508     sctx.shift_top = info->zeropivot;
509     for (i=0; i<n; i++) {
510       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
511       d  = (aa)[ddiag[i]];
512       rs = -PetscAbsScalar(d) - PetscRealPart(d);
513       v  = aa+ai[i];
514       nz = ai[i+1] - ai[i];
515       for (j=0; j<nz; j++)
516 	rs += PetscAbsScalar(v[j]);
517       if (rs>sctx.shift_top) sctx.shift_top = rs;
518     }
519     sctx.shift_top   *= 1.1;
520     sctx.nshift_max   = 5;
521     sctx.shift_lo     = 0.;
522     sctx.shift_hi     = 1.;
523   }
524 
525   sctx.shift_amount = 0.0;
526   sctx.nshift       = 0;
527   do {
528     sctx.lushift = PETSC_FALSE;
529     for (i=0; i<n; i++){
530       nz    = bi[i+1] - bi[i];
531       bjtmp = bj + bi[i];
532       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
533 
534       /* load in initial (unfactored row) */
535       nz    = ai[r[i]+1] - ai[r[i]];
536       ajtmp = aj + ai[r[i]];
537       v     = aa + ai[r[i]];
538       for (j=0; j<nz; j++) {
539         rtmp[ics[ajtmp[j]]] = v[j];
540       }
541       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
542 
543       row = *bjtmp++;
544       while  (row < i) {
545         pc = rtmp + row;
546         if (*pc != 0.0) {
547           pv         = b->a + diag_offset[row];
548           pj         = b->j + diag_offset[row] + 1;
549           multiplier = *pc / *pv++;
550           *pc        = multiplier;
551           nz         = bi[row+1] - diag_offset[row] - 1;
552           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
553           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
554         }
555         row = *bjtmp++;
556       }
557       /* finished row so stick it into b->a */
558       pv   = b->a + bi[i] ;
559       pj   = b->j + bi[i] ;
560       nz   = bi[i+1] - bi[i];
561       diag = diag_offset[i] - bi[i];
562       rs   = -PetscAbsScalar(pv[diag]);
563       for (j=0; j<nz; j++) {
564         pv[j] = rtmp[pj[j]];
565         rs   += PetscAbsScalar(pv[j]);
566       }
567 
568       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
569       sctx.rs  = rs;
570       sctx.pv  = pv[diag];
571       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
572       if (newshift == 1) break;
573     }
574 
575     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
576       /*
577        * if no shift in this attempt & shifting & started shifting & can refine,
578        * then try lower shift
579        */
580       sctx.shift_hi       = sctx.shift_fraction;
581       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
582       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
583       sctx.lushift        = PETSC_TRUE;
584       sctx.nshift++;
585     }
586   } while (sctx.lushift);
587 
588   /* invert diagonal entries for simplier triangular solves */
589   for (i=0; i<n; i++) {
590     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
591   }
592   ierr = PetscFree(rtmp);CHKERRQ(ierr);
593   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
594   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
595   if (b->inode.use) {
596     C->ops->solve   = MatSolve_Inode;
597   } else {
598     PetscTruth row_identity, col_identity;
599     ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
600     ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
601     if (row_identity && col_identity) {
602       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
603     } else {
604       C->ops->solve   = MatSolve_SeqAIJ;
605     }
606   }
607   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
608   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
609   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
610   C->ops->matsolve           = MatMatSolve_SeqAIJ;
611   C->assembled    = PETSC_TRUE;
612   C->preallocated = PETSC_TRUE;
613   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
614   if (sctx.nshift){
615      if (info->shiftpd) {
616       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
617     } else if (info->shiftnz) {
618       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
619     }
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 /*
625    This routine implements inplace ILU(0) with row or/and column permutations.
626    Input:
627      A - original matrix
628    Output;
629      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
630          a->j (col index) is permuted by the inverse of colperm, then sorted
631          a->a reordered accordingly with a->j
632          a->diag (ptr to diagonal elements) is updated.
633 */
634 #undef __FUNCT__
635 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
636 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
637 {
638   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
639   IS             isrow = a->row,isicol = a->icol;
640   PetscErrorCode ierr;
641   const PetscInt *r,*ic,*ics;
642   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
643   PetscInt       *ajtmp,nz,row;
644   PetscInt       *diag = a->diag,nbdiag,*pj;
645   PetscScalar    *rtmp,*pc,multiplier,d;
646   MatScalar      *v,*pv;
647   PetscReal      rs;
648   LUShift_Ctx    sctx;
649   PetscInt       newshift;
650 
651   PetscFunctionBegin;
652   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
653   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
654   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
655   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
656   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
657   ics = ic;
658 
659   sctx.shift_top      = 0;
660   sctx.nshift_max     = 0;
661   sctx.shift_lo       = 0;
662   sctx.shift_hi       = 0;
663   sctx.shift_fraction = 0;
664 
665   /* if both shift schemes are chosen by user, only use info->shiftpd */
666   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
667     sctx.shift_top = 0;
668     for (i=0; i<n; i++) {
669       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
670       d  = (a->a)[diag[i]];
671       rs = -PetscAbsScalar(d) - PetscRealPart(d);
672       v  = a->a+ai[i];
673       nz = ai[i+1] - ai[i];
674       for (j=0; j<nz; j++)
675 	rs += PetscAbsScalar(v[j]);
676       if (rs>sctx.shift_top) sctx.shift_top = rs;
677     }
678     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
679     sctx.shift_top    *= 1.1;
680     sctx.nshift_max   = 5;
681     sctx.shift_lo     = 0.;
682     sctx.shift_hi     = 1.;
683   }
684 
685   sctx.shift_amount = 0;
686   sctx.nshift       = 0;
687   do {
688     sctx.lushift = PETSC_FALSE;
689     for (i=0; i<n; i++){
690       /* load in initial unfactored row */
691       nz    = ai[r[i]+1] - ai[r[i]];
692       ajtmp = aj + ai[r[i]];
693       v     = a->a + ai[r[i]];
694       /* sort permuted ajtmp and values v accordingly */
695       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
696       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
697 
698       diag[r[i]] = ai[r[i]];
699       for (j=0; j<nz; j++) {
700         rtmp[ajtmp[j]] = v[j];
701         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
702       }
703       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
704 
705       row = *ajtmp++;
706       while  (row < i) {
707         pc = rtmp + row;
708         if (*pc != 0.0) {
709           pv         = a->a + diag[r[row]];
710           pj         = aj + diag[r[row]] + 1;
711 
712           multiplier = *pc / *pv++;
713           *pc        = multiplier;
714           nz         = ai[r[row]+1] - diag[r[row]] - 1;
715           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
716           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
717         }
718         row = *ajtmp++;
719       }
720       /* finished row so overwrite it onto a->a */
721       pv   = a->a + ai[r[i]] ;
722       pj   = aj + ai[r[i]] ;
723       nz   = ai[r[i]+1] - ai[r[i]];
724       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
725 
726       rs   = 0.0;
727       for (j=0; j<nz; j++) {
728         pv[j] = rtmp[pj[j]];
729         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
730       }
731 
732       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
733       sctx.rs  = rs;
734       sctx.pv  = pv[nbdiag];
735       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
736       if (newshift == 1) break;
737     }
738 
739     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
740       /*
741        * if no shift in this attempt & shifting & started shifting & can refine,
742        * then try lower shift
743        */
744       sctx.shift_hi        = sctx.shift_fraction;
745       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
746       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
747       sctx.lushift         = PETSC_TRUE;
748       sctx.nshift++;
749     }
750   } while (sctx.lushift);
751 
752   /* invert diagonal entries for simplier triangular solves */
753   for (i=0; i<n; i++) {
754     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
755   }
756 
757   ierr = PetscFree(rtmp);CHKERRQ(ierr);
758   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
759   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
760   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
761   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
762   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
763   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
764   A->assembled = PETSC_TRUE;
765   A->preallocated = PETSC_TRUE;
766   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
767   if (sctx.nshift){
768     if (info->shiftpd) {
769       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
770     } else if (info->shiftnz) {
771       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
772     }
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 /* ----------------------------------------------------------- */
778 #undef __FUNCT__
779 #define __FUNCT__ "MatLUFactor_SeqAIJ"
780 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
781 {
782   PetscErrorCode ierr;
783   Mat            C;
784 
785   PetscFunctionBegin;
786   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
787   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
788   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
789   A->ops->solve            = C->ops->solve;
790   A->ops->solvetranspose   = C->ops->solvetranspose;
791   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
792   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
793   PetscFunctionReturn(0);
794 }
795 /* ----------------------------------------------------------- */
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatSolve_SeqAIJ"
799 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
800 {
801   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
802   IS                iscol = a->col,isrow = a->row;
803   PetscErrorCode    ierr;
804   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
805   PetscInt          nz;
806   const PetscInt    *rout,*cout,*r,*c;
807   PetscScalar       *x,*tmp,*tmps,sum;
808   const PetscScalar *b;
809   const MatScalar   *aa = a->a,*v;
810 
811   PetscFunctionBegin;
812   if (!n) PetscFunctionReturn(0);
813 
814   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
815   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
816   tmp  = a->solve_work;
817 
818   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
819   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
820 
821   /* forward solve the lower triangular */
822   tmp[0] = b[*r++];
823   tmps   = tmp;
824   for (i=1; i<n; i++) {
825     v   = aa + ai[i] ;
826     vi  = aj + ai[i] ;
827     nz  = a->diag[i] - ai[i];
828     sum = b[*r++];
829     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
830     tmp[i] = sum;
831   }
832 
833   /* backward solve the upper triangular */
834   for (i=n-1; i>=0; i--){
835     v   = aa + a->diag[i] + 1;
836     vi  = aj + a->diag[i] + 1;
837     nz  = ai[i+1] - a->diag[i] - 1;
838     sum = tmp[i];
839     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
840     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
841   }
842 
843   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
844   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
845   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
846   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
847   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMatSolve_SeqAIJ"
853 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
854 {
855   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
856   IS              iscol = a->col,isrow = a->row;
857   PetscErrorCode  ierr;
858   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
859   PetscInt        nz,neq;
860   const PetscInt  *rout,*cout,*r,*c;
861   PetscScalar     *x,*b,*tmp,*tmps,sum;
862   const MatScalar *aa = a->a,*v;
863   PetscTruth      bisdense,xisdense;
864 
865   PetscFunctionBegin;
866   if (!n) PetscFunctionReturn(0);
867 
868   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
869   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
870   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
871   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
872 
873   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
874   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
875 
876   tmp  = a->solve_work;
877   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
878   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
879 
880   for (neq=0; neq<B->cmap->n; neq++){
881     /* forward solve the lower triangular */
882     tmp[0] = b[r[0]];
883     tmps   = tmp;
884     for (i=1; i<n; i++) {
885       v   = aa + ai[i] ;
886       vi  = aj + ai[i] ;
887       nz  = a->diag[i] - ai[i];
888       sum = b[r[i]];
889       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
890       tmp[i] = sum;
891     }
892     /* backward solve the upper triangular */
893     for (i=n-1; i>=0; i--){
894       v   = aa + a->diag[i] + 1;
895       vi  = aj + a->diag[i] + 1;
896       nz  = ai[i+1] - a->diag[i] - 1;
897       sum = tmp[i];
898       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
899       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
900     }
901 
902     b += n;
903     x += n;
904   }
905   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
906   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
907   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
908   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
909   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
915 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
916 {
917   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
918   IS              iscol = a->col,isrow = a->row;
919   PetscErrorCode  ierr;
920   const PetscInt  *r,*c,*rout,*cout;
921   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
922   PetscInt        nz,row;
923   PetscScalar     *x,*b,*tmp,*tmps,sum;
924   const MatScalar *aa = a->a,*v;
925 
926   PetscFunctionBegin;
927   if (!n) PetscFunctionReturn(0);
928 
929   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
930   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
931   tmp  = a->solve_work;
932 
933   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
934   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
935 
936   /* forward solve the lower triangular */
937   tmp[0] = b[*r++];
938   tmps   = tmp;
939   for (row=1; row<n; row++) {
940     i   = rout[row]; /* permuted row */
941     v   = aa + ai[i] ;
942     vi  = aj + ai[i] ;
943     nz  = a->diag[i] - ai[i];
944     sum = b[*r++];
945     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
946     tmp[row] = sum;
947   }
948 
949   /* backward solve the upper triangular */
950   for (row=n-1; row>=0; row--){
951     i   = rout[row]; /* permuted row */
952     v   = aa + a->diag[i] + 1;
953     vi  = aj + a->diag[i] + 1;
954     nz  = ai[i+1] - a->diag[i] - 1;
955     sum = tmp[row];
956     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
957     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
958   }
959 
960   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
961   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
962   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
963   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 /* ----------------------------------------------------------- */
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
971 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
972 {
973   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
974   PetscErrorCode    ierr;
975   PetscInt          n = A->rmap->n;
976   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
977   PetscScalar       *x;
978   const PetscScalar *b;
979   const MatScalar   *aa = a->a;
980 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
981   PetscInt          adiag_i,i,nz,ai_i;
982   const MatScalar   *v;
983   PetscScalar       sum;
984 #endif
985 
986   PetscFunctionBegin;
987   if (!n) PetscFunctionReturn(0);
988 
989   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
990   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
991 
992 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
993   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
994 #else
995   /* forward solve the lower triangular */
996   x[0] = b[0];
997   for (i=1; i<n; i++) {
998     ai_i = ai[i];
999     v    = aa + ai_i;
1000     vi   = aj + ai_i;
1001     nz   = adiag[i] - ai_i;
1002     sum  = b[i];
1003     while (nz--) sum -= *v++ * x[*vi++];
1004     x[i] = sum;
1005   }
1006 
1007   /* backward solve the upper triangular */
1008   for (i=n-1; i>=0; i--){
1009     adiag_i = adiag[i];
1010     v       = aa + adiag_i + 1;
1011     vi      = aj + adiag_i + 1;
1012     nz      = ai[i+1] - adiag_i - 1;
1013     sum     = x[i];
1014     while (nz--) sum -= *v++ * x[*vi++];
1015     x[i]    = sum*aa[adiag_i];
1016   }
1017 #endif
1018   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1019   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1020   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1026 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1027 {
1028   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1029   IS              iscol = a->col,isrow = a->row;
1030   PetscErrorCode  ierr;
1031   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1032   PetscInt        nz;
1033   const PetscInt  *rout,*cout,*r,*c;
1034   PetscScalar     *x,*b,*tmp,sum;
1035   const MatScalar *aa = a->a,*v;
1036 
1037   PetscFunctionBegin;
1038   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1039 
1040   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1041   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1042   tmp  = a->solve_work;
1043 
1044   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1045   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1046 
1047   /* forward solve the lower triangular */
1048   tmp[0] = b[*r++];
1049   for (i=1; i<n; i++) {
1050     v   = aa + ai[i] ;
1051     vi  = aj + ai[i] ;
1052     nz  = a->diag[i] - ai[i];
1053     sum = b[*r++];
1054     while (nz--) sum -= *v++ * tmp[*vi++ ];
1055     tmp[i] = sum;
1056   }
1057 
1058   /* backward solve the upper triangular */
1059   for (i=n-1; i>=0; i--){
1060     v   = aa + a->diag[i] + 1;
1061     vi  = aj + a->diag[i] + 1;
1062     nz  = ai[i+1] - a->diag[i] - 1;
1063     sum = tmp[i];
1064     while (nz--) sum -= *v++ * tmp[*vi++ ];
1065     tmp[i] = sum*aa[a->diag[i]];
1066     x[*c--] += tmp[i];
1067   }
1068 
1069   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1070   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1071   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1072   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1073   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 /* -------------------------------------------------------------------*/
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1080 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1081 {
1082   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1083   IS              iscol = a->col,isrow = a->row;
1084   PetscErrorCode  ierr;
1085   const PetscInt  *rout,*cout,*r,*c;
1086   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1087   PetscInt        nz,*diag = a->diag;
1088   PetscScalar     *x,*b,*tmp,s1;
1089   const MatScalar *aa = a->a,*v;
1090 
1091   PetscFunctionBegin;
1092   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1093   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1094   tmp  = a->solve_work;
1095 
1096   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1097   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1098 
1099   /* copy the b into temp work space according to permutation */
1100   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1101 
1102   /* forward solve the U^T */
1103   for (i=0; i<n; i++) {
1104     v   = aa + diag[i] ;
1105     vi  = aj + diag[i] + 1;
1106     nz  = ai[i+1] - diag[i] - 1;
1107     s1  = tmp[i];
1108     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1109     while (nz--) {
1110       tmp[*vi++ ] -= (*v++)*s1;
1111     }
1112     tmp[i] = s1;
1113   }
1114 
1115   /* backward solve the L^T */
1116   for (i=n-1; i>=0; i--){
1117     v   = aa + diag[i] - 1 ;
1118     vi  = aj + diag[i] - 1 ;
1119     nz  = diag[i] - ai[i];
1120     s1  = tmp[i];
1121     while (nz--) {
1122       tmp[*vi-- ] -= (*v--)*s1;
1123     }
1124   }
1125 
1126   /* copy tmp into x according to permutation */
1127   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1128 
1129   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1130   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1131   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1132   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1133 
1134   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1140 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1141 {
1142   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1143   IS              iscol = a->col,isrow = a->row;
1144   PetscErrorCode  ierr;
1145   const PetscInt  *r,*c,*rout,*cout;
1146   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1147   PetscInt        nz,*diag = a->diag;
1148   PetscScalar     *x,*b,*tmp;
1149   const MatScalar *aa = a->a,*v;
1150 
1151   PetscFunctionBegin;
1152   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1153 
1154   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1155   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1156   tmp = a->solve_work;
1157 
1158   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1159   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1160 
1161   /* copy the b into temp work space according to permutation */
1162   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1163 
1164   /* forward solve the U^T */
1165   for (i=0; i<n; i++) {
1166     v   = aa + diag[i] ;
1167     vi  = aj + diag[i] + 1;
1168     nz  = ai[i+1] - diag[i] - 1;
1169     tmp[i] *= *v++;
1170     while (nz--) {
1171       tmp[*vi++ ] -= (*v++)*tmp[i];
1172     }
1173   }
1174 
1175   /* backward solve the L^T */
1176   for (i=n-1; i>=0; i--){
1177     v   = aa + diag[i] - 1 ;
1178     vi  = aj + diag[i] - 1 ;
1179     nz  = diag[i] - ai[i];
1180     while (nz--) {
1181       tmp[*vi-- ] -= (*v--)*tmp[i];
1182     }
1183   }
1184 
1185   /* copy tmp into x according to permutation */
1186   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1187 
1188   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1189   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1190   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1191   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1192 
1193   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 /* ----------------------------------------------------------------*/
1197 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1198 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1202 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1203 {
1204   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1205   IS                 isicol;
1206   PetscErrorCode     ierr;
1207   const PetscInt     *r,*ic;
1208   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1209   PetscInt           *bi,*cols,nnz,*cols_lvl;
1210   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1211   PetscInt           i,levels,diagonal_fill;
1212   PetscTruth         col_identity,row_identity;
1213   PetscReal          f;
1214   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1215   PetscBT            lnkbt;
1216   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1217   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1218   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1219   PetscTruth         missing;
1220 
1221   PetscFunctionBegin;
1222   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1223   f             = info->fill;
1224   levels        = (PetscInt)info->levels;
1225   diagonal_fill = (PetscInt)info->diagonal_fill;
1226   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1227 
1228   /* special case that simply copies fill pattern */
1229   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1230   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1231   if (!levels && row_identity && col_identity) {
1232     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr);
1233     fact->factor = MAT_FACTOR_ILU;
1234     (fact)->info.factor_mallocs    = 0;
1235     (fact)->info.fill_ratio_given  = info->fill;
1236     (fact)->info.fill_ratio_needed = 1.0;
1237     b               = (Mat_SeqAIJ*)(fact)->data;
1238     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1239     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1240     b->row              = isrow;
1241     b->col              = iscol;
1242     b->icol             = isicol;
1243     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1244     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1245     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1246     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1247     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1248     PetscFunctionReturn(0);
1249   }
1250 
1251   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1252   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1253 
1254   /* get new row pointers */
1255   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1256   bi[0] = 0;
1257   /* bdiag is location of diagonal in factor */
1258   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1259   bdiag[0]  = 0;
1260 
1261   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1262   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1263 
1264   /* create a linked list for storing column indices of the active row */
1265   nlnk = n + 1;
1266   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1267 
1268   /* initial FreeSpace size is f*(ai[n]+1) */
1269   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1270   current_space = free_space;
1271   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1272   current_space_lvl = free_space_lvl;
1273 
1274   for (i=0; i<n; i++) {
1275     nzi = 0;
1276     /* copy current row into linked list */
1277     nnz  = ai[r[i]+1] - ai[r[i]];
1278     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1279     cols = aj + ai[r[i]];
1280     lnk[i] = -1; /* marker to indicate if diagonal exists */
1281     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1282     nzi += nlnk;
1283 
1284     /* make sure diagonal entry is included */
1285     if (diagonal_fill && lnk[i] == -1) {
1286       fm = n;
1287       while (lnk[fm] < i) fm = lnk[fm];
1288       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1289       lnk[fm]    = i;
1290       lnk_lvl[i] = 0;
1291       nzi++; dcount++;
1292     }
1293 
1294     /* add pivot rows into the active row */
1295     nzbd = 0;
1296     prow = lnk[n];
1297     while (prow < i) {
1298       nnz      = bdiag[prow];
1299       cols     = bj_ptr[prow] + nnz + 1;
1300       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1301       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1302       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1303       nzi += nlnk;
1304       prow = lnk[prow];
1305       nzbd++;
1306     }
1307     bdiag[i] = nzbd;
1308     bi[i+1]  = bi[i] + nzi;
1309 
1310     /* if free space is not available, make more free space */
1311     if (current_space->local_remaining<nzi) {
1312       nnz = nzi*(n - i); /* estimated and max additional space needed */
1313       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1314       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1315       reallocs++;
1316     }
1317 
1318     /* copy data into free_space and free_space_lvl, then initialize lnk */
1319     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1320     bj_ptr[i]    = current_space->array;
1321     bjlvl_ptr[i] = current_space_lvl->array;
1322 
1323     /* make sure the active row i has diagonal entry */
1324     if (*(bj_ptr[i]+bdiag[i]) != i) {
1325       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1326     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1327     }
1328 
1329     current_space->array           += nzi;
1330     current_space->local_used      += nzi;
1331     current_space->local_remaining -= nzi;
1332     current_space_lvl->array           += nzi;
1333     current_space_lvl->local_used      += nzi;
1334     current_space_lvl->local_remaining -= nzi;
1335   }
1336 
1337   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1339 
1340   /* destroy list of free space and other temporary arrays */
1341   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1342   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1343   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1344   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1345   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1346 
1347 #if defined(PETSC_USE_INFO)
1348   {
1349     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1350     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1351     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1352     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1353     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1354     if (diagonal_fill) {
1355       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1356     }
1357   }
1358 #endif
1359 
1360   /* put together the new matrix */
1361   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1362   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1363   b = (Mat_SeqAIJ*)(fact)->data;
1364   b->free_a       = PETSC_TRUE;
1365   b->free_ij      = PETSC_TRUE;
1366   b->singlemalloc = PETSC_FALSE;
1367   ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1368   b->j          = bj;
1369   b->i          = bi;
1370   for (i=0; i<n; i++) bdiag[i] += bi[i];
1371   b->diag       = bdiag;
1372   b->ilen       = 0;
1373   b->imax       = 0;
1374   b->row        = isrow;
1375   b->col        = iscol;
1376   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1377   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1378   b->icol       = isicol;
1379   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1380   /* In b structure:  Free imax, ilen, old a, old j.
1381      Allocate bdiag, solve_work, new a, new j */
1382   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1383   b->maxnz             = b->nz = bi[n] ;
1384   (fact)->info.factor_mallocs    = reallocs;
1385   (fact)->info.fill_ratio_given  = f;
1386   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1387   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1388   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #include "../src/mat/impls/sbaij/seq/sbaij.h"
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1395 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1396 {
1397   Mat            C = B;
1398   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1399   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1400   IS             ip=b->row,iip = b->icol;
1401   PetscErrorCode ierr;
1402   const PetscInt *rip,*riip;
1403   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1404   PetscInt       *ai=a->i,*aj=a->j;
1405   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1406   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1407   PetscReal      zeropivot,rs,shiftnz;
1408   PetscReal      shiftpd;
1409   ChShift_Ctx    sctx;
1410   PetscInt       newshift;
1411   PetscTruth     perm_identity;
1412 
1413   PetscFunctionBegin;
1414 
1415   shiftnz   = info->shiftnz;
1416   shiftpd   = info->shiftpd;
1417   zeropivot = info->zeropivot;
1418 
1419   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1420   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1421 
1422   /* initialization */
1423   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1424   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1425   jl   = il + mbs;
1426   rtmp = (MatScalar*)(jl + mbs);
1427 
1428   sctx.shift_amount = 0;
1429   sctx.nshift       = 0;
1430   do {
1431     sctx.chshift = PETSC_FALSE;
1432     for (i=0; i<mbs; i++) {
1433       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1434     }
1435 
1436     for (k = 0; k<mbs; k++){
1437       bval = ba + bi[k];
1438       /* initialize k-th row by the perm[k]-th row of A */
1439       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1440       for (j = jmin; j < jmax; j++){
1441         col = riip[aj[j]];
1442         if (col >= k){ /* only take upper triangular entry */
1443           rtmp[col] = aa[j];
1444           *bval++  = 0.0; /* for in-place factorization */
1445         }
1446       }
1447       /* shift the diagonal of the matrix */
1448       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1449 
1450       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1451       dk = rtmp[k];
1452       i = jl[k]; /* first row to be added to k_th row  */
1453 
1454       while (i < k){
1455         nexti = jl[i]; /* next row to be added to k_th row */
1456 
1457         /* compute multiplier, update diag(k) and U(i,k) */
1458         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1459         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1460         dk += uikdi*ba[ili];
1461         ba[ili] = uikdi; /* -U(i,k) */
1462 
1463         /* add multiple of row i to k-th row */
1464         jmin = ili + 1; jmax = bi[i+1];
1465         if (jmin < jmax){
1466           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1467           /* update il and jl for row i */
1468           il[i] = jmin;
1469           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1470         }
1471         i = nexti;
1472       }
1473 
1474       /* shift the diagonals when zero pivot is detected */
1475       /* compute rs=sum of abs(off-diagonal) */
1476       rs   = 0.0;
1477       jmin = bi[k]+1;
1478       nz   = bi[k+1] - jmin;
1479       bcol = bj + jmin;
1480       while (nz--){
1481         rs += PetscAbsScalar(rtmp[*bcol]);
1482         bcol++;
1483       }
1484 
1485       sctx.rs = rs;
1486       sctx.pv = dk;
1487       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1488 
1489       if (newshift == 1) {
1490         if (!sctx.shift_amount) {
1491           sctx.shift_amount = 1e-5;
1492         }
1493         break;
1494       }
1495 
1496       /* copy data into U(k,:) */
1497       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1498       jmin = bi[k]+1; jmax = bi[k+1];
1499       if (jmin < jmax) {
1500         for (j=jmin; j<jmax; j++){
1501           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1502         }
1503         /* add the k-th row into il and jl */
1504         il[k] = jmin;
1505         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1506       }
1507     }
1508   } while (sctx.chshift);
1509   ierr = PetscFree(il);CHKERRQ(ierr);
1510 
1511   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1512   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1513 
1514   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1515   if (perm_identity){
1516     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1517     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1518     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1519     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1520   } else {
1521     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1522     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1523     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1524     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1525   }
1526 
1527   C->assembled    = PETSC_TRUE;
1528   C->preallocated = PETSC_TRUE;
1529   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1530   if (sctx.nshift){
1531     if (shiftnz) {
1532       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1533     } else if (shiftpd) {
1534       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1535     }
1536   }
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1542 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1543 {
1544   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1545   Mat_SeqSBAIJ       *b;
1546   PetscErrorCode     ierr;
1547   PetscTruth         perm_identity,missing;
1548   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1549   const PetscInt     *rip,*riip;
1550   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1551   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1552   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1553   PetscReal          fill=info->fill,levels=info->levels;
1554   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1555   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1556   PetscBT            lnkbt;
1557   IS                 iperm;
1558 
1559   PetscFunctionBegin;
1560   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1561   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1562   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1563   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1564   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1565 
1566   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1567   ui[0] = 0;
1568 
1569   /* ICC(0) without matrix ordering: simply copies fill pattern */
1570   if (!levels && perm_identity) {
1571 
1572     for (i=0; i<am; i++) {
1573       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1574     }
1575     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1576     cols = uj;
1577     for (i=0; i<am; i++) {
1578       aj    = a->j + a->diag[i];
1579       ncols = ui[i+1] - ui[i];
1580       for (j=0; j<ncols; j++) *cols++ = *aj++;
1581     }
1582   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1583     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1584     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1585 
1586     /* initialization */
1587     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1588 
1589     /* jl: linked list for storing indices of the pivot rows
1590        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1591     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1592     il         = jl + am;
1593     uj_ptr     = (PetscInt**)(il + am);
1594     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1595     for (i=0; i<am; i++){
1596       jl[i] = am; il[i] = 0;
1597     }
1598 
1599     /* create and initialize a linked list for storing column indices of the active row k */
1600     nlnk = am + 1;
1601     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1602 
1603     /* initial FreeSpace size is fill*(ai[am]+1) */
1604     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1605     current_space = free_space;
1606     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1607     current_space_lvl = free_space_lvl;
1608 
1609     for (k=0; k<am; k++){  /* for each active row k */
1610       /* initialize lnk by the column indices of row rip[k] of A */
1611       nzk   = 0;
1612       ncols = ai[rip[k]+1] - ai[rip[k]];
1613       if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1614       ncols_upper = 0;
1615       for (j=0; j<ncols; j++){
1616         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1617         if (riip[i] >= k){ /* only take upper triangular entry */
1618           ajtmp[ncols_upper] = i;
1619           ncols_upper++;
1620         }
1621       }
1622       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1623       nzk += nlnk;
1624 
1625       /* update lnk by computing fill-in for each pivot row to be merged in */
1626       prow = jl[k]; /* 1st pivot row */
1627 
1628       while (prow < k){
1629         nextprow = jl[prow];
1630 
1631         /* merge prow into k-th row */
1632         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1633         jmax = ui[prow+1];
1634         ncols = jmax-jmin;
1635         i     = jmin - ui[prow];
1636         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1637         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1638         j     = *(uj - 1);
1639         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1640         nzk += nlnk;
1641 
1642         /* update il and jl for prow */
1643         if (jmin < jmax){
1644           il[prow] = jmin;
1645           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1646         }
1647         prow = nextprow;
1648       }
1649 
1650       /* if free space is not available, make more free space */
1651       if (current_space->local_remaining<nzk) {
1652         i = am - k + 1; /* num of unfactored rows */
1653         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1654         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1655         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1656         reallocs++;
1657       }
1658 
1659       /* copy data into free_space and free_space_lvl, then initialize lnk */
1660       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1661       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1662 
1663       /* add the k-th row into il and jl */
1664       if (nzk > 1){
1665         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1666         jl[k] = jl[i]; jl[i] = k;
1667         il[k] = ui[k] + 1;
1668       }
1669       uj_ptr[k]     = current_space->array;
1670       uj_lvl_ptr[k] = current_space_lvl->array;
1671 
1672       current_space->array           += nzk;
1673       current_space->local_used      += nzk;
1674       current_space->local_remaining -= nzk;
1675 
1676       current_space_lvl->array           += nzk;
1677       current_space_lvl->local_used      += nzk;
1678       current_space_lvl->local_remaining -= nzk;
1679 
1680       ui[k+1] = ui[k] + nzk;
1681     }
1682 
1683 #if defined(PETSC_USE_INFO)
1684     if (ai[am] != 0) {
1685       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1686       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1687       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1688       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1689     } else {
1690       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1691     }
1692 #endif
1693 
1694     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1695     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1696     ierr = PetscFree(jl);CHKERRQ(ierr);
1697     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1698 
1699     /* destroy list of free space and other temporary array(s) */
1700     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1701     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1702     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1703     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1704 
1705   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1706 
1707   /* put together the new matrix in MATSEQSBAIJ format */
1708 
1709   b    = (Mat_SeqSBAIJ*)(fact)->data;
1710   b->singlemalloc = PETSC_FALSE;
1711   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1712   b->j    = uj;
1713   b->i    = ui;
1714   b->diag = 0;
1715   b->ilen = 0;
1716   b->imax = 0;
1717   b->row  = perm;
1718   b->col  = perm;
1719   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1720   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1721   b->icol = iperm;
1722   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1723   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1724   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1725   b->maxnz   = b->nz = ui[am];
1726   b->free_a  = PETSC_TRUE;
1727   b->free_ij = PETSC_TRUE;
1728 
1729   (fact)->info.factor_mallocs    = reallocs;
1730   (fact)->info.fill_ratio_given  = fill;
1731   if (ai[am] != 0) {
1732     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1733   } else {
1734     (fact)->info.fill_ratio_needed = 0.0;
1735   }
1736   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1742 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1743 {
1744   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1745   Mat_SeqSBAIJ       *b;
1746   PetscErrorCode     ierr;
1747   PetscTruth         perm_identity;
1748   PetscReal          fill = info->fill;
1749   const PetscInt     *rip,*riip;
1750   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1751   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1752   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1753   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1754   PetscBT            lnkbt;
1755   IS                 iperm;
1756 
1757   PetscFunctionBegin;
1758   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1759   /* check whether perm is the identity mapping */
1760   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1761   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1762   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1763   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1764 
1765   /* initialization */
1766   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1767   ui[0] = 0;
1768 
1769   /* jl: linked list for storing indices of the pivot rows
1770      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1771   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1772   il     = jl + am;
1773   cols   = il + am;
1774   ui_ptr = (PetscInt**)(cols + am);
1775   for (i=0; i<am; i++){
1776     jl[i] = am; il[i] = 0;
1777   }
1778 
1779   /* create and initialize a linked list for storing column indices of the active row k */
1780   nlnk = am + 1;
1781   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1782 
1783   /* initial FreeSpace size is fill*(ai[am]+1) */
1784   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1785   current_space = free_space;
1786 
1787   for (k=0; k<am; k++){  /* for each active row k */
1788     /* initialize lnk by the column indices of row rip[k] of A */
1789     nzk   = 0;
1790     ncols = ai[rip[k]+1] - ai[rip[k]];
1791     if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1792     ncols_upper = 0;
1793     for (j=0; j<ncols; j++){
1794       i = riip[*(aj + ai[rip[k]] + j)];
1795       if (i >= k){ /* only take upper triangular entry */
1796         cols[ncols_upper] = i;
1797         ncols_upper++;
1798       }
1799     }
1800     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1801     nzk += nlnk;
1802 
1803     /* update lnk by computing fill-in for each pivot row to be merged in */
1804     prow = jl[k]; /* 1st pivot row */
1805 
1806     while (prow < k){
1807       nextprow = jl[prow];
1808       /* merge prow into k-th row */
1809       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1810       jmax = ui[prow+1];
1811       ncols = jmax-jmin;
1812       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1813       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1814       nzk += nlnk;
1815 
1816       /* update il and jl for prow */
1817       if (jmin < jmax){
1818         il[prow] = jmin;
1819         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1820       }
1821       prow = nextprow;
1822     }
1823 
1824     /* if free space is not available, make more free space */
1825     if (current_space->local_remaining<nzk) {
1826       i = am - k + 1; /* num of unfactored rows */
1827       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1828       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1829       reallocs++;
1830     }
1831 
1832     /* copy data into free space, then initialize lnk */
1833     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1834 
1835     /* add the k-th row into il and jl */
1836     if (nzk-1 > 0){
1837       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1838       jl[k] = jl[i]; jl[i] = k;
1839       il[k] = ui[k] + 1;
1840     }
1841     ui_ptr[k] = current_space->array;
1842     current_space->array           += nzk;
1843     current_space->local_used      += nzk;
1844     current_space->local_remaining -= nzk;
1845 
1846     ui[k+1] = ui[k] + nzk;
1847   }
1848 
1849 #if defined(PETSC_USE_INFO)
1850   if (ai[am] != 0) {
1851     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1852     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1853     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1854     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1855   } else {
1856      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1857   }
1858 #endif
1859 
1860   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1861   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1862   ierr = PetscFree(jl);CHKERRQ(ierr);
1863 
1864   /* destroy list of free space and other temporary array(s) */
1865   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1866   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1867   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1868 
1869   /* put together the new matrix in MATSEQSBAIJ format */
1870 
1871   b = (Mat_SeqSBAIJ*)(fact)->data;
1872   b->singlemalloc = PETSC_FALSE;
1873   b->free_a       = PETSC_TRUE;
1874   b->free_ij      = PETSC_TRUE;
1875   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1876   b->j    = uj;
1877   b->i    = ui;
1878   b->diag = 0;
1879   b->ilen = 0;
1880   b->imax = 0;
1881   b->row  = perm;
1882   b->col  = perm;
1883   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1884   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1885   b->icol = iperm;
1886   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1887   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1888   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1889   b->maxnz = b->nz = ui[am];
1890 
1891   (fact)->info.factor_mallocs    = reallocs;
1892   (fact)->info.fill_ratio_given  = fill;
1893   if (ai[am] != 0) {
1894     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1895   } else {
1896     (fact)->info.fill_ratio_needed = 0.0;
1897   }
1898   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_iludt"
1904 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_iludt(Mat A,Vec bb,Vec xx)
1905 {
1906   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1907   PetscErrorCode    ierr;
1908   PetscInt          n = A->rmap->n;
1909   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
1910   PetscScalar       *x,sum;
1911   const PetscScalar *b;
1912   const MatScalar   *aa = a->a,*v;
1913   PetscInt          i,nz;
1914 
1915   PetscFunctionBegin;
1916   if (!n) PetscFunctionReturn(0);
1917 
1918   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1920 
1921   /* forward solve the lower triangular */
1922   x[0] = b[0];
1923   v    = aa;
1924   vi   = aj;
1925   for (i=1; i<n; i++) {
1926     nz  = ai[i+1] - ai[i];
1927     sum = b[i];
1928     while (nz--) sum -= *v++ * x[*vi++];
1929     x[i] = sum;
1930   }
1931 
1932   /* backward solve the upper triangular */
1933   v   = aa + adiag[n] + 1;
1934   vi  = aj + adiag[n] + 1;
1935   for (i=n-1; i>=0; i--){
1936     nz  = adiag[i] - adiag[i+1] - 1;
1937     sum = x[i];
1938     while (nz--) sum -= *v++ * x[*vi++];
1939     x[i] = sum*aa[adiag[i]];
1940     v++; vi++;
1941   }
1942 
1943   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1944   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1945   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatSolve_SeqAIJ_iludt"
1951 PetscErrorCode MatSolve_SeqAIJ_iludt(Mat A,Vec bb,Vec xx)
1952 {
1953   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1954   IS                iscol = a->col,isrow = a->row;
1955   PetscErrorCode    ierr;
1956   PetscInt          i,n=A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag=a->diag;
1957   PetscInt          nz;
1958   const PetscInt    *rout,*cout,*r,*c;
1959   PetscScalar       *x,*tmp,*tmps;
1960   const PetscScalar *b;
1961   const MatScalar   *aa = a->a,*v;
1962 
1963   PetscFunctionBegin;
1964   if (!n) PetscFunctionReturn(0);
1965 
1966   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1967   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1968   tmp  = a->solve_work;
1969 
1970   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1971   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1972 
1973   /* forward solve the lower triangular */
1974   tmp[0] = b[*r++];
1975   tmps   = tmp;
1976   v      = aa;
1977   vi     = aj;
1978   for (i=1; i<n; i++) {
1979     nz  = ai[i+1] - ai[i];
1980     tmp[i] = b[*r++];
1981     SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz);
1982     v += nz; vi += nz;
1983   }
1984 
1985   /* backward solve the upper triangular */
1986   v   = aa + adiag[n] + 1;
1987   vi  = aj + adiag[n] + 1;
1988   for (i=n-1; i>=0; i--){
1989     nz  = adiag[i] - adiag[i+1] - 1;
1990     SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz);
1991     x[*c--] = tmp[i] = tmp[i]*aa[adiag[i]];
1992     v += nz+1; vi += nz+1;
1993   }
1994 
1995   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1996   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1997   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1998   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1999   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
2005 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
2006 {
2007   Mat                B = *fact;
2008   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
2009   IS                 isicol;
2010   PetscErrorCode     ierr;
2011   const PetscInt     *r,*ic;
2012   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
2013   PetscInt           *bi,*bj,*bdiag;
2014   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au;
2015   PetscInt           nlnk,*lnk;
2016   PetscBT            lnkbt;
2017   PetscTruth         row_identity,icol_identity,both_identity;
2018   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp;
2019   const PetscInt     *ics;
2020   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
2021   PetscReal          dt=info->dt,shift=info->shiftinblocks;
2022   PetscInt           nnz_max;
2023   PetscTruth         missing;
2024 
2025   PetscFunctionBegin;
2026   /* ------- symbolic factorization, can be reused ---------*/
2027   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
2028   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2029   adiag=a->diag;
2030 
2031   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
2032 
2033   /* bdiag is location of diagonal in factor */
2034   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
2035 
2036   /* allocate row pointers bi */
2037   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
2038 
2039   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
2040   dtcount = (PetscInt)info->dtcount;
2041   nnz_max  = ai[n]+2*n*dtcount+2;
2042   ierr = PetscMalloc(nnz_max*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2043   ierr = PetscMalloc(nnz_max*sizeof(MatScalar),&ba);CHKERRQ(ierr);
2044 
2045   /* put together the new matrix */
2046   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2047   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
2048   b    = (Mat_SeqAIJ*)(B)->data;
2049   b->free_a       = PETSC_TRUE;
2050   b->free_ij      = PETSC_TRUE;
2051   b->singlemalloc = PETSC_FALSE;
2052   b->a          = ba;
2053   b->j          = bj;
2054   b->i          = bi;
2055   b->diag       = bdiag;
2056   b->ilen       = 0;
2057   b->imax       = 0;
2058   b->row        = isrow;
2059   b->col        = iscol;
2060   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2061   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2062   b->icol       = isicol;
2063   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2064 
2065   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2066   b->maxnz = nnz_max;
2067 
2068   (B)->factor                = MAT_FACTOR_ILUDT;
2069   (B)->info.factor_mallocs   = 0;
2070   (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
2071   CHKMEMQ;
2072   /* ------- end of symbolic factorization ---------*/
2073 
2074   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2075   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
2076   ics  = ic;
2077 
2078   /* linked list for storing column indices of the active row */
2079   nlnk = n + 1;
2080   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2081 
2082   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
2083   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
2084   jtmp = im + n;
2085   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
2086   ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
2087   vtmp = rtmp + n;
2088 
2089   bi[0]    = 0;
2090   bdiag[0] = nnz_max-1; /* location of diagonal in factor B */
2091   for (i=0; i<n; i++) {
2092     /* printf("row %d ...\n",i); */
2093     /* copy initial fill into linked list */
2094     nzi = 0; /* nonzeros for active row i */
2095     nzi = ai[r[i]+1] - ai[r[i]];
2096     if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
2097     nzi_al = adiag[r[i]] - ai[r[i]];
2098     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
2099     ajtmp = aj + ai[r[i]];
2100     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2101 
2102     /* load in initial (unfactored row) */
2103     ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2104     aatmp = a->a + ai[r[i]];
2105     for (j=0; j<nzi; j++) {
2106       rtmp[ics[*ajtmp++]] = *aatmp++;
2107     }
2108 
2109     /* add pivot rows into linked list */
2110     row = lnk[n];
2111     while (row < i) {
2112       nzi_bl = bi[row+1] - bi[row] + 1;
2113       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
2114       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
2115       nzi  += nlnk;
2116       row   = lnk[row];
2117     }
2118 
2119     /* copy data from lnk into jtmp, then initialize lnk */
2120     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
2121 
2122     /* numerical factorization */
2123     bjtmp = jtmp;
2124     row   = *bjtmp++; /* 1st pivot row */
2125     while  (row < i) {
2126       //printf("  prow %d\n",row);
2127       pc = rtmp + row;
2128       /* apply tolerance dropping rule */
2129       if (PetscAbsScalar(*pc) > dt){
2130         pv         = ba + bdiag[row]; /* diag of the pivot row */
2131         multiplier = *pc / *pv;
2132         *pc        = multiplier;
2133         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
2134         pv         = ba + bdiag[row+1] + 1;
2135         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
2136         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
2137         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
2138         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
2139       }
2140       row = *bjtmp++;
2141     }
2142 
2143     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
2144     /* printf("row %d:\n",i); */
2145     nzi_bl = 0; j = 0;
2146     while (jtmp[j] < i){ /* Note: jtmp is sorted */
2147       vtmp[j] = rtmp[jtmp[j]];
2148       nzi_bl++; j++;
2149     }
2150     nzi_bu = nzi - nzi_bl -1;
2151     while (j < nzi){
2152       vtmp[j] = rtmp[jtmp[j]];
2153       j++;
2154     }
2155 
2156     bjtmp = bj + bi[i];
2157     batmp = ba + bi[i];
2158     /* apply level dropping rule to L part */
2159     ncut = nzi_al + dtcount;
2160     if (ncut < nzi_bl){
2161       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
2162       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
2163     } else {
2164       ncut = nzi_bl;
2165     }
2166     for (j=0; j<ncut; j++){
2167       bjtmp[j] = jtmp[j];
2168       batmp[j] = vtmp[j];
2169       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
2170     }
2171     bi[i+1] = bi[i] + ncut;
2172     nzi = ncut + 1;
2173 
2174     /* apply level dropping rule to U part */
2175     ncut = nzi_au + dtcount;
2176     if (ncut < nzi_bu){
2177       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
2178       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
2179     } else {
2180       ncut = nzi_bu;
2181     }
2182     nzi += ncut;
2183 
2184     /* mark bdiagonal */
2185     bdiag[i+1]   = bdiag[i] - (ncut + 1);
2186     bjtmp = bj + bdiag[i];
2187     batmp = ba + bdiag[i];
2188     *bjtmp = i;
2189     *batmp = rtmp[i];
2190     /* printf(" diag (%d,%g),",*bjtmp,*batmp); */
2191 
2192 
2193     bjtmp = bj + bdiag[i+1]+1;
2194     batmp = ba + bdiag[i+1]+1;
2195     for (k=0; k<ncut; k++){
2196       bjtmp[k] = jtmp[nzi_bl+1+k];
2197       batmp[k] = vtmp[nzi_bl+1+k];
2198       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
2199     }
2200 
2201     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
2202     /*
2203     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
2204     printf(" ----------------------------\n");
2205     */
2206   } /* for (i=0; i<n; i++) */
2207   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
2208   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
2209 
2210   /* invert diagonal entries for simplier triangular solves */
2211   /* printf(" diag:\n"); */
2212   for (i=0; i<n; i++) {
2213     if (ba[bdiag[i]] == 0.0) {
2214       /* SHIFT is applied to SINGLE diagonal entry; does this make any sense? */
2215       ba[bdiag[i]] = 1.0/shift;
2216     } else {
2217       ba[bdiag[i]] = 1.0/ba[bdiag[i]];
2218     }
2219   }
2220 
2221   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2222   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2223 
2224   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2225   ierr = PetscFree(im);CHKERRQ(ierr);
2226   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2227 
2228   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
2229   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
2230 
2231   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
2232   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
2233   both_identity = (PetscTruth) (row_identity && icol_identity);
2234   if (row_identity && icol_identity) {
2235     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_iludt;
2236   } else {
2237     B->ops->solve = MatSolve_SeqAIJ_iludt;
2238   }
2239 
2240   B->ops->solveadd          = 0;
2241   B->ops->solvetranspose    = 0;
2242   B->ops->solvetransposeadd = 0;
2243   B->ops->matsolve          = 0;
2244   B->assembled              = PETSC_TRUE;
2245   B->preallocated           = PETSC_TRUE;
2246   PetscFunctionReturn(0);
2247 }
2248 
2249