xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 04b7c42f72c03fd77d047c8ce4a771bfc583ae8d)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/seq/aij.h"
4 #include "../src/inline/dot.h"
5 #include "../src/inline/spops.h"
6 #include "petscbt.h"
7 #include "../src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ_saad"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ_saad(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   const PetscInt *c,*r,*ic;
64   PetscInt       i,n = A->rmap->n,*cc,*cr;
65   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
66   PetscInt       *ordcol,*iwk,*iperm,*jw;
67   PetscInt       jmax,lfill,job,*o_i,*o_j;
68   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
69   PetscReal      af,dt,dtcount,dtcol,fill;
70 
71   PetscFunctionBegin;
72 
73   if (info->dt == PETSC_DEFAULT)      dt      = .005; else dt = info->dt;
74   if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);  else dtcount = info->dtcount;
75   if (info->dtcol == PETSC_DEFAULT)   dtcol   = .01; else dtcol = info->dtcol;
76   if (info->fill == PETSC_DEFAULT)    fill    = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill;
77   lfill   = (PetscInt)(dtcount/2.0);
78   jmax    = (PetscInt)(fill*a->nz);
79 
80 
81   /* ------------------------------------------------------------
82      If reorder=.TRUE., then the original matrix has to be
83      reordered to reflect the user selected ordering scheme, and
84      then de-reordered so it is in it's original format.
85      Because Saad's dperm() is NOT in place, we have to copy
86      the original matrix and allocate more storage. . .
87      ------------------------------------------------------------
88   */
89 
90   /* set reorder to true if either isrow or iscol is not identity */
91   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
92   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
93   reorder = PetscNot(reorder);
94 
95 
96   /* storage for ilu factor */
97   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr);
100   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
101 
102   /* ------------------------------------------------------------
103      Make sure that everything is Fortran formatted (1-Based)
104      ------------------------------------------------------------
105   */
106   for (i=old_i[0];i<old_i[n];i++) {
107     old_j[i]++;
108   }
109   for(i=0;i<n+1;i++) {
110     old_i[i]++;
111   };
112 
113 
114   if (reorder) {
115     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
116     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
117     ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       cr[i]  = r[i]+1;
120       cc[i]  = c[i]+1;
121     }
122     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
123     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
124     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
125     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
126     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr);
127     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job);
128     ierr = PetscFree2(cc,cr);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
222   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = MAT_FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   b->free_a        = PETSC_TRUE;
230   b->free_ij       = PETSC_TRUE;
231   b->singlemalloc  = PETSC_FALSE;
232   b->a             = new_a;
233   b->j             = new_j;
234   b->i             = new_i;
235   b->ilen          = 0;
236   b->imax          = 0;
237   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238   b->row           = isirow;
239   b->col           = iscolf;
240   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
241   b->maxnz = b->nz = new_i[n];
242   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
243   (*fact)->info.factor_mallocs = 0;
244 
245   af = ((double)b->nz)/((double)a->nz) + .001;
246   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr);
247   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
248   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
249   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
250 
251   if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ;
252   else         (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
253   (*fact)->ops->solveadd           = MatSolveAdd_SeqAIJ;
254   (*fact)->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
255   (*fact)->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
256 
257   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
258 
259   PetscFunctionReturn(0);
260 #endif
261 }
262 
263 EXTERN_C_BEGIN
264 #undef __FUNCT__
265 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
266 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
267 {
268   PetscFunctionBegin;
269   *flg = PETSC_TRUE;
270   PetscFunctionReturn(0);
271 }
272 EXTERN_C_END
273 
274 EXTERN_C_BEGIN
275 #undef __FUNCT__
276 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
277 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
278 {
279   PetscInt           n = A->rmap->n;
280   PetscErrorCode     ierr;
281 
282   PetscFunctionBegin;
283   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
284   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
285   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
286     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
287     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
288     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
289     (*B)->ops->iludtfactor       = MatILUDTFactor_SeqAIJ;
290   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
291     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
292     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
293     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
294     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
295   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
296   (*B)->factor = ftype;
297   PetscFunctionReturn(0);
298 }
299 EXTERN_C_END
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
303 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
304 {
305   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
306   IS                 isicol;
307   PetscErrorCode     ierr;
308   const PetscInt     *r,*ic;
309   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
310   PetscInt           *bi,*bj,*ajtmp;
311   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
312   PetscReal          f;
313   PetscInt           nlnk,*lnk,k,**bi_ptr;
314   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
315   PetscBT            lnkbt;
316 
317   PetscFunctionBegin;
318   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
319   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
320   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
321   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
322 
323   /* get new row pointers */
324   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
325   bi[0] = 0;
326 
327   /* bdiag is location of diagonal in factor */
328   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
329   bdiag[0] = 0;
330 
331   /* linked list for storing column indices of the active row */
332   nlnk = n + 1;
333   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
334 
335   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
336 
337   /* initial FreeSpace size is f*(ai[n]+1) */
338   f = info->fill;
339   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
340   current_space = free_space;
341 
342   for (i=0; i<n; i++) {
343     /* copy previous fill into linked list */
344     nzi = 0;
345     nnz = ai[r[i]+1] - ai[r[i]];
346     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
347     ajtmp = aj + ai[r[i]];
348     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
349     nzi += nlnk;
350 
351     /* add pivot rows into linked list */
352     row = lnk[n];
353     while (row < i) {
354       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
355       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
356       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
357       nzi += nlnk;
358       row  = lnk[row];
359     }
360     bi[i+1] = bi[i] + nzi;
361     im[i]   = nzi;
362 
363     /* mark bdiag */
364     nzbd = 0;
365     nnz  = nzi;
366     k    = lnk[n];
367     while (nnz-- && k < i){
368       nzbd++;
369       k = lnk[k];
370     }
371     bdiag[i] = bi[i] + nzbd;
372 
373     /* if free space is not available, make more free space */
374     if (current_space->local_remaining<nzi) {
375       nnz = (n - i)*nzi; /* estimated and max additional space needed */
376       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
377       reallocs++;
378     }
379 
380     /* copy data into free space, then initialize lnk */
381     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
382     bi_ptr[i] = current_space->array;
383     current_space->array           += nzi;
384     current_space->local_used      += nzi;
385     current_space->local_remaining -= nzi;
386   }
387 #if defined(PETSC_USE_INFO)
388   if (ai[n] != 0) {
389     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
390     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
391     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
392     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
393     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
394   } else {
395     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
396   }
397 #endif
398 
399   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
400   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
401 
402   /* destroy list of free space and other temporary array(s) */
403   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
404   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
405   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
406   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
407 
408   /* put together the new matrix */
409   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
410   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
411   b    = (Mat_SeqAIJ*)(B)->data;
412   b->free_a       = PETSC_TRUE;
413   b->free_ij      = PETSC_TRUE;
414   b->singlemalloc = PETSC_FALSE;
415   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
416   b->j          = bj;
417   b->i          = bi;
418   b->diag       = bdiag;
419   b->ilen       = 0;
420   b->imax       = 0;
421   b->row        = isrow;
422   b->col        = iscol;
423   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
424   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
425   b->icol       = isicol;
426   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
427 
428   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
429   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
430   b->maxnz = b->nz = bi[n] ;
431 
432   (B)->factor                = MAT_FACTOR_LU;
433   (B)->info.factor_mallocs   = reallocs;
434   (B)->info.fill_ratio_given = f;
435 
436   if (ai[n]) {
437     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
438   } else {
439     (B)->info.fill_ratio_needed = 0.0;
440   }
441   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
442   (B)->ops->solve            = MatSolve_SeqAIJ;
443   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
444   /* switch to inodes if appropriate */
445   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 /*
450     Trouble in factorization, should we dump the original matrix?
451 */
452 #undef __FUNCT__
453 #define __FUNCT__ "MatFactorDumpMatrix"
454 PetscErrorCode MatFactorDumpMatrix(Mat A)
455 {
456   PetscErrorCode ierr;
457   PetscTruth     flg = PETSC_FALSE;
458 
459   PetscFunctionBegin;
460   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
461   if (flg) {
462     PetscViewer viewer;
463     char        filename[PETSC_MAX_PATH_LEN];
464 
465     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
466     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
467     ierr = MatView(A,viewer);CHKERRQ(ierr);
468     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
469   }
470   PetscFunctionReturn(0);
471 }
472 
473 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
474 
475 /* ----------------------------------------------------------- */
476 #undef __FUNCT__
477 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
478 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
479 {
480   Mat            C=B;
481   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
482   IS             isrow = b->row,isicol = b->icol;
483   PetscErrorCode ierr;
484   const PetscInt  *r,*ic,*ics;
485   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
486   PetscInt       *ajtmp,*bjtmp,nz,row,*diag_offset = b->diag,diag,*pj;
487   MatScalar      *rtmp,*pc,multiplier,*v,*pv,d,*aa=a->a;
488   PetscReal      rs = 0.0;
489   LUShift_Ctx    sctx;
490   PetscInt       newshift,*ddiag;
491 
492   PetscFunctionBegin;
493   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
494   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
495   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
496   ics  = ic;
497 
498   sctx.shift_top      = 0;
499   sctx.nshift_max     = 0;
500   sctx.shift_lo       = 0;
501   sctx.shift_hi       = 0;
502   sctx.shift_fraction = 0;
503 
504   /* if both shift schemes are chosen by user, only use info->shiftpd */
505   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
506     ddiag          = a->diag;
507     sctx.shift_top = info->zeropivot;
508     for (i=0; i<n; i++) {
509       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
510       d  = (aa)[ddiag[i]];
511       rs = -PetscAbsScalar(d) - PetscRealPart(d);
512       v  = aa+ai[i];
513       nz = ai[i+1] - ai[i];
514       for (j=0; j<nz; j++)
515 	rs += PetscAbsScalar(v[j]);
516       if (rs>sctx.shift_top) sctx.shift_top = rs;
517     }
518     sctx.shift_top   *= 1.1;
519     sctx.nshift_max   = 5;
520     sctx.shift_lo     = 0.;
521     sctx.shift_hi     = 1.;
522   }
523 
524   sctx.shift_amount = 0.0;
525   sctx.nshift       = 0;
526   do {
527     sctx.lushift = PETSC_FALSE;
528     for (i=0; i<n; i++){
529       nz    = bi[i+1] - bi[i];
530       bjtmp = bj + bi[i];
531       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
532 
533       /* load in initial (unfactored row) */
534       nz    = ai[r[i]+1] - ai[r[i]];
535       ajtmp = aj + ai[r[i]];
536       v     = aa + ai[r[i]];
537       for (j=0; j<nz; j++) {
538         rtmp[ics[ajtmp[j]]] = v[j];
539       }
540       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
541 
542       row = *bjtmp++;
543       while  (row < i) {
544         pc = rtmp + row;
545         if (*pc != 0.0) {
546           pv         = b->a + diag_offset[row];
547           pj         = b->j + diag_offset[row] + 1;
548           multiplier = *pc / *pv++;
549           *pc        = multiplier;
550           nz         = bi[row+1] - diag_offset[row] - 1;
551           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
552           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
553         }
554         row = *bjtmp++;
555       }
556       /* finished row so stick it into b->a */
557       pv   = b->a + bi[i] ;
558       pj   = b->j + bi[i] ;
559       nz   = bi[i+1] - bi[i];
560       diag = diag_offset[i] - bi[i];
561       rs   = -PetscAbsScalar(pv[diag]);
562       for (j=0; j<nz; j++) {
563         pv[j] = rtmp[pj[j]];
564         rs   += PetscAbsScalar(pv[j]);
565       }
566 
567       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
568       sctx.rs  = rs;
569       sctx.pv  = pv[diag];
570       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
571       if (newshift == 1) break;
572     }
573 
574     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
575       /*
576        * if no shift in this attempt & shifting & started shifting & can refine,
577        * then try lower shift
578        */
579       sctx.shift_hi       = sctx.shift_fraction;
580       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
581       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
582       sctx.lushift        = PETSC_TRUE;
583       sctx.nshift++;
584     }
585   } while (sctx.lushift);
586 
587   /* invert diagonal entries for simplier triangular solves */
588   for (i=0; i<n; i++) {
589     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
590   }
591   ierr = PetscFree(rtmp);CHKERRQ(ierr);
592   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
593   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
594   if (b->inode.use) {
595     C->ops->solve   = MatSolve_Inode;
596   } else {
597     PetscTruth row_identity, col_identity;
598     ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
599     ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
600     if (row_identity && col_identity) {
601       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
602     } else {
603       C->ops->solve   = MatSolve_SeqAIJ;
604     }
605   }
606   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
607   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
608   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
609   C->ops->matsolve           = MatMatSolve_SeqAIJ;
610   C->assembled    = PETSC_TRUE;
611   C->preallocated = PETSC_TRUE;
612   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
613   if (sctx.nshift){
614      if (info->shiftpd) {
615       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
616     } else if (info->shiftnz) {
617       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 /*
624    This routine implements inplace ILU(0) with row or/and column permutations.
625    Input:
626      A - original matrix
627    Output;
628      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
629          a->j (col index) is permuted by the inverse of colperm, then sorted
630          a->a reordered accordingly with a->j
631          a->diag (ptr to diagonal elements) is updated.
632 */
633 #undef __FUNCT__
634 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
635 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
636 {
637   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
638   IS             isrow = a->row,isicol = a->icol;
639   PetscErrorCode ierr;
640   const PetscInt *r,*ic,*ics;
641   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
642   PetscInt       *ajtmp,nz,row;
643   PetscInt       *diag = a->diag,nbdiag,*pj;
644   PetscScalar    *rtmp,*pc,multiplier,d;
645   MatScalar      *v,*pv;
646   PetscReal      rs;
647   LUShift_Ctx    sctx;
648   PetscInt       newshift;
649 
650   PetscFunctionBegin;
651   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
652   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
653   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
654   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
655   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
656   ics = ic;
657 
658   sctx.shift_top      = 0;
659   sctx.nshift_max     = 0;
660   sctx.shift_lo       = 0;
661   sctx.shift_hi       = 0;
662   sctx.shift_fraction = 0;
663 
664   /* if both shift schemes are chosen by user, only use info->shiftpd */
665   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
666     sctx.shift_top = 0;
667     for (i=0; i<n; i++) {
668       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
669       d  = (a->a)[diag[i]];
670       rs = -PetscAbsScalar(d) - PetscRealPart(d);
671       v  = a->a+ai[i];
672       nz = ai[i+1] - ai[i];
673       for (j=0; j<nz; j++)
674 	rs += PetscAbsScalar(v[j]);
675       if (rs>sctx.shift_top) sctx.shift_top = rs;
676     }
677     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
678     sctx.shift_top    *= 1.1;
679     sctx.nshift_max   = 5;
680     sctx.shift_lo     = 0.;
681     sctx.shift_hi     = 1.;
682   }
683 
684   sctx.shift_amount = 0;
685   sctx.nshift       = 0;
686   do {
687     sctx.lushift = PETSC_FALSE;
688     for (i=0; i<n; i++){
689       /* load in initial unfactored row */
690       nz    = ai[r[i]+1] - ai[r[i]];
691       ajtmp = aj + ai[r[i]];
692       v     = a->a + ai[r[i]];
693       /* sort permuted ajtmp and values v accordingly */
694       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
695       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
696 
697       diag[r[i]] = ai[r[i]];
698       for (j=0; j<nz; j++) {
699         rtmp[ajtmp[j]] = v[j];
700         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
701       }
702       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
703 
704       row = *ajtmp++;
705       while  (row < i) {
706         pc = rtmp + row;
707         if (*pc != 0.0) {
708           pv         = a->a + diag[r[row]];
709           pj         = aj + diag[r[row]] + 1;
710 
711           multiplier = *pc / *pv++;
712           *pc        = multiplier;
713           nz         = ai[r[row]+1] - diag[r[row]] - 1;
714           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
715           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
716         }
717         row = *ajtmp++;
718       }
719       /* finished row so overwrite it onto a->a */
720       pv   = a->a + ai[r[i]] ;
721       pj   = aj + ai[r[i]] ;
722       nz   = ai[r[i]+1] - ai[r[i]];
723       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
724 
725       rs   = 0.0;
726       for (j=0; j<nz; j++) {
727         pv[j] = rtmp[pj[j]];
728         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
729       }
730 
731       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
732       sctx.rs  = rs;
733       sctx.pv  = pv[nbdiag];
734       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
735       if (newshift == 1) break;
736     }
737 
738     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
739       /*
740        * if no shift in this attempt & shifting & started shifting & can refine,
741        * then try lower shift
742        */
743       sctx.shift_hi        = sctx.shift_fraction;
744       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
745       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
746       sctx.lushift         = PETSC_TRUE;
747       sctx.nshift++;
748     }
749   } while (sctx.lushift);
750 
751   /* invert diagonal entries for simplier triangular solves */
752   for (i=0; i<n; i++) {
753     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
754   }
755 
756   ierr = PetscFree(rtmp);CHKERRQ(ierr);
757   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
758   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
759   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
760   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
761   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
762   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
763   A->assembled = PETSC_TRUE;
764   A->preallocated = PETSC_TRUE;
765   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
766   if (sctx.nshift){
767     if (info->shiftpd) {
768       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
769     } else if (info->shiftnz) {
770       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
771     }
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 /* ----------------------------------------------------------- */
777 #undef __FUNCT__
778 #define __FUNCT__ "MatLUFactor_SeqAIJ"
779 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
780 {
781   PetscErrorCode ierr;
782   Mat            C;
783 
784   PetscFunctionBegin;
785   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
786   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
787   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
788   A->ops->solve            = C->ops->solve;
789   A->ops->solvetranspose   = C->ops->solvetranspose;
790   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
791   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794 /* ----------------------------------------------------------- */
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatSolve_SeqAIJ"
798 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
799 {
800   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
801   IS                iscol = a->col,isrow = a->row;
802   PetscErrorCode    ierr;
803   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
804   PetscInt          nz;
805   const PetscInt    *rout,*cout,*r,*c;
806   PetscScalar       *x,*tmp,*tmps,sum;
807   const PetscScalar *b;
808   const MatScalar   *aa = a->a,*v;
809 
810   PetscFunctionBegin;
811   if (!n) PetscFunctionReturn(0);
812 
813   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
814   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
815   tmp  = a->solve_work;
816 
817   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
818   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
819 
820   /* forward solve the lower triangular */
821   tmp[0] = b[*r++];
822   tmps   = tmp;
823   for (i=1; i<n; i++) {
824     v   = aa + ai[i] ;
825     vi  = aj + ai[i] ;
826     nz  = a->diag[i] - ai[i];
827     sum = b[*r++];
828     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
829     tmp[i] = sum;
830   }
831 
832   /* backward solve the upper triangular */
833   for (i=n-1; i>=0; i--){
834     v   = aa + a->diag[i] + 1;
835     vi  = aj + a->diag[i] + 1;
836     nz  = ai[i+1] - a->diag[i] - 1;
837     sum = tmp[i];
838     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
839     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
840   }
841 
842   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
843   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
844   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
845   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
846   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
847   PetscFunctionReturn(0);
848 }
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "MatMatSolve_SeqAIJ"
852 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
853 {
854   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
855   IS              iscol = a->col,isrow = a->row;
856   PetscErrorCode  ierr;
857   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
858   PetscInt        nz,neq;
859   const PetscInt  *rout,*cout,*r,*c;
860   PetscScalar     *x,*b,*tmp,*tmps,sum;
861   const MatScalar *aa = a->a,*v;
862   PetscTruth      bisdense,xisdense;
863 
864   PetscFunctionBegin;
865   if (!n) PetscFunctionReturn(0);
866 
867   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
868   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
869   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
870   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
871 
872   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
873   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
874 
875   tmp  = a->solve_work;
876   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
877   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
878 
879   for (neq=0; neq<B->cmap->n; neq++){
880     /* forward solve the lower triangular */
881     tmp[0] = b[r[0]];
882     tmps   = tmp;
883     for (i=1; i<n; i++) {
884       v   = aa + ai[i] ;
885       vi  = aj + ai[i] ;
886       nz  = a->diag[i] - ai[i];
887       sum = b[r[i]];
888       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
889       tmp[i] = sum;
890     }
891     /* backward solve the upper triangular */
892     for (i=n-1; i>=0; i--){
893       v   = aa + a->diag[i] + 1;
894       vi  = aj + a->diag[i] + 1;
895       nz  = ai[i+1] - a->diag[i] - 1;
896       sum = tmp[i];
897       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
898       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
899     }
900 
901     b += n;
902     x += n;
903   }
904   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
905   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
906   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
907   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
908   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
909   PetscFunctionReturn(0);
910 }
911 
912 #undef __FUNCT__
913 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
914 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
915 {
916   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
917   IS              iscol = a->col,isrow = a->row;
918   PetscErrorCode  ierr;
919   const PetscInt  *r,*c,*rout,*cout;
920   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
921   PetscInt        nz,row;
922   PetscScalar     *x,*b,*tmp,*tmps,sum;
923   const MatScalar *aa = a->a,*v;
924 
925   PetscFunctionBegin;
926   if (!n) PetscFunctionReturn(0);
927 
928   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
929   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
930   tmp  = a->solve_work;
931 
932   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
933   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
934 
935   /* forward solve the lower triangular */
936   tmp[0] = b[*r++];
937   tmps   = tmp;
938   for (row=1; row<n; row++) {
939     i   = rout[row]; /* permuted row */
940     v   = aa + ai[i] ;
941     vi  = aj + ai[i] ;
942     nz  = a->diag[i] - ai[i];
943     sum = b[*r++];
944     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
945     tmp[row] = sum;
946   }
947 
948   /* backward solve the upper triangular */
949   for (row=n-1; row>=0; row--){
950     i   = rout[row]; /* permuted row */
951     v   = aa + a->diag[i] + 1;
952     vi  = aj + a->diag[i] + 1;
953     nz  = ai[i+1] - a->diag[i] - 1;
954     sum = tmp[row];
955     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
956     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
957   }
958 
959   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
960   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
961   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
962   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
963   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
964   PetscFunctionReturn(0);
965 }
966 
967 /* ----------------------------------------------------------- */
968 #undef __FUNCT__
969 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
970 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
971 {
972   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
973   PetscErrorCode    ierr;
974   PetscInt          n = A->rmap->n;
975   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
976   PetscScalar       *x;
977   const PetscScalar *b;
978   const MatScalar   *aa = a->a;
979 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
980   PetscInt          adiag_i,i,nz,ai_i;
981   const MatScalar   *v;
982   PetscScalar       sum;
983 #endif
984 
985   PetscFunctionBegin;
986   if (!n) PetscFunctionReturn(0);
987 
988   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
989   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
990 
991 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
992   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
993 #else
994   /* forward solve the lower triangular */
995   x[0] = b[0];
996   for (i=1; i<n; i++) {
997     ai_i = ai[i];
998     v    = aa + ai_i;
999     vi   = aj + ai_i;
1000     nz   = adiag[i] - ai_i;
1001     sum  = b[i];
1002     while (nz--) sum -= *v++ * x[*vi++];
1003     x[i] = sum;
1004   }
1005 
1006   /* backward solve the upper triangular */
1007   for (i=n-1; i>=0; i--){
1008     adiag_i = adiag[i];
1009     v       = aa + adiag_i + 1;
1010     vi      = aj + adiag_i + 1;
1011     nz      = ai[i+1] - adiag_i - 1;
1012     sum     = x[i];
1013     while (nz--) sum -= *v++ * x[*vi++];
1014     x[i]    = sum*aa[adiag_i];
1015   }
1016 #endif
1017   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1018   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1019   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1025 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1026 {
1027   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1028   IS              iscol = a->col,isrow = a->row;
1029   PetscErrorCode  ierr;
1030   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1031   PetscInt        nz;
1032   const PetscInt  *rout,*cout,*r,*c;
1033   PetscScalar     *x,*b,*tmp,sum;
1034   const MatScalar *aa = a->a,*v;
1035 
1036   PetscFunctionBegin;
1037   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1038 
1039   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1040   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1041   tmp  = a->solve_work;
1042 
1043   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1044   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1045 
1046   /* forward solve the lower triangular */
1047   tmp[0] = b[*r++];
1048   for (i=1; i<n; i++) {
1049     v   = aa + ai[i] ;
1050     vi  = aj + ai[i] ;
1051     nz  = a->diag[i] - ai[i];
1052     sum = b[*r++];
1053     while (nz--) sum -= *v++ * tmp[*vi++ ];
1054     tmp[i] = sum;
1055   }
1056 
1057   /* backward solve the upper triangular */
1058   for (i=n-1; i>=0; i--){
1059     v   = aa + a->diag[i] + 1;
1060     vi  = aj + a->diag[i] + 1;
1061     nz  = ai[i+1] - a->diag[i] - 1;
1062     sum = tmp[i];
1063     while (nz--) sum -= *v++ * tmp[*vi++ ];
1064     tmp[i] = sum*aa[a->diag[i]];
1065     x[*c--] += tmp[i];
1066   }
1067 
1068   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1069   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1070   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1071   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1072   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1073 
1074   PetscFunctionReturn(0);
1075 }
1076 /* -------------------------------------------------------------------*/
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1079 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1080 {
1081   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1082   IS              iscol = a->col,isrow = a->row;
1083   PetscErrorCode  ierr;
1084   const PetscInt  *rout,*cout,*r,*c;
1085   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1086   PetscInt        nz,*diag = a->diag;
1087   PetscScalar     *x,*b,*tmp,s1;
1088   const MatScalar *aa = a->a,*v;
1089 
1090   PetscFunctionBegin;
1091   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1092   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1093   tmp  = a->solve_work;
1094 
1095   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1096   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1097 
1098   /* copy the b into temp work space according to permutation */
1099   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1100 
1101   /* forward solve the U^T */
1102   for (i=0; i<n; i++) {
1103     v   = aa + diag[i] ;
1104     vi  = aj + diag[i] + 1;
1105     nz  = ai[i+1] - diag[i] - 1;
1106     s1  = tmp[i];
1107     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1108     while (nz--) {
1109       tmp[*vi++ ] -= (*v++)*s1;
1110     }
1111     tmp[i] = s1;
1112   }
1113 
1114   /* backward solve the L^T */
1115   for (i=n-1; i>=0; i--){
1116     v   = aa + diag[i] - 1 ;
1117     vi  = aj + diag[i] - 1 ;
1118     nz  = diag[i] - ai[i];
1119     s1  = tmp[i];
1120     while (nz--) {
1121       tmp[*vi-- ] -= (*v--)*s1;
1122     }
1123   }
1124 
1125   /* copy tmp into x according to permutation */
1126   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1127 
1128   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1129   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1130   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1131   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1132 
1133   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1139 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1140 {
1141   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1142   IS              iscol = a->col,isrow = a->row;
1143   PetscErrorCode  ierr;
1144   const PetscInt  *r,*c,*rout,*cout;
1145   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1146   PetscInt        nz,*diag = a->diag;
1147   PetscScalar     *x,*b,*tmp;
1148   const MatScalar *aa = a->a,*v;
1149 
1150   PetscFunctionBegin;
1151   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1152 
1153   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1154   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1155   tmp = a->solve_work;
1156 
1157   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1158   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1159 
1160   /* copy the b into temp work space according to permutation */
1161   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1162 
1163   /* forward solve the U^T */
1164   for (i=0; i<n; i++) {
1165     v   = aa + diag[i] ;
1166     vi  = aj + diag[i] + 1;
1167     nz  = ai[i+1] - diag[i] - 1;
1168     tmp[i] *= *v++;
1169     while (nz--) {
1170       tmp[*vi++ ] -= (*v++)*tmp[i];
1171     }
1172   }
1173 
1174   /* backward solve the L^T */
1175   for (i=n-1; i>=0; i--){
1176     v   = aa + diag[i] - 1 ;
1177     vi  = aj + diag[i] - 1 ;
1178     nz  = diag[i] - ai[i];
1179     while (nz--) {
1180       tmp[*vi-- ] -= (*v--)*tmp[i];
1181     }
1182   }
1183 
1184   /* copy tmp into x according to permutation */
1185   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1186 
1187   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1188   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1189   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1190   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1191 
1192   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 /* ----------------------------------------------------------------*/
1196 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1197 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1201 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1202 {
1203   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1204   IS                 isicol;
1205   PetscErrorCode     ierr;
1206   const PetscInt     *r,*ic;
1207   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1208   PetscInt           *bi,*cols,nnz,*cols_lvl;
1209   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1210   PetscInt           i,levels,diagonal_fill;
1211   PetscTruth         col_identity,row_identity;
1212   PetscReal          f;
1213   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1214   PetscBT            lnkbt;
1215   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1216   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1217   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1218   PetscTruth         missing;
1219 
1220   PetscFunctionBegin;
1221   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1222   f             = info->fill;
1223   levels        = (PetscInt)info->levels;
1224   diagonal_fill = (PetscInt)info->diagonal_fill;
1225   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1226 
1227   /* special case that simply copies fill pattern */
1228   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1229   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1230   if (!levels && row_identity && col_identity) {
1231     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr);
1232     fact->factor = MAT_FACTOR_ILU;
1233     (fact)->info.factor_mallocs    = 0;
1234     (fact)->info.fill_ratio_given  = info->fill;
1235     (fact)->info.fill_ratio_needed = 1.0;
1236     b               = (Mat_SeqAIJ*)(fact)->data;
1237     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1238     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1239     b->row              = isrow;
1240     b->col              = iscol;
1241     b->icol             = isicol;
1242     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1243     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1244     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1245     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1246     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1247     PetscFunctionReturn(0);
1248   }
1249 
1250   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1251   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1252 
1253   /* get new row pointers */
1254   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1255   bi[0] = 0;
1256   /* bdiag is location of diagonal in factor */
1257   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1258   bdiag[0]  = 0;
1259 
1260   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1261   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1262 
1263   /* create a linked list for storing column indices of the active row */
1264   nlnk = n + 1;
1265   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1266 
1267   /* initial FreeSpace size is f*(ai[n]+1) */
1268   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1269   current_space = free_space;
1270   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1271   current_space_lvl = free_space_lvl;
1272 
1273   for (i=0; i<n; i++) {
1274     nzi = 0;
1275     /* copy current row into linked list */
1276     nnz  = ai[r[i]+1] - ai[r[i]];
1277     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1278     cols = aj + ai[r[i]];
1279     lnk[i] = -1; /* marker to indicate if diagonal exists */
1280     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1281     nzi += nlnk;
1282 
1283     /* make sure diagonal entry is included */
1284     if (diagonal_fill && lnk[i] == -1) {
1285       fm = n;
1286       while (lnk[fm] < i) fm = lnk[fm];
1287       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1288       lnk[fm]    = i;
1289       lnk_lvl[i] = 0;
1290       nzi++; dcount++;
1291     }
1292 
1293     /* add pivot rows into the active row */
1294     nzbd = 0;
1295     prow = lnk[n];
1296     while (prow < i) {
1297       nnz      = bdiag[prow];
1298       cols     = bj_ptr[prow] + nnz + 1;
1299       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1300       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1301       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1302       nzi += nlnk;
1303       prow = lnk[prow];
1304       nzbd++;
1305     }
1306     bdiag[i] = nzbd;
1307     bi[i+1]  = bi[i] + nzi;
1308 
1309     /* if free space is not available, make more free space */
1310     if (current_space->local_remaining<nzi) {
1311       nnz = nzi*(n - i); /* estimated and max additional space needed */
1312       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1313       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1314       reallocs++;
1315     }
1316 
1317     /* copy data into free_space and free_space_lvl, then initialize lnk */
1318     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1319     bj_ptr[i]    = current_space->array;
1320     bjlvl_ptr[i] = current_space_lvl->array;
1321 
1322     /* make sure the active row i has diagonal entry */
1323     if (*(bj_ptr[i]+bdiag[i]) != i) {
1324       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1325     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1326     }
1327 
1328     current_space->array           += nzi;
1329     current_space->local_used      += nzi;
1330     current_space->local_remaining -= nzi;
1331     current_space_lvl->array           += nzi;
1332     current_space_lvl->local_used      += nzi;
1333     current_space_lvl->local_remaining -= nzi;
1334   }
1335 
1336   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1337   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1338 
1339   /* destroy list of free space and other temporary arrays */
1340   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1341   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1342   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1343   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1344   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1345 
1346 #if defined(PETSC_USE_INFO)
1347   {
1348     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1349     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1350     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1351     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1352     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1353     if (diagonal_fill) {
1354       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1355     }
1356   }
1357 #endif
1358 
1359   /* put together the new matrix */
1360   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1361   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1362   b = (Mat_SeqAIJ*)(fact)->data;
1363   b->free_a       = PETSC_TRUE;
1364   b->free_ij      = PETSC_TRUE;
1365   b->singlemalloc = PETSC_FALSE;
1366   ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1367   b->j          = bj;
1368   b->i          = bi;
1369   for (i=0; i<n; i++) bdiag[i] += bi[i];
1370   b->diag       = bdiag;
1371   b->ilen       = 0;
1372   b->imax       = 0;
1373   b->row        = isrow;
1374   b->col        = iscol;
1375   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1376   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1377   b->icol       = isicol;
1378   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1379   /* In b structure:  Free imax, ilen, old a, old j.
1380      Allocate bdiag, solve_work, new a, new j */
1381   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1382   b->maxnz             = b->nz = bi[n] ;
1383   (fact)->info.factor_mallocs    = reallocs;
1384   (fact)->info.fill_ratio_given  = f;
1385   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1386   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1387   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 #include "../src/mat/impls/sbaij/seq/sbaij.h"
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1394 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1395 {
1396   Mat            C = B;
1397   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1398   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1399   IS             ip=b->row,iip = b->icol;
1400   PetscErrorCode ierr;
1401   const PetscInt *rip,*riip;
1402   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1403   PetscInt       *ai=a->i,*aj=a->j;
1404   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1405   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1406   PetscReal      zeropivot,rs,shiftnz;
1407   PetscReal      shiftpd;
1408   ChShift_Ctx    sctx;
1409   PetscInt       newshift;
1410   PetscTruth     perm_identity;
1411 
1412   PetscFunctionBegin;
1413 
1414   shiftnz   = info->shiftnz;
1415   shiftpd   = info->shiftpd;
1416   zeropivot = info->zeropivot;
1417 
1418   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1419   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1420 
1421   /* initialization */
1422   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1423   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1424   jl   = il + mbs;
1425   rtmp = (MatScalar*)(jl + mbs);
1426 
1427   sctx.shift_amount = 0;
1428   sctx.nshift       = 0;
1429   do {
1430     sctx.chshift = PETSC_FALSE;
1431     for (i=0; i<mbs; i++) {
1432       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1433     }
1434 
1435     for (k = 0; k<mbs; k++){
1436       bval = ba + bi[k];
1437       /* initialize k-th row by the perm[k]-th row of A */
1438       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1439       for (j = jmin; j < jmax; j++){
1440         col = riip[aj[j]];
1441         if (col >= k){ /* only take upper triangular entry */
1442           rtmp[col] = aa[j];
1443           *bval++  = 0.0; /* for in-place factorization */
1444         }
1445       }
1446       /* shift the diagonal of the matrix */
1447       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1448 
1449       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1450       dk = rtmp[k];
1451       i = jl[k]; /* first row to be added to k_th row  */
1452 
1453       while (i < k){
1454         nexti = jl[i]; /* next row to be added to k_th row */
1455 
1456         /* compute multiplier, update diag(k) and U(i,k) */
1457         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1458         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1459         dk += uikdi*ba[ili];
1460         ba[ili] = uikdi; /* -U(i,k) */
1461 
1462         /* add multiple of row i to k-th row */
1463         jmin = ili + 1; jmax = bi[i+1];
1464         if (jmin < jmax){
1465           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1466           /* update il and jl for row i */
1467           il[i] = jmin;
1468           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1469         }
1470         i = nexti;
1471       }
1472 
1473       /* shift the diagonals when zero pivot is detected */
1474       /* compute rs=sum of abs(off-diagonal) */
1475       rs   = 0.0;
1476       jmin = bi[k]+1;
1477       nz   = bi[k+1] - jmin;
1478       bcol = bj + jmin;
1479       while (nz--){
1480         rs += PetscAbsScalar(rtmp[*bcol]);
1481         bcol++;
1482       }
1483 
1484       sctx.rs = rs;
1485       sctx.pv = dk;
1486       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1487 
1488       if (newshift == 1) {
1489         if (!sctx.shift_amount) {
1490           sctx.shift_amount = 1e-5;
1491         }
1492         break;
1493       }
1494 
1495       /* copy data into U(k,:) */
1496       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1497       jmin = bi[k]+1; jmax = bi[k+1];
1498       if (jmin < jmax) {
1499         for (j=jmin; j<jmax; j++){
1500           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1501         }
1502         /* add the k-th row into il and jl */
1503         il[k] = jmin;
1504         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1505       }
1506     }
1507   } while (sctx.chshift);
1508   ierr = PetscFree(il);CHKERRQ(ierr);
1509 
1510   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1511   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1512 
1513   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1514   if (perm_identity){
1515     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1516     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1517     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1518     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1519   } else {
1520     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1521     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1522     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1523     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1524   }
1525 
1526   C->assembled    = PETSC_TRUE;
1527   C->preallocated = PETSC_TRUE;
1528   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1529   if (sctx.nshift){
1530     if (shiftnz) {
1531       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1532     } else if (shiftpd) {
1533       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1534     }
1535   }
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1541 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1542 {
1543   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1544   Mat_SeqSBAIJ       *b;
1545   PetscErrorCode     ierr;
1546   PetscTruth         perm_identity,missing;
1547   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1548   const PetscInt     *rip,*riip;
1549   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1550   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1551   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1552   PetscReal          fill=info->fill,levels=info->levels;
1553   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1554   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1555   PetscBT            lnkbt;
1556   IS                 iperm;
1557 
1558   PetscFunctionBegin;
1559   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1560   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1561   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1562   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1563   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1564 
1565   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1566   ui[0] = 0;
1567 
1568   /* ICC(0) without matrix ordering: simply copies fill pattern */
1569   if (!levels && perm_identity) {
1570 
1571     for (i=0; i<am; i++) {
1572       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1573     }
1574     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1575     cols = uj;
1576     for (i=0; i<am; i++) {
1577       aj    = a->j + a->diag[i];
1578       ncols = ui[i+1] - ui[i];
1579       for (j=0; j<ncols; j++) *cols++ = *aj++;
1580     }
1581   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1582     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1583     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1584 
1585     /* initialization */
1586     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1587 
1588     /* jl: linked list for storing indices of the pivot rows
1589        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1590     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1591     il         = jl + am;
1592     uj_ptr     = (PetscInt**)(il + am);
1593     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1594     for (i=0; i<am; i++){
1595       jl[i] = am; il[i] = 0;
1596     }
1597 
1598     /* create and initialize a linked list for storing column indices of the active row k */
1599     nlnk = am + 1;
1600     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1601 
1602     /* initial FreeSpace size is fill*(ai[am]+1) */
1603     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1604     current_space = free_space;
1605     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1606     current_space_lvl = free_space_lvl;
1607 
1608     for (k=0; k<am; k++){  /* for each active row k */
1609       /* initialize lnk by the column indices of row rip[k] of A */
1610       nzk   = 0;
1611       ncols = ai[rip[k]+1] - ai[rip[k]];
1612       if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1613       ncols_upper = 0;
1614       for (j=0; j<ncols; j++){
1615         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1616         if (riip[i] >= k){ /* only take upper triangular entry */
1617           ajtmp[ncols_upper] = i;
1618           ncols_upper++;
1619         }
1620       }
1621       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1622       nzk += nlnk;
1623 
1624       /* update lnk by computing fill-in for each pivot row to be merged in */
1625       prow = jl[k]; /* 1st pivot row */
1626 
1627       while (prow < k){
1628         nextprow = jl[prow];
1629 
1630         /* merge prow into k-th row */
1631         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1632         jmax = ui[prow+1];
1633         ncols = jmax-jmin;
1634         i     = jmin - ui[prow];
1635         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1636         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1637         j     = *(uj - 1);
1638         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1639         nzk += nlnk;
1640 
1641         /* update il and jl for prow */
1642         if (jmin < jmax){
1643           il[prow] = jmin;
1644           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1645         }
1646         prow = nextprow;
1647       }
1648 
1649       /* if free space is not available, make more free space */
1650       if (current_space->local_remaining<nzk) {
1651         i = am - k + 1; /* num of unfactored rows */
1652         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1653         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1654         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1655         reallocs++;
1656       }
1657 
1658       /* copy data into free_space and free_space_lvl, then initialize lnk */
1659       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1660       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1661 
1662       /* add the k-th row into il and jl */
1663       if (nzk > 1){
1664         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1665         jl[k] = jl[i]; jl[i] = k;
1666         il[k] = ui[k] + 1;
1667       }
1668       uj_ptr[k]     = current_space->array;
1669       uj_lvl_ptr[k] = current_space_lvl->array;
1670 
1671       current_space->array           += nzk;
1672       current_space->local_used      += nzk;
1673       current_space->local_remaining -= nzk;
1674 
1675       current_space_lvl->array           += nzk;
1676       current_space_lvl->local_used      += nzk;
1677       current_space_lvl->local_remaining -= nzk;
1678 
1679       ui[k+1] = ui[k] + nzk;
1680     }
1681 
1682 #if defined(PETSC_USE_INFO)
1683     if (ai[am] != 0) {
1684       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1685       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1686       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1687       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1688     } else {
1689       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1690     }
1691 #endif
1692 
1693     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1694     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1695     ierr = PetscFree(jl);CHKERRQ(ierr);
1696     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1697 
1698     /* destroy list of free space and other temporary array(s) */
1699     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1700     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1701     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1702     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1703 
1704   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1705 
1706   /* put together the new matrix in MATSEQSBAIJ format */
1707 
1708   b    = (Mat_SeqSBAIJ*)(fact)->data;
1709   b->singlemalloc = PETSC_FALSE;
1710   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1711   b->j    = uj;
1712   b->i    = ui;
1713   b->diag = 0;
1714   b->ilen = 0;
1715   b->imax = 0;
1716   b->row  = perm;
1717   b->col  = perm;
1718   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1719   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1720   b->icol = iperm;
1721   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1722   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1723   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1724   b->maxnz   = b->nz = ui[am];
1725   b->free_a  = PETSC_TRUE;
1726   b->free_ij = PETSC_TRUE;
1727 
1728   (fact)->info.factor_mallocs    = reallocs;
1729   (fact)->info.fill_ratio_given  = fill;
1730   if (ai[am] != 0) {
1731     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1732   } else {
1733     (fact)->info.fill_ratio_needed = 0.0;
1734   }
1735   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1741 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1742 {
1743   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1744   Mat_SeqSBAIJ       *b;
1745   PetscErrorCode     ierr;
1746   PetscTruth         perm_identity;
1747   PetscReal          fill = info->fill;
1748   const PetscInt     *rip,*riip;
1749   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1750   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1751   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1752   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1753   PetscBT            lnkbt;
1754   IS                 iperm;
1755 
1756   PetscFunctionBegin;
1757   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1758   /* check whether perm is the identity mapping */
1759   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1760   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1761   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1762   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1763 
1764   /* initialization */
1765   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1766   ui[0] = 0;
1767 
1768   /* jl: linked list for storing indices of the pivot rows
1769      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1770   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1771   il     = jl + am;
1772   cols   = il + am;
1773   ui_ptr = (PetscInt**)(cols + am);
1774   for (i=0; i<am; i++){
1775     jl[i] = am; il[i] = 0;
1776   }
1777 
1778   /* create and initialize a linked list for storing column indices of the active row k */
1779   nlnk = am + 1;
1780   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1781 
1782   /* initial FreeSpace size is fill*(ai[am]+1) */
1783   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1784   current_space = free_space;
1785 
1786   for (k=0; k<am; k++){  /* for each active row k */
1787     /* initialize lnk by the column indices of row rip[k] of A */
1788     nzk   = 0;
1789     ncols = ai[rip[k]+1] - ai[rip[k]];
1790     if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1791     ncols_upper = 0;
1792     for (j=0; j<ncols; j++){
1793       i = riip[*(aj + ai[rip[k]] + j)];
1794       if (i >= k){ /* only take upper triangular entry */
1795         cols[ncols_upper] = i;
1796         ncols_upper++;
1797       }
1798     }
1799     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1800     nzk += nlnk;
1801 
1802     /* update lnk by computing fill-in for each pivot row to be merged in */
1803     prow = jl[k]; /* 1st pivot row */
1804 
1805     while (prow < k){
1806       nextprow = jl[prow];
1807       /* merge prow into k-th row */
1808       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1809       jmax = ui[prow+1];
1810       ncols = jmax-jmin;
1811       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1812       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1813       nzk += nlnk;
1814 
1815       /* update il and jl for prow */
1816       if (jmin < jmax){
1817         il[prow] = jmin;
1818         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1819       }
1820       prow = nextprow;
1821     }
1822 
1823     /* if free space is not available, make more free space */
1824     if (current_space->local_remaining<nzk) {
1825       i = am - k + 1; /* num of unfactored rows */
1826       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1827       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1828       reallocs++;
1829     }
1830 
1831     /* copy data into free space, then initialize lnk */
1832     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1833 
1834     /* add the k-th row into il and jl */
1835     if (nzk-1 > 0){
1836       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1837       jl[k] = jl[i]; jl[i] = k;
1838       il[k] = ui[k] + 1;
1839     }
1840     ui_ptr[k] = current_space->array;
1841     current_space->array           += nzk;
1842     current_space->local_used      += nzk;
1843     current_space->local_remaining -= nzk;
1844 
1845     ui[k+1] = ui[k] + nzk;
1846   }
1847 
1848 #if defined(PETSC_USE_INFO)
1849   if (ai[am] != 0) {
1850     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1851     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1852     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1853     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1854   } else {
1855      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1856   }
1857 #endif
1858 
1859   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1860   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1861   ierr = PetscFree(jl);CHKERRQ(ierr);
1862 
1863   /* destroy list of free space and other temporary array(s) */
1864   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1865   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1866   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1867 
1868   /* put together the new matrix in MATSEQSBAIJ format */
1869 
1870   b = (Mat_SeqSBAIJ*)(fact)->data;
1871   b->singlemalloc = PETSC_FALSE;
1872   b->free_a       = PETSC_TRUE;
1873   b->free_ij      = PETSC_TRUE;
1874   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1875   b->j    = uj;
1876   b->i    = ui;
1877   b->diag = 0;
1878   b->ilen = 0;
1879   b->imax = 0;
1880   b->row  = perm;
1881   b->col  = perm;
1882   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1883   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1884   b->icol = iperm;
1885   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1886   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1887   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1888   b->maxnz = b->nz = ui[am];
1889 
1890   (fact)->info.factor_mallocs    = reallocs;
1891   (fact)->info.fill_ratio_given  = fill;
1892   if (ai[am] != 0) {
1893     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1894   } else {
1895     (fact)->info.fill_ratio_needed = 0.0;
1896   }
1897   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_iludt"
1903 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_iludt(Mat A,Vec bb,Vec xx)
1904 {
1905   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1906   PetscErrorCode    ierr;
1907   PetscInt          n = A->rmap->n;
1908   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
1909   PetscScalar       *x,sum;
1910   const PetscScalar *b;
1911   const MatScalar   *aa = a->a,*v;
1912   PetscInt          i,nz;
1913 
1914   PetscFunctionBegin;
1915   if (!n) PetscFunctionReturn(0);
1916 
1917   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1918   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1919 
1920   /* forward solve the lower triangular */
1921   x[0] = b[0];
1922   v    = aa;
1923   vi   = aj;
1924   for (i=1; i<n; i++) {
1925     nz  = ai[i+1] - ai[i];
1926     sum = b[i];
1927     while (nz--) sum -= *v++ * x[*vi++];
1928     x[i] = sum;
1929   }
1930 
1931   /* backward solve the upper triangular */
1932   v   = aa + adiag[n] + 1;
1933   vi  = aj + adiag[n] + 1;
1934   for (i=n-1; i>=0; i--){
1935     nz  = adiag[i] - adiag[i+1] - 1;
1936     sum = x[i];
1937     while (nz--) sum -= *v++ * x[*vi++];
1938     x[i] = sum*aa[adiag[i]];
1939     v++; vi++;
1940   }
1941 
1942   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1943   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1944   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatSolve_SeqAIJ_iludt"
1950 PetscErrorCode MatSolve_SeqAIJ_iludt(Mat A,Vec bb,Vec xx)
1951 {
1952   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1953   IS                iscol = a->col,isrow = a->row;
1954   PetscErrorCode    ierr;
1955   PetscInt          i,n=A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag=a->diag;
1956   PetscInt          nz;
1957   const PetscInt    *rout,*cout,*r,*c;
1958   PetscScalar       *x,*tmp,*tmps;
1959   const PetscScalar *b;
1960   const MatScalar   *aa = a->a,*v;
1961 
1962   PetscFunctionBegin;
1963   if (!n) PetscFunctionReturn(0);
1964 
1965   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1966   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1967   tmp  = a->solve_work;
1968 
1969   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1970   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1971 
1972   /* forward solve the lower triangular */
1973   tmp[0] = b[*r++];
1974   tmps   = tmp;
1975   v      = aa;
1976   vi     = aj;
1977   for (i=1; i<n; i++) {
1978     nz  = ai[i+1] - ai[i];
1979     tmp[i] = b[*r++];
1980     SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz);
1981     v += nz; vi += nz;
1982   }
1983 
1984   /* backward solve the upper triangular */
1985   v   = aa + adiag[n] + 1;
1986   vi  = aj + adiag[n] + 1;
1987   for (i=n-1; i>=0; i--){
1988     nz  = adiag[i] - adiag[i+1] - 1;
1989     SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz);
1990     x[*c--] = tmp[i] = tmp[i]*aa[adiag[i]];
1991     v += nz+1; vi += nz+1;
1992   }
1993 
1994   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1995   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1996   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1997   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1998   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
2004 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
2005 {
2006   Mat                B = *fact;
2007   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
2008   IS                 isicol;
2009   PetscErrorCode     ierr;
2010   const PetscInt     *r,*ic;
2011   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
2012   PetscInt           *bi,*bj,*bdiag;
2013   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au;
2014   PetscInt           nlnk,*lnk;
2015   PetscBT            lnkbt;
2016   PetscTruth         row_identity,icol_identity,both_identity;
2017   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp;
2018   const PetscInt     *ics;
2019   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
2020   PetscReal          dt=info->dt,shift=info->shiftinblocks;
2021   PetscInt           nnz_max;
2022   PetscTruth         missing;
2023 
2024   PetscFunctionBegin;
2025   //printf("MatILUDTFactor_SeqAIJ is called...\n");
2026   /* ------- symbolic factorization, can be reused ---------*/
2027   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
2028   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2029   adiag=a->diag;
2030 
2031   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
2032 
2033   /* bdiag is location of diagonal in factor */
2034   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
2035 
2036   /* allocate row pointers bi */
2037   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
2038 
2039   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
2040   dtcount = (PetscInt)info->dtcount;
2041   if (dtcount > n/2) dtcount = n/2;
2042   nnz_max  = ai[n]+2*n*dtcount+2;
2043   ierr = PetscMalloc(nnz_max*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2044   ierr = PetscMalloc(nnz_max*sizeof(MatScalar),&ba);CHKERRQ(ierr);
2045 
2046   /* put together the new matrix */
2047   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2048   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
2049   b    = (Mat_SeqAIJ*)(B)->data;
2050   b->free_a       = PETSC_TRUE;
2051   b->free_ij      = PETSC_TRUE;
2052   b->singlemalloc = PETSC_FALSE;
2053   b->a          = ba;
2054   b->j          = bj;
2055   b->i          = bi;
2056   b->diag       = bdiag;
2057   b->ilen       = 0;
2058   b->imax       = 0;
2059   b->row        = isrow;
2060   b->col        = iscol;
2061   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2062   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2063   b->icol       = isicol;
2064   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2065 
2066   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2067   b->maxnz = nnz_max;
2068 
2069   (B)->factor                = MAT_FACTOR_ILUDT;
2070   (B)->info.factor_mallocs   = 0;
2071   (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
2072   CHKMEMQ;
2073   /* ------- end of symbolic factorization ---------*/
2074 
2075   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2076   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
2077   ics  = ic;
2078 
2079   /* linked list for storing column indices of the active row */
2080   nlnk = n + 1;
2081   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2082 
2083   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
2084   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
2085   jtmp = im + n;
2086   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
2087   ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
2088   vtmp = rtmp + n;
2089 
2090   bi[0]    = 0;
2091   bdiag[0] = nnz_max-1; /* location of diagonal in factor B */
2092   for (i=0; i<n; i++) {
2093     //printf("row %d ...\n",i);
2094     /* copy initial fill into linked list */
2095     nzi = 0; /* nonzeros for active row i */
2096     nzi = ai[r[i]+1] - ai[r[i]];
2097     if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
2098     nzi_al = adiag[r[i]] - ai[r[i]];
2099     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
2100     ajtmp = aj + ai[r[i]];
2101     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2102 
2103     /* load in initial (unfactored row) */
2104     ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2105     aatmp = a->a + ai[r[i]];
2106     for (j=0; j<nzi; j++) {
2107       rtmp[ics[*ajtmp++]] = *aatmp++;
2108     }
2109 
2110     /* add pivot rows into linked list */
2111     row = lnk[n];
2112     while (row < i) {
2113       nzi_bl = bi[row+1] - bi[row] + 1;
2114       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
2115       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
2116       nzi  += nlnk;
2117       row   = lnk[row];
2118     }
2119 
2120     /* copy data from lnk into jtmp, then initialize lnk */
2121     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
2122 
2123     /* numerical factorization */
2124     //printf("row %d:\n",i);
2125     bjtmp = jtmp;
2126     row   = *bjtmp++; /* 1st pivot row */
2127     while  (row < i) {
2128       //printf("  prow %d\n",row);
2129       pc         = rtmp + row;
2130       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
2131       multiplier = (*pc) * (*pv);
2132       *pc        = multiplier;
2133       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
2134         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
2135         pv         = ba + bdiag[row+1] + 1;
2136         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
2137         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
2138         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
2139         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
2140       }
2141       row = *bjtmp++;
2142     }
2143 
2144     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
2145     nzi_bl = 0; j = 0;
2146     while (jtmp[j] < i){ /* Note: jtmp is sorted */
2147       vtmp[j] = rtmp[jtmp[j]];
2148       nzi_bl++; j++;
2149     }
2150     nzi_bu = nzi - nzi_bl -1;
2151     while (j < nzi){
2152       vtmp[j] = rtmp[jtmp[j]];
2153       j++;
2154     }
2155 
2156     bjtmp = bj + bi[i];
2157     batmp = ba + bi[i];
2158     /* apply level dropping rule to L part */
2159     ncut = nzi_al + dtcount;
2160     if (ncut < nzi_bl){
2161       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
2162       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
2163     } else {
2164       ncut = nzi_bl;
2165     }
2166     for (j=0; j<ncut; j++){
2167       bjtmp[j] = jtmp[j];
2168       batmp[j] = vtmp[j];
2169       //printf(" (%d,%g),",bjtmp[j],batmp[j]);
2170     }
2171     bi[i+1] = bi[i] + ncut;
2172     nzi = ncut + 1;
2173 
2174     /* apply level dropping rule to U part */
2175     ncut = nzi_au + dtcount;
2176     if (ncut < nzi_bu){
2177       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
2178       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
2179     } else {
2180       ncut = nzi_bu;
2181     }
2182     nzi += ncut;
2183 
2184     /* mark bdiagonal */
2185     bdiag[i+1]   = bdiag[i] - (ncut + 1);
2186     bjtmp = bj + bdiag[i];
2187     batmp = ba + bdiag[i];
2188     *bjtmp = i;
2189     *batmp = rtmp[i];
2190     if (*batmp == 0.0) *batmp = dt+shift;
2191     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
2192     //printf(" (%d,%g),",*bjtmp,*batmp);
2193 
2194     bjtmp = bj + bdiag[i+1]+1;
2195     batmp = ba + bdiag[i+1]+1;
2196     for (k=0; k<ncut; k++){
2197       bjtmp[k] = jtmp[nzi_bl+1+k];
2198       batmp[k] = vtmp[nzi_bl+1+k];
2199       //printf(" (%d,%g),",bjtmp[k],batmp[k]);
2200     }
2201     //printf("\n");
2202 
2203     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
2204     /*
2205     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
2206     printf(" ----------------------------\n");
2207     */
2208   } /* for (i=0; i<n; i++) */
2209   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
2210   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
2211 
2212   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2213   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2214 
2215   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2216   ierr = PetscFree(im);CHKERRQ(ierr);
2217   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2218 
2219   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
2220   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
2221 
2222   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
2223   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
2224   both_identity = (PetscTruth) (row_identity && icol_identity);
2225   if (row_identity && icol_identity) {
2226     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_iludt;
2227   } else {
2228     B->ops->solve = MatSolve_SeqAIJ_iludt;
2229   }
2230 
2231   B->ops->lufactorsymbolic  = MatILUDTFactorSymbolic_SeqAIJ;
2232   B->ops->lufactornumeric   = MatILUDTFactorNumeric_SeqAIJ;
2233   B->ops->solveadd          = 0;
2234   B->ops->solvetranspose    = 0;
2235   B->ops->solvetransposeadd = 0;
2236   B->ops->matsolve          = 0;
2237   B->assembled              = PETSC_TRUE;
2238   B->preallocated           = PETSC_TRUE;
2239   PetscFunctionReturn(0);
2240 }
2241 
2242 /* a wraper of MatILUDTFactor_SeqAIJ() */
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
2245 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
2246 {
2247   PetscErrorCode     ierr;
2248 
2249   PetscFunctionBegin;
2250   //printf("MatILUDTFactorSymbolic_SeqAIJ is called...\n");
2251   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
2252 
2253   fact->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ;
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 /*
2258    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
2259    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
2260 */
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
2263 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
2264 {
2265   Mat            C=fact;
2266   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
2267   IS             isrow = b->row,isicol = b->icol;
2268   PetscErrorCode ierr;
2269   const PetscInt *r,*ic,*ics;
2270   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
2271   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
2272   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
2273   PetscReal      dt=info->dt,shift=info->shiftinblocks;
2274   PetscTruth     row_identity, col_identity;
2275 
2276   PetscFunctionBegin;
2277   //printf("MatILUDTFactorNumeric_SeqAIJ is called...\n");
2278   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2279   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
2280   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
2281   ics  = ic;
2282 
2283   for (i=0; i<n; i++){
2284     /* initialize rtmp array */
2285     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
2286     bjtmp = bj + bi[i];
2287     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
2288     rtmp[i] = 0.0;
2289     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
2290     bjtmp = bj + bdiag[i+1] + 1;
2291     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
2292 
2293     /* load in initial unfactored row of A */
2294     //printf("row %d\n",i);
2295     nz    = ai[r[i]+1] - ai[r[i]];
2296     ajtmp = aj + ai[r[i]];
2297     v     = aa + ai[r[i]];
2298     for (j=0; j<nz; j++) {
2299       rtmp[ics[*ajtmp++]] = v[j];
2300       //printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]);
2301     }
2302       //printf("\n");
2303 
2304     /* numerical factorization */
2305     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
2306     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
2307     //printf("%d-: nzl %d\n",i,nzl);
2308     k = 0;
2309     while (k < nzl){
2310       row   = *bjtmp++;
2311       //printf("  prow %d\n",row);
2312       pc         = rtmp + row;
2313       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
2314       multiplier = (*pc) * (*pv);
2315       *pc        = multiplier;
2316       if (PetscAbsScalar(multiplier) > dt){
2317         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
2318         pv         = b->a + bdiag[row+1] + 1;
2319         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
2320         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
2321         //ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
2322       }
2323       k++;
2324     }
2325 
2326     /* finished row so stick it into b->a */
2327     /* L-part */
2328     pv = b->a + bi[i] ;
2329     pj = bj + bi[i] ;
2330     nzl = bi[i+1] - bi[i];
2331     for (j=0; j<nzl; j++) {
2332       pv[j] = rtmp[pj[j]];
2333       //printf(" (%d,%g),",pj[j],pv[j]);
2334     }
2335 
2336     /* diagonal: invert diagonal entries for simplier triangular solves */
2337     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
2338     b->a[bdiag[i]] = 1.0/rtmp[i];
2339     //printf(" (%d,%g),",i,b->a[bdiag[i]]);
2340 
2341     /* U-part */
2342     pv = b->a + bdiag[i+1] + 1;
2343     pj = bj + bdiag[i+1] + 1;
2344     nzu = bdiag[i] - bdiag[i+1] - 1;
2345     for (j=0; j<nzu; j++) {
2346       pv[j] = rtmp[pj[j]];
2347         //printf(" (%d,%g),",pj[j],pv[j]);
2348     }
2349       //printf("\n");
2350   }
2351 
2352   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2353   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2354   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2355 
2356   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
2357   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
2358   if (row_identity && col_identity) {
2359     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_iludt;
2360   } else {
2361     C->ops->solve   = MatSolve_SeqAIJ_iludt;
2362   }
2363   C->ops->solveadd           = 0;
2364   C->ops->solvetranspose     = 0;
2365   C->ops->solvetransposeadd  = 0;
2366   C->ops->matsolve           = 0;
2367   C->assembled    = PETSC_TRUE;
2368   C->preallocated = PETSC_TRUE;
2369   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
2370   PetscFunctionReturn(0);
2371 }
2372