xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 8fa24674de2593b5737853765c1d8f915d67a0f0)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/seq/aij.h"
4 #include "../src/inline/dot.h"
5 #include "../src/inline/spops.h"
6 #include "petscbt.h"
7 #include "../src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 /*
12       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
13 */
14 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
15 {
16   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
17   PetscErrorCode    ierr;
18   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current, newcurrent,*order;
19   const PetscInt    *ai = a->i, *aj = a->j;
20   const PetscScalar *aa = a->a;
21   PetscTruth        *done;
22   PetscReal         best,past,future;
23 
24   PetscFunctionBegin;
25   /* pick initial row */
26   best = -1;
27   for (i=0; i<n; i++) {
28     future = 0;
29     for (j=ai[i]; j<ai[i+1]; j++) {
30       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
31     }
32     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
33     if (past/future > best) {
34       best = past/future;
35       current = i;
36     }
37   }
38 
39   ierr = PetscMalloc(n*sizeof(PetscTruth),&done);CHKERRQ(ierr);
40   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
41   ierr = PetscMemzero(done,n*sizeof(PetscTruth));CHKERRQ(ierr);
42   order[0] = current;
43   for (i=0; i<n-1; i++) {
44     done[current] = PETSC_TRUE;
45     best          = -1;
46     /* loop over all neighbors of current pivot */
47     for (j=ai[current]; j<ai[current+1]; j++) {
48       jj = aj[j];
49       if (done[jj]) continue;
50       /* loop over columns of potential next row computing weights for below and above diagonal */
51       past = future = 0.0;
52       for (k=ai[jj]; k<ai[jj+1]; k++) {
53         kk = aj[k];
54         if (done[kk]) past += PetscAbsScalar(aa[k]);
55         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
56       }
57       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
58       if (past/future > best) {
59         best = past/future;
60         newcurrent = jj;
61       }
62     }
63     if (best == -1) { /* no neighbors to select from so select best of all that remain */
64       best = -1;
65       for (k=0; k<n; k++) {
66         if (done[k]) continue;
67         future = 0;
68         past   = 0;
69         for (j=ai[k]; j<ai[k+1]; j++) {
70           kk = aj[j];
71           if (done[kk]) past += PetscAbsScalar(aa[j]);
72           else if (kk != k) future  += PetscAbsScalar(aa[j]);
73         }
74         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
75         if (past/future > best) {
76           best = past/future;
77           newcurrent = k;
78         }
79       }
80     }
81     if (current == newcurrent) SETERRQ(PETSC_ERR_PLIB,"newcurrent cannot be current");
82     current = newcurrent;
83     order[i+1] = current;
84   }
85   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,irow);CHKERRQ(ierr);
86   *icol = *irow;
87   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
88   ierr = PetscFree(done);CHKERRQ(ierr);
89   ierr = PetscFree(order);CHKERRQ(ierr);
90   PetscFunctionReturn(0);
91 }
92 
93 EXTERN_C_BEGIN
94 #undef __FUNCT__
95 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
96 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
97 {
98   PetscFunctionBegin;
99   *flg = PETSC_TRUE;
100   PetscFunctionReturn(0);
101 }
102 EXTERN_C_END
103 
104 EXTERN_C_BEGIN
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
107 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
108 {
109   PetscInt           n = A->rmap->n;
110   PetscErrorCode     ierr;
111 
112   PetscFunctionBegin;
113   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
114   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
115   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
116     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
117     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
119     (*B)->ops->iludtfactor       = MatILUDTFactor_SeqAIJ;
120   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
121     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
122     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
123     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
124     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
125   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
126   (*B)->factor = ftype;
127   PetscFunctionReturn(0);
128 }
129 EXTERN_C_END
130 
131 #undef __FUNCT__
132 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
133 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
134 {
135   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
136   IS                 isicol;
137   PetscErrorCode     ierr;
138   const PetscInt     *r,*ic;
139   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
140   PetscInt           *bi,*bj,*ajtmp;
141   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
142   PetscReal          f;
143   PetscInt           nlnk,*lnk,k,**bi_ptr;
144   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
145   PetscBT            lnkbt;
146 
147   PetscFunctionBegin;
148   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
149   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
150   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
151   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
152 
153   /* get new row pointers */
154   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
155   bi[0] = 0;
156 
157   /* bdiag is location of diagonal in factor */
158   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
159   bdiag[0] = 0;
160 
161   /* linked list for storing column indices of the active row */
162   nlnk = n + 1;
163   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
164 
165   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
166 
167   /* initial FreeSpace size is f*(ai[n]+1) */
168   f = info->fill;
169   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
170   current_space = free_space;
171 
172   for (i=0; i<n; i++) {
173     /* copy previous fill into linked list */
174     nzi = 0;
175     nnz = ai[r[i]+1] - ai[r[i]];
176     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
177     ajtmp = aj + ai[r[i]];
178     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
179     nzi += nlnk;
180 
181     /* add pivot rows into linked list */
182     row = lnk[n];
183     while (row < i) {
184       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
185       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
186       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
187       nzi += nlnk;
188       row  = lnk[row];
189     }
190     bi[i+1] = bi[i] + nzi;
191     im[i]   = nzi;
192 
193     /* mark bdiag */
194     nzbd = 0;
195     nnz  = nzi;
196     k    = lnk[n];
197     while (nnz-- && k < i){
198       nzbd++;
199       k = lnk[k];
200     }
201     bdiag[i] = bi[i] + nzbd;
202 
203     /* if free space is not available, make more free space */
204     if (current_space->local_remaining<nzi) {
205       nnz = (n - i)*nzi; /* estimated and max additional space needed */
206       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
207       reallocs++;
208     }
209 
210     /* copy data into free space, then initialize lnk */
211     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
212     bi_ptr[i] = current_space->array;
213     current_space->array           += nzi;
214     current_space->local_used      += nzi;
215     current_space->local_remaining -= nzi;
216   }
217 #if defined(PETSC_USE_INFO)
218   if (ai[n] != 0) {
219     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
220     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
221     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
222     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
223     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
224   } else {
225     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
226   }
227 #endif
228 
229   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
230   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
231 
232   /* destroy list of free space and other temporary array(s) */
233   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
234   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
235   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
236   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
237 
238   /* put together the new matrix */
239   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
240   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
241   b    = (Mat_SeqAIJ*)(B)->data;
242   b->free_a       = PETSC_TRUE;
243   b->free_ij      = PETSC_TRUE;
244   b->singlemalloc = PETSC_FALSE;
245   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
246   b->j          = bj;
247   b->i          = bi;
248   b->diag       = bdiag;
249   b->ilen       = 0;
250   b->imax       = 0;
251   b->row        = isrow;
252   b->col        = iscol;
253   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
254   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
255   b->icol       = isicol;
256   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
257 
258   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
259   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
260   b->maxnz = b->nz = bi[n] ;
261 
262   (B)->factor                = MAT_FACTOR_LU;
263   (B)->info.factor_mallocs   = reallocs;
264   (B)->info.fill_ratio_given = f;
265 
266   if (ai[n]) {
267     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
268   } else {
269     (B)->info.fill_ratio_needed = 0.0;
270   }
271   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
272   (B)->ops->solve            = MatSolve_SeqAIJ;
273   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
274   /* switch to inodes if appropriate */
275   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 /*
280     Trouble in factorization, should we dump the original matrix?
281 */
282 #undef __FUNCT__
283 #define __FUNCT__ "MatFactorDumpMatrix"
284 PetscErrorCode MatFactorDumpMatrix(Mat A)
285 {
286   PetscErrorCode ierr;
287   PetscTruth     flg = PETSC_FALSE;
288 
289   PetscFunctionBegin;
290   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
291   if (flg) {
292     PetscViewer viewer;
293     char        filename[PETSC_MAX_PATH_LEN];
294 
295     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
296     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
297     ierr = MatView(A,viewer);CHKERRQ(ierr);
298     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
299   }
300   PetscFunctionReturn(0);
301 }
302 
303 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
304 
305 /* ----------------------------------------------------------- */
306 #undef __FUNCT__
307 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
308 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
309 {
310   Mat            C=B;
311   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
312   IS             isrow = b->row,isicol = b->icol;
313   PetscErrorCode ierr;
314   const PetscInt  *r,*ic,*ics;
315   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
316   PetscInt       *ajtmp,*bjtmp,nz,row,*diag_offset = b->diag,diag,*pj;
317   MatScalar      *rtmp,*pc,multiplier,*v,*pv,d,*aa=a->a;
318   PetscReal      rs=0.0;
319   LUShift_Ctx    sctx;
320   PetscInt       newshift,*ddiag;
321 
322   PetscFunctionBegin;
323   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
324   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
325   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
326   ics  = ic;
327 
328   sctx.shift_top      = 0;
329   sctx.nshift_max     = 0;
330   sctx.shift_lo       = 0;
331   sctx.shift_hi       = 0;
332   sctx.shift_fraction = 0;
333 
334   /* if both shift schemes are chosen by user, only use info->shiftpd */
335   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
336     ddiag          = a->diag;
337     sctx.shift_top = info->zeropivot;
338     for (i=0; i<n; i++) {
339       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
340       d  = (aa)[ddiag[i]];
341       rs = -PetscAbsScalar(d) - PetscRealPart(d);
342       v  = aa+ai[i];
343       nz = ai[i+1] - ai[i];
344       for (j=0; j<nz; j++)
345 	rs += PetscAbsScalar(v[j]);
346       if (rs>sctx.shift_top) sctx.shift_top = rs;
347     }
348     sctx.shift_top   *= 1.1;
349     sctx.nshift_max   = 5;
350     sctx.shift_lo     = 0.;
351     sctx.shift_hi     = 1.;
352   }
353 
354   sctx.shift_amount = 0.0;
355   sctx.nshift       = 0;
356   do {
357     sctx.lushift = PETSC_FALSE;
358     for (i=0; i<n; i++){
359       nz    = bi[i+1] - bi[i];
360       bjtmp = bj + bi[i];
361       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
362 
363       /* load in initial (unfactored row) */
364       nz    = ai[r[i]+1] - ai[r[i]];
365       ajtmp = aj + ai[r[i]];
366       v     = aa + ai[r[i]];
367       for (j=0; j<nz; j++) {
368         rtmp[ics[ajtmp[j]]] = v[j];
369       }
370       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
371 
372       row = *bjtmp++;
373       while  (row < i) {
374         pc = rtmp + row;
375         if (*pc != 0.0) {
376           pv         = b->a + diag_offset[row];
377           pj         = b->j + diag_offset[row] + 1;
378           multiplier = *pc / *pv++;
379           *pc        = multiplier;
380           nz         = bi[row+1] - diag_offset[row] - 1;
381           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
382           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
383         }
384         row = *bjtmp++;
385       }
386       /* finished row so stick it into b->a */
387       pv   = b->a + bi[i] ;
388       pj   = b->j + bi[i] ;
389       nz   = bi[i+1] - bi[i];
390       diag = diag_offset[i] - bi[i];
391       rs   = -PetscAbsScalar(pv[diag]);
392       for (j=0; j<nz; j++) {
393         pv[j] = rtmp[pj[j]];
394         rs   += PetscAbsScalar(pv[j]);
395       }
396 
397       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
398       sctx.rs  = rs;
399       sctx.pv  = pv[diag];
400       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
401       if (newshift == 1) break;
402     }
403 
404     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
405       /*
406        * if no shift in this attempt & shifting & started shifting & can refine,
407        * then try lower shift
408        */
409       sctx.shift_hi       = sctx.shift_fraction;
410       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
411       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
412       sctx.lushift        = PETSC_TRUE;
413       sctx.nshift++;
414     }
415   } while (sctx.lushift);
416 
417   /* invert diagonal entries for simplier triangular solves */
418   for (i=0; i<n; i++) {
419     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
420   }
421   ierr = PetscFree(rtmp);CHKERRQ(ierr);
422   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
423   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
424   if (b->inode.use) {
425     C->ops->solve   = MatSolve_Inode;
426   } else {
427     PetscTruth row_identity, col_identity;
428     ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
429     ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
430     if (row_identity && col_identity) {
431       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
432     } else {
433       C->ops->solve   = MatSolve_SeqAIJ;
434     }
435   }
436   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
437   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
438   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
439   C->ops->matsolve           = MatMatSolve_SeqAIJ;
440   C->assembled    = PETSC_TRUE;
441   C->preallocated = PETSC_TRUE;
442   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
443   if (sctx.nshift){
444      if (info->shiftpd) {
445       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
446     } else if (info->shiftnz) {
447       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
448     }
449   }
450   PetscFunctionReturn(0);
451 }
452 
453 /*
454    This routine implements inplace ILU(0) with row or/and column permutations.
455    Input:
456      A - original matrix
457    Output;
458      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
459          a->j (col index) is permuted by the inverse of colperm, then sorted
460          a->a reordered accordingly with a->j
461          a->diag (ptr to diagonal elements) is updated.
462 */
463 #undef __FUNCT__
464 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
465 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
466 {
467   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
468   IS             isrow = a->row,isicol = a->icol;
469   PetscErrorCode ierr;
470   const PetscInt *r,*ic,*ics;
471   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
472   PetscInt       *ajtmp,nz,row;
473   PetscInt       *diag = a->diag,nbdiag,*pj;
474   PetscScalar    *rtmp,*pc,multiplier,d;
475   MatScalar      *v,*pv;
476   PetscReal      rs;
477   LUShift_Ctx    sctx;
478   PetscInt       newshift;
479 
480   PetscFunctionBegin;
481   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
482   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
483   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
484   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
485   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
486   ics = ic;
487 
488   sctx.shift_top      = 0;
489   sctx.nshift_max     = 0;
490   sctx.shift_lo       = 0;
491   sctx.shift_hi       = 0;
492   sctx.shift_fraction = 0;
493 
494   /* if both shift schemes are chosen by user, only use info->shiftpd */
495   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
496     sctx.shift_top = 0;
497     for (i=0; i<n; i++) {
498       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
499       d  = (a->a)[diag[i]];
500       rs = -PetscAbsScalar(d) - PetscRealPart(d);
501       v  = a->a+ai[i];
502       nz = ai[i+1] - ai[i];
503       for (j=0; j<nz; j++)
504 	rs += PetscAbsScalar(v[j]);
505       if (rs>sctx.shift_top) sctx.shift_top = rs;
506     }
507     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
508     sctx.shift_top    *= 1.1;
509     sctx.nshift_max   = 5;
510     sctx.shift_lo     = 0.;
511     sctx.shift_hi     = 1.;
512   }
513 
514   sctx.shift_amount = 0;
515   sctx.nshift       = 0;
516   do {
517     sctx.lushift = PETSC_FALSE;
518     for (i=0; i<n; i++){
519       /* load in initial unfactored row */
520       nz    = ai[r[i]+1] - ai[r[i]];
521       ajtmp = aj + ai[r[i]];
522       v     = a->a + ai[r[i]];
523       /* sort permuted ajtmp and values v accordingly */
524       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
525       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
526 
527       diag[r[i]] = ai[r[i]];
528       for (j=0; j<nz; j++) {
529         rtmp[ajtmp[j]] = v[j];
530         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
531       }
532       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
533 
534       row = *ajtmp++;
535       while  (row < i) {
536         pc = rtmp + row;
537         if (*pc != 0.0) {
538           pv         = a->a + diag[r[row]];
539           pj         = aj + diag[r[row]] + 1;
540 
541           multiplier = *pc / *pv++;
542           *pc        = multiplier;
543           nz         = ai[r[row]+1] - diag[r[row]] - 1;
544           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
545           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
546         }
547         row = *ajtmp++;
548       }
549       /* finished row so overwrite it onto a->a */
550       pv   = a->a + ai[r[i]] ;
551       pj   = aj + ai[r[i]] ;
552       nz   = ai[r[i]+1] - ai[r[i]];
553       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
554 
555       rs   = 0.0;
556       for (j=0; j<nz; j++) {
557         pv[j] = rtmp[pj[j]];
558         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
559       }
560 
561       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
562       sctx.rs  = rs;
563       sctx.pv  = pv[nbdiag];
564       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
565       if (newshift == 1) break;
566     }
567 
568     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
569       /*
570        * if no shift in this attempt & shifting & started shifting & can refine,
571        * then try lower shift
572        */
573       sctx.shift_hi        = sctx.shift_fraction;
574       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
575       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
576       sctx.lushift         = PETSC_TRUE;
577       sctx.nshift++;
578     }
579   } while (sctx.lushift);
580 
581   /* invert diagonal entries for simplier triangular solves */
582   for (i=0; i<n; i++) {
583     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
584   }
585 
586   ierr = PetscFree(rtmp);CHKERRQ(ierr);
587   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
588   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
589   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
590   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
591   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
592   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
593   A->assembled = PETSC_TRUE;
594   A->preallocated = PETSC_TRUE;
595   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
596   if (sctx.nshift){
597     if (info->shiftpd) {
598       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
599     } else if (info->shiftnz) {
600       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
601     }
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 /* ----------------------------------------------------------- */
607 #undef __FUNCT__
608 #define __FUNCT__ "MatLUFactor_SeqAIJ"
609 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
610 {
611   PetscErrorCode ierr;
612   Mat            C;
613 
614   PetscFunctionBegin;
615   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
616   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
617   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
618   A->ops->solve            = C->ops->solve;
619   A->ops->solvetranspose   = C->ops->solvetranspose;
620   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
621   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 /* ----------------------------------------------------------- */
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatSolve_SeqAIJ"
628 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
629 {
630   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
631   IS                iscol = a->col,isrow = a->row;
632   PetscErrorCode    ierr;
633   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
634   PetscInt          nz;
635   const PetscInt    *rout,*cout,*r,*c;
636   PetscScalar       *x,*tmp,*tmps,sum;
637   const PetscScalar *b;
638   const MatScalar   *aa = a->a,*v;
639 
640   PetscFunctionBegin;
641   if (!n) PetscFunctionReturn(0);
642 
643   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
644   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
645   tmp  = a->solve_work;
646 
647   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
648   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
649 
650   /* forward solve the lower triangular */
651   tmp[0] = b[*r++];
652   tmps   = tmp;
653   for (i=1; i<n; i++) {
654     v   = aa + ai[i] ;
655     vi  = aj + ai[i] ;
656     nz  = a->diag[i] - ai[i];
657     sum = b[*r++];
658     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
659     tmp[i] = sum;
660   }
661 
662   /* backward solve the upper triangular */
663   for (i=n-1; i>=0; i--){
664     v   = aa + a->diag[i] + 1;
665     vi  = aj + a->diag[i] + 1;
666     nz  = ai[i+1] - a->diag[i] - 1;
667     sum = tmp[i];
668     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
669     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
670   }
671 
672   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
673   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
674   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
675   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
676   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatMatSolve_SeqAIJ"
682 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
683 {
684   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
685   IS              iscol = a->col,isrow = a->row;
686   PetscErrorCode  ierr;
687   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
688   PetscInt        nz,neq;
689   const PetscInt  *rout,*cout,*r,*c;
690   PetscScalar     *x,*b,*tmp,*tmps,sum;
691   const MatScalar *aa = a->a,*v;
692   PetscTruth      bisdense,xisdense;
693 
694   PetscFunctionBegin;
695   if (!n) PetscFunctionReturn(0);
696 
697   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
698   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
699   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
700   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
701 
702   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
703   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
704 
705   tmp  = a->solve_work;
706   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
707   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
708 
709   for (neq=0; neq<B->cmap->n; neq++){
710     /* forward solve the lower triangular */
711     tmp[0] = b[r[0]];
712     tmps   = tmp;
713     for (i=1; i<n; i++) {
714       v   = aa + ai[i] ;
715       vi  = aj + ai[i] ;
716       nz  = a->diag[i] - ai[i];
717       sum = b[r[i]];
718       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
719       tmp[i] = sum;
720     }
721     /* backward solve the upper triangular */
722     for (i=n-1; i>=0; i--){
723       v   = aa + a->diag[i] + 1;
724       vi  = aj + a->diag[i] + 1;
725       nz  = ai[i+1] - a->diag[i] - 1;
726       sum = tmp[i];
727       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
728       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
729     }
730 
731     b += n;
732     x += n;
733   }
734   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
735   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
736   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
737   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
738   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
744 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
745 {
746   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
747   IS              iscol = a->col,isrow = a->row;
748   PetscErrorCode  ierr;
749   const PetscInt  *r,*c,*rout,*cout;
750   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
751   PetscInt        nz,row;
752   PetscScalar     *x,*b,*tmp,*tmps,sum;
753   const MatScalar *aa = a->a,*v;
754 
755   PetscFunctionBegin;
756   if (!n) PetscFunctionReturn(0);
757 
758   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
759   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
760   tmp  = a->solve_work;
761 
762   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
763   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
764 
765   /* forward solve the lower triangular */
766   tmp[0] = b[*r++];
767   tmps   = tmp;
768   for (row=1; row<n; row++) {
769     i   = rout[row]; /* permuted row */
770     v   = aa + ai[i] ;
771     vi  = aj + ai[i] ;
772     nz  = a->diag[i] - ai[i];
773     sum = b[*r++];
774     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
775     tmp[row] = sum;
776   }
777 
778   /* backward solve the upper triangular */
779   for (row=n-1; row>=0; row--){
780     i   = rout[row]; /* permuted row */
781     v   = aa + a->diag[i] + 1;
782     vi  = aj + a->diag[i] + 1;
783     nz  = ai[i+1] - a->diag[i] - 1;
784     sum = tmp[row];
785     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
786     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
787   }
788 
789   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
790   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
791   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
792   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
793   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 /* ----------------------------------------------------------- */
798 #undef __FUNCT__
799 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
800 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
801 {
802   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
803   PetscErrorCode    ierr;
804   PetscInt          n = A->rmap->n;
805   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
806   PetscScalar       *x;
807   const PetscScalar *b;
808   const MatScalar   *aa = a->a;
809 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
810   PetscInt          adiag_i,i,nz,ai_i;
811   const MatScalar   *v;
812   PetscScalar       sum;
813 #endif
814 
815   PetscFunctionBegin;
816   if (!n) PetscFunctionReturn(0);
817 
818   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
822   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
823 #else
824   /* forward solve the lower triangular */
825   x[0] = b[0];
826   for (i=1; i<n; i++) {
827     ai_i = ai[i];
828     v    = aa + ai_i;
829     vi   = aj + ai_i;
830     nz   = adiag[i] - ai_i;
831     sum  = b[i];
832     while (nz--) sum -= *v++ * x[*vi++];
833     x[i] = sum;
834   }
835 
836   /* backward solve the upper triangular */
837   for (i=n-1; i>=0; i--){
838     adiag_i = adiag[i];
839     v       = aa + adiag_i + 1;
840     vi      = aj + adiag_i + 1;
841     nz      = ai[i+1] - adiag_i - 1;
842     sum     = x[i];
843     while (nz--) sum -= *v++ * x[*vi++];
844     x[i]    = sum*aa[adiag_i];
845   }
846 #endif
847   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
848   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
849   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 #undef __FUNCT__
854 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
855 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
856 {
857   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
858   IS              iscol = a->col,isrow = a->row;
859   PetscErrorCode  ierr;
860   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
861   PetscInt        nz;
862   const PetscInt  *rout,*cout,*r,*c;
863   PetscScalar     *x,*b,*tmp,sum;
864   const MatScalar *aa = a->a,*v;
865 
866   PetscFunctionBegin;
867   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
868 
869   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
870   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
871   tmp  = a->solve_work;
872 
873   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
874   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
875 
876   /* forward solve the lower triangular */
877   tmp[0] = b[*r++];
878   for (i=1; i<n; i++) {
879     v   = aa + ai[i] ;
880     vi  = aj + ai[i] ;
881     nz  = a->diag[i] - ai[i];
882     sum = b[*r++];
883     while (nz--) sum -= *v++ * tmp[*vi++ ];
884     tmp[i] = sum;
885   }
886 
887   /* backward solve the upper triangular */
888   for (i=n-1; i>=0; i--){
889     v   = aa + a->diag[i] + 1;
890     vi  = aj + a->diag[i] + 1;
891     nz  = ai[i+1] - a->diag[i] - 1;
892     sum = tmp[i];
893     while (nz--) sum -= *v++ * tmp[*vi++ ];
894     tmp[i] = sum*aa[a->diag[i]];
895     x[*c--] += tmp[i];
896   }
897 
898   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
899   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
900   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
901   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
902   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
903 
904   PetscFunctionReturn(0);
905 }
906 /* -------------------------------------------------------------------*/
907 #undef __FUNCT__
908 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
909 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
910 {
911   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
912   IS              iscol = a->col,isrow = a->row;
913   PetscErrorCode  ierr;
914   const PetscInt  *rout,*cout,*r,*c;
915   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
916   PetscInt        nz,*diag = a->diag;
917   PetscScalar     *x,*b,*tmp,s1;
918   const MatScalar *aa = a->a,*v;
919 
920   PetscFunctionBegin;
921   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
922   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
923   tmp  = a->solve_work;
924 
925   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
926   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
927 
928   /* copy the b into temp work space according to permutation */
929   for (i=0; i<n; i++) tmp[i] = b[c[i]];
930 
931   /* forward solve the U^T */
932   for (i=0; i<n; i++) {
933     v   = aa + diag[i] ;
934     vi  = aj + diag[i] + 1;
935     nz  = ai[i+1] - diag[i] - 1;
936     s1  = tmp[i];
937     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
938     while (nz--) {
939       tmp[*vi++ ] -= (*v++)*s1;
940     }
941     tmp[i] = s1;
942   }
943 
944   /* backward solve the L^T */
945   for (i=n-1; i>=0; i--){
946     v   = aa + diag[i] - 1 ;
947     vi  = aj + diag[i] - 1 ;
948     nz  = diag[i] - ai[i];
949     s1  = tmp[i];
950     while (nz--) {
951       tmp[*vi-- ] -= (*v--)*s1;
952     }
953   }
954 
955   /* copy tmp into x according to permutation */
956   for (i=0; i<n; i++) x[r[i]] = tmp[i];
957 
958   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
959   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
960   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
961   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
962 
963   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
964   PetscFunctionReturn(0);
965 }
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
969 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
970 {
971   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
972   IS              iscol = a->col,isrow = a->row;
973   PetscErrorCode  ierr;
974   const PetscInt  *r,*c,*rout,*cout;
975   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
976   PetscInt        nz,*diag = a->diag;
977   PetscScalar     *x,*b,*tmp;
978   const MatScalar *aa = a->a,*v;
979 
980   PetscFunctionBegin;
981   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
982 
983   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
984   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
985   tmp = a->solve_work;
986 
987   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
988   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
989 
990   /* copy the b into temp work space according to permutation */
991   for (i=0; i<n; i++) tmp[i] = b[c[i]];
992 
993   /* forward solve the U^T */
994   for (i=0; i<n; i++) {
995     v   = aa + diag[i] ;
996     vi  = aj + diag[i] + 1;
997     nz  = ai[i+1] - diag[i] - 1;
998     tmp[i] *= *v++;
999     while (nz--) {
1000       tmp[*vi++ ] -= (*v++)*tmp[i];
1001     }
1002   }
1003 
1004   /* backward solve the L^T */
1005   for (i=n-1; i>=0; i--){
1006     v   = aa + diag[i] - 1 ;
1007     vi  = aj + diag[i] - 1 ;
1008     nz  = diag[i] - ai[i];
1009     while (nz--) {
1010       tmp[*vi-- ] -= (*v--)*tmp[i];
1011     }
1012   }
1013 
1014   /* copy tmp into x according to permutation */
1015   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1016 
1017   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1018   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1019   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1020   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1021 
1022   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 /* ----------------------------------------------------------------*/
1026 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1027 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1031 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1032 {
1033   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1034   IS                 isicol;
1035   PetscErrorCode     ierr;
1036   const PetscInt     *r,*ic;
1037   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1038   PetscInt           *bi,*cols,nnz,*cols_lvl;
1039   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1040   PetscInt           i,levels,diagonal_fill;
1041   PetscTruth         col_identity,row_identity;
1042   PetscReal          f;
1043   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1044   PetscBT            lnkbt;
1045   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1046   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1047   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1048   PetscTruth         missing;
1049 
1050   PetscFunctionBegin;
1051   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1052   f             = info->fill;
1053   levels        = (PetscInt)info->levels;
1054   diagonal_fill = (PetscInt)info->diagonal_fill;
1055   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1056 
1057   /* special case that simply copies fill pattern */
1058   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1059   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1060   if (!levels && row_identity && col_identity) {
1061     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr);
1062     fact->factor = MAT_FACTOR_ILU;
1063     (fact)->info.factor_mallocs    = 0;
1064     (fact)->info.fill_ratio_given  = info->fill;
1065     (fact)->info.fill_ratio_needed = 1.0;
1066     b               = (Mat_SeqAIJ*)(fact)->data;
1067     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1068     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1069     b->row              = isrow;
1070     b->col              = iscol;
1071     b->icol             = isicol;
1072     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1073     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1074     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1075     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1076     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1077     PetscFunctionReturn(0);
1078   }
1079 
1080   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1081   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1082 
1083   /* get new row pointers */
1084   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1085   bi[0] = 0;
1086   /* bdiag is location of diagonal in factor */
1087   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1088   bdiag[0]  = 0;
1089 
1090   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1091   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1092 
1093   /* create a linked list for storing column indices of the active row */
1094   nlnk = n + 1;
1095   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1096 
1097   /* initial FreeSpace size is f*(ai[n]+1) */
1098   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1099   current_space = free_space;
1100   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1101   current_space_lvl = free_space_lvl;
1102 
1103   for (i=0; i<n; i++) {
1104     nzi = 0;
1105     /* copy current row into linked list */
1106     nnz  = ai[r[i]+1] - ai[r[i]];
1107     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1108     cols = aj + ai[r[i]];
1109     lnk[i] = -1; /* marker to indicate if diagonal exists */
1110     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1111     nzi += nlnk;
1112 
1113     /* make sure diagonal entry is included */
1114     if (diagonal_fill && lnk[i] == -1) {
1115       fm = n;
1116       while (lnk[fm] < i) fm = lnk[fm];
1117       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1118       lnk[fm]    = i;
1119       lnk_lvl[i] = 0;
1120       nzi++; dcount++;
1121     }
1122 
1123     /* add pivot rows into the active row */
1124     nzbd = 0;
1125     prow = lnk[n];
1126     while (prow < i) {
1127       nnz      = bdiag[prow];
1128       cols     = bj_ptr[prow] + nnz + 1;
1129       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1130       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1131       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1132       nzi += nlnk;
1133       prow = lnk[prow];
1134       nzbd++;
1135     }
1136     bdiag[i] = nzbd;
1137     bi[i+1]  = bi[i] + nzi;
1138 
1139     /* if free space is not available, make more free space */
1140     if (current_space->local_remaining<nzi) {
1141       nnz = nzi*(n - i); /* estimated and max additional space needed */
1142       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1143       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1144       reallocs++;
1145     }
1146 
1147     /* copy data into free_space and free_space_lvl, then initialize lnk */
1148     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1149     bj_ptr[i]    = current_space->array;
1150     bjlvl_ptr[i] = current_space_lvl->array;
1151 
1152     /* make sure the active row i has diagonal entry */
1153     if (*(bj_ptr[i]+bdiag[i]) != i) {
1154       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1155     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1156     }
1157 
1158     current_space->array           += nzi;
1159     current_space->local_used      += nzi;
1160     current_space->local_remaining -= nzi;
1161     current_space_lvl->array           += nzi;
1162     current_space_lvl->local_used      += nzi;
1163     current_space_lvl->local_remaining -= nzi;
1164   }
1165 
1166   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1167   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1168 
1169   /* destroy list of free space and other temporary arrays */
1170   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1171   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1172   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1173   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1174   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1175 
1176 #if defined(PETSC_USE_INFO)
1177   {
1178     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1179     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1180     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1181     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1182     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1183     if (diagonal_fill) {
1184       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1185     }
1186   }
1187 #endif
1188 
1189   /* put together the new matrix */
1190   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1191   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1192   b = (Mat_SeqAIJ*)(fact)->data;
1193   b->free_a       = PETSC_TRUE;
1194   b->free_ij      = PETSC_TRUE;
1195   b->singlemalloc = PETSC_FALSE;
1196   ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1197   b->j          = bj;
1198   b->i          = bi;
1199   for (i=0; i<n; i++) bdiag[i] += bi[i];
1200   b->diag       = bdiag;
1201   b->ilen       = 0;
1202   b->imax       = 0;
1203   b->row        = isrow;
1204   b->col        = iscol;
1205   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1206   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1207   b->icol       = isicol;
1208   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1209   /* In b structure:  Free imax, ilen, old a, old j.
1210      Allocate bdiag, solve_work, new a, new j */
1211   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1212   b->maxnz             = b->nz = bi[n] ;
1213   (fact)->info.factor_mallocs    = reallocs;
1214   (fact)->info.fill_ratio_given  = f;
1215   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1216   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1217   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1218   PetscFunctionReturn(0);
1219 }
1220 
1221 #include "../src/mat/impls/sbaij/seq/sbaij.h"
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1224 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1225 {
1226   Mat            C = B;
1227   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1228   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1229   IS             ip=b->row,iip = b->icol;
1230   PetscErrorCode ierr;
1231   const PetscInt *rip,*riip;
1232   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1233   PetscInt       *ai=a->i,*aj=a->j;
1234   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1235   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1236   PetscReal      zeropivot,rs,shiftnz;
1237   PetscReal      shiftpd;
1238   ChShift_Ctx    sctx;
1239   PetscInt       newshift;
1240   PetscTruth     perm_identity;
1241 
1242   PetscFunctionBegin;
1243 
1244   shiftnz   = info->shiftnz;
1245   shiftpd   = info->shiftpd;
1246   zeropivot = info->zeropivot;
1247 
1248   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1249   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1250 
1251   /* initialization */
1252   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1253   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1254   jl   = il + mbs;
1255   rtmp = (MatScalar*)(jl + mbs);
1256 
1257   sctx.shift_amount = 0;
1258   sctx.nshift       = 0;
1259   do {
1260     sctx.chshift = PETSC_FALSE;
1261     for (i=0; i<mbs; i++) {
1262       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1263     }
1264 
1265     for (k = 0; k<mbs; k++){
1266       bval = ba + bi[k];
1267       /* initialize k-th row by the perm[k]-th row of A */
1268       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1269       for (j = jmin; j < jmax; j++){
1270         col = riip[aj[j]];
1271         if (col >= k){ /* only take upper triangular entry */
1272           rtmp[col] = aa[j];
1273           *bval++  = 0.0; /* for in-place factorization */
1274         }
1275       }
1276       /* shift the diagonal of the matrix */
1277       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1278 
1279       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1280       dk = rtmp[k];
1281       i = jl[k]; /* first row to be added to k_th row  */
1282 
1283       while (i < k){
1284         nexti = jl[i]; /* next row to be added to k_th row */
1285 
1286         /* compute multiplier, update diag(k) and U(i,k) */
1287         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1288         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1289         dk += uikdi*ba[ili];
1290         ba[ili] = uikdi; /* -U(i,k) */
1291 
1292         /* add multiple of row i to k-th row */
1293         jmin = ili + 1; jmax = bi[i+1];
1294         if (jmin < jmax){
1295           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1296           /* update il and jl for row i */
1297           il[i] = jmin;
1298           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1299         }
1300         i = nexti;
1301       }
1302 
1303       /* shift the diagonals when zero pivot is detected */
1304       /* compute rs=sum of abs(off-diagonal) */
1305       rs   = 0.0;
1306       jmin = bi[k]+1;
1307       nz   = bi[k+1] - jmin;
1308       bcol = bj + jmin;
1309       while (nz--){
1310         rs += PetscAbsScalar(rtmp[*bcol]);
1311         bcol++;
1312       }
1313 
1314       sctx.rs = rs;
1315       sctx.pv = dk;
1316       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1317 
1318       if (newshift == 1) {
1319         if (!sctx.shift_amount) {
1320           sctx.shift_amount = 1e-5;
1321         }
1322         break;
1323       }
1324 
1325       /* copy data into U(k,:) */
1326       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1327       jmin = bi[k]+1; jmax = bi[k+1];
1328       if (jmin < jmax) {
1329         for (j=jmin; j<jmax; j++){
1330           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1331         }
1332         /* add the k-th row into il and jl */
1333         il[k] = jmin;
1334         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1335       }
1336     }
1337   } while (sctx.chshift);
1338   ierr = PetscFree(il);CHKERRQ(ierr);
1339 
1340   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1341   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1342 
1343   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1344   if (perm_identity){
1345     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1346     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1347     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1348     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1349   } else {
1350     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1351     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1352     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1353     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1354   }
1355 
1356   C->assembled    = PETSC_TRUE;
1357   C->preallocated = PETSC_TRUE;
1358   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1359   if (sctx.nshift){
1360     if (shiftnz) {
1361       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1362     } else if (shiftpd) {
1363       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1364     }
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1371 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1372 {
1373   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1374   Mat_SeqSBAIJ       *b;
1375   PetscErrorCode     ierr;
1376   PetscTruth         perm_identity,missing;
1377   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1378   const PetscInt     *rip,*riip;
1379   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1380   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1381   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1382   PetscReal          fill=info->fill,levels=info->levels;
1383   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1384   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1385   PetscBT            lnkbt;
1386   IS                 iperm;
1387 
1388   PetscFunctionBegin;
1389   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1390   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1391   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1392   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1393   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1394 
1395   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1396   ui[0] = 0;
1397 
1398   /* ICC(0) without matrix ordering: simply copies fill pattern */
1399   if (!levels && perm_identity) {
1400 
1401     for (i=0; i<am; i++) {
1402       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1403     }
1404     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1405     cols = uj;
1406     for (i=0; i<am; i++) {
1407       aj    = a->j + a->diag[i];
1408       ncols = ui[i+1] - ui[i];
1409       for (j=0; j<ncols; j++) *cols++ = *aj++;
1410     }
1411   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1412     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1413     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1414 
1415     /* initialization */
1416     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1417 
1418     /* jl: linked list for storing indices of the pivot rows
1419        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1420     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1421     il         = jl + am;
1422     uj_ptr     = (PetscInt**)(il + am);
1423     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1424     for (i=0; i<am; i++){
1425       jl[i] = am; il[i] = 0;
1426     }
1427 
1428     /* create and initialize a linked list for storing column indices of the active row k */
1429     nlnk = am + 1;
1430     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1431 
1432     /* initial FreeSpace size is fill*(ai[am]+1) */
1433     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1434     current_space = free_space;
1435     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1436     current_space_lvl = free_space_lvl;
1437 
1438     for (k=0; k<am; k++){  /* for each active row k */
1439       /* initialize lnk by the column indices of row rip[k] of A */
1440       nzk   = 0;
1441       ncols = ai[rip[k]+1] - ai[rip[k]];
1442       if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1443       ncols_upper = 0;
1444       for (j=0; j<ncols; j++){
1445         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1446         if (riip[i] >= k){ /* only take upper triangular entry */
1447           ajtmp[ncols_upper] = i;
1448           ncols_upper++;
1449         }
1450       }
1451       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1452       nzk += nlnk;
1453 
1454       /* update lnk by computing fill-in for each pivot row to be merged in */
1455       prow = jl[k]; /* 1st pivot row */
1456 
1457       while (prow < k){
1458         nextprow = jl[prow];
1459 
1460         /* merge prow into k-th row */
1461         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1462         jmax = ui[prow+1];
1463         ncols = jmax-jmin;
1464         i     = jmin - ui[prow];
1465         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1466         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1467         j     = *(uj - 1);
1468         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1469         nzk += nlnk;
1470 
1471         /* update il and jl for prow */
1472         if (jmin < jmax){
1473           il[prow] = jmin;
1474           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1475         }
1476         prow = nextprow;
1477       }
1478 
1479       /* if free space is not available, make more free space */
1480       if (current_space->local_remaining<nzk) {
1481         i = am - k + 1; /* num of unfactored rows */
1482         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1483         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1484         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1485         reallocs++;
1486       }
1487 
1488       /* copy data into free_space and free_space_lvl, then initialize lnk */
1489       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1490       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1491 
1492       /* add the k-th row into il and jl */
1493       if (nzk > 1){
1494         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1495         jl[k] = jl[i]; jl[i] = k;
1496         il[k] = ui[k] + 1;
1497       }
1498       uj_ptr[k]     = current_space->array;
1499       uj_lvl_ptr[k] = current_space_lvl->array;
1500 
1501       current_space->array           += nzk;
1502       current_space->local_used      += nzk;
1503       current_space->local_remaining -= nzk;
1504 
1505       current_space_lvl->array           += nzk;
1506       current_space_lvl->local_used      += nzk;
1507       current_space_lvl->local_remaining -= nzk;
1508 
1509       ui[k+1] = ui[k] + nzk;
1510     }
1511 
1512 #if defined(PETSC_USE_INFO)
1513     if (ai[am] != 0) {
1514       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1515       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1516       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1517       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1518     } else {
1519       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1520     }
1521 #endif
1522 
1523     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1524     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1525     ierr = PetscFree(jl);CHKERRQ(ierr);
1526     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1527 
1528     /* destroy list of free space and other temporary array(s) */
1529     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1530     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1531     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1532     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1533 
1534   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1535 
1536   /* put together the new matrix in MATSEQSBAIJ format */
1537 
1538   b    = (Mat_SeqSBAIJ*)(fact)->data;
1539   b->singlemalloc = PETSC_FALSE;
1540   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1541   b->j    = uj;
1542   b->i    = ui;
1543   b->diag = 0;
1544   b->ilen = 0;
1545   b->imax = 0;
1546   b->row  = perm;
1547   b->col  = perm;
1548   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1549   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1550   b->icol = iperm;
1551   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1552   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1553   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1554   b->maxnz   = b->nz = ui[am];
1555   b->free_a  = PETSC_TRUE;
1556   b->free_ij = PETSC_TRUE;
1557 
1558   (fact)->info.factor_mallocs    = reallocs;
1559   (fact)->info.fill_ratio_given  = fill;
1560   if (ai[am] != 0) {
1561     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1562   } else {
1563     (fact)->info.fill_ratio_needed = 0.0;
1564   }
1565   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1571 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1572 {
1573   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1574   Mat_SeqSBAIJ       *b;
1575   PetscErrorCode     ierr;
1576   PetscTruth         perm_identity;
1577   PetscReal          fill = info->fill;
1578   const PetscInt     *rip,*riip;
1579   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1580   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1581   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1582   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1583   PetscBT            lnkbt;
1584   IS                 iperm;
1585 
1586   PetscFunctionBegin;
1587   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1588   /* check whether perm is the identity mapping */
1589   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1590   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1591   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1592   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1593 
1594   /* initialization */
1595   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1596   ui[0] = 0;
1597 
1598   /* jl: linked list for storing indices of the pivot rows
1599      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1600   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1601   il     = jl + am;
1602   cols   = il + am;
1603   ui_ptr = (PetscInt**)(cols + am);
1604   for (i=0; i<am; i++){
1605     jl[i] = am; il[i] = 0;
1606   }
1607 
1608   /* create and initialize a linked list for storing column indices of the active row k */
1609   nlnk = am + 1;
1610   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1611 
1612   /* initial FreeSpace size is fill*(ai[am]+1) */
1613   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1614   current_space = free_space;
1615 
1616   for (k=0; k<am; k++){  /* for each active row k */
1617     /* initialize lnk by the column indices of row rip[k] of A */
1618     nzk   = 0;
1619     ncols = ai[rip[k]+1] - ai[rip[k]];
1620     if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1621     ncols_upper = 0;
1622     for (j=0; j<ncols; j++){
1623       i = riip[*(aj + ai[rip[k]] + j)];
1624       if (i >= k){ /* only take upper triangular entry */
1625         cols[ncols_upper] = i;
1626         ncols_upper++;
1627       }
1628     }
1629     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1630     nzk += nlnk;
1631 
1632     /* update lnk by computing fill-in for each pivot row to be merged in */
1633     prow = jl[k]; /* 1st pivot row */
1634 
1635     while (prow < k){
1636       nextprow = jl[prow];
1637       /* merge prow into k-th row */
1638       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1639       jmax = ui[prow+1];
1640       ncols = jmax-jmin;
1641       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1642       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1643       nzk += nlnk;
1644 
1645       /* update il and jl for prow */
1646       if (jmin < jmax){
1647         il[prow] = jmin;
1648         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1649       }
1650       prow = nextprow;
1651     }
1652 
1653     /* if free space is not available, make more free space */
1654     if (current_space->local_remaining<nzk) {
1655       i = am - k + 1; /* num of unfactored rows */
1656       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1657       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1658       reallocs++;
1659     }
1660 
1661     /* copy data into free space, then initialize lnk */
1662     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1663 
1664     /* add the k-th row into il and jl */
1665     if (nzk-1 > 0){
1666       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1667       jl[k] = jl[i]; jl[i] = k;
1668       il[k] = ui[k] + 1;
1669     }
1670     ui_ptr[k] = current_space->array;
1671     current_space->array           += nzk;
1672     current_space->local_used      += nzk;
1673     current_space->local_remaining -= nzk;
1674 
1675     ui[k+1] = ui[k] + nzk;
1676   }
1677 
1678 #if defined(PETSC_USE_INFO)
1679   if (ai[am] != 0) {
1680     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1681     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1682     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1683     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1684   } else {
1685      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1686   }
1687 #endif
1688 
1689   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1690   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1691   ierr = PetscFree(jl);CHKERRQ(ierr);
1692 
1693   /* destroy list of free space and other temporary array(s) */
1694   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1695   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1696   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1697 
1698   /* put together the new matrix in MATSEQSBAIJ format */
1699 
1700   b = (Mat_SeqSBAIJ*)(fact)->data;
1701   b->singlemalloc = PETSC_FALSE;
1702   b->free_a       = PETSC_TRUE;
1703   b->free_ij      = PETSC_TRUE;
1704   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1705   b->j    = uj;
1706   b->i    = ui;
1707   b->diag = 0;
1708   b->ilen = 0;
1709   b->imax = 0;
1710   b->row  = perm;
1711   b->col  = perm;
1712   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1713   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1714   b->icol = iperm;
1715   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1716   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1717   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1718   b->maxnz = b->nz = ui[am];
1719 
1720   (fact)->info.factor_mallocs    = reallocs;
1721   (fact)->info.fill_ratio_given  = fill;
1722   if (ai[am] != 0) {
1723     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1724   } else {
1725     (fact)->info.fill_ratio_needed = 0.0;
1726   }
1727   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_iludt"
1733 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_iludt(Mat A,Vec bb,Vec xx)
1734 {
1735   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1736   PetscErrorCode    ierr;
1737   PetscInt          n = A->rmap->n;
1738   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
1739   PetscScalar       *x,sum;
1740   const PetscScalar *b;
1741   const MatScalar   *aa = a->a,*v;
1742   PetscInt          i,nz;
1743 
1744   PetscFunctionBegin;
1745   if (!n) PetscFunctionReturn(0);
1746 
1747   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1748   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1749 
1750   /* forward solve the lower triangular */
1751   x[0] = b[0];
1752   v    = aa;
1753   vi   = aj;
1754   for (i=1; i<n; i++) {
1755     nz  = ai[i+1] - ai[i];
1756     sum = b[i];
1757     while (nz--) sum -= *v++ * x[*vi++];
1758     x[i] = sum;
1759   }
1760 
1761   /* backward solve the upper triangular */
1762   v   = aa + adiag[n] + 1;
1763   vi  = aj + adiag[n] + 1;
1764   for (i=n-1; i>=0; i--){
1765     nz  = adiag[i] - adiag[i+1] - 1;
1766     sum = x[i];
1767     while (nz--) sum -= *v++ * x[*vi++];
1768     x[i] = sum*aa[adiag[i]];
1769     v++; vi++;
1770   }
1771 
1772   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1773   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1774   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatSolve_SeqAIJ_iludt"
1780 PetscErrorCode MatSolve_SeqAIJ_iludt(Mat A,Vec bb,Vec xx)
1781 {
1782   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1783   IS                iscol = a->col,isrow = a->row;
1784   PetscErrorCode    ierr;
1785   PetscInt          i,n=A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag=a->diag;
1786   PetscInt          nz;
1787   const PetscInt    *rout,*cout,*r,*c;
1788   PetscScalar       *x,*tmp,*tmps;
1789   const PetscScalar *b;
1790   const MatScalar   *aa = a->a,*v;
1791 
1792   PetscFunctionBegin;
1793   if (!n) PetscFunctionReturn(0);
1794 
1795   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1796   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1797   tmp  = a->solve_work;
1798 
1799   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1800   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1801 
1802   /* forward solve the lower triangular */
1803   tmp[0] = b[*r++];
1804   tmps   = tmp;
1805   v      = aa;
1806   vi     = aj;
1807   for (i=1; i<n; i++) {
1808     nz  = ai[i+1] - ai[i];
1809     tmp[i] = b[*r++];
1810     SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz);
1811     v += nz; vi += nz;
1812   }
1813 
1814   /* backward solve the upper triangular */
1815   v   = aa + adiag[n] + 1;
1816   vi  = aj + adiag[n] + 1;
1817   for (i=n-1; i>=0; i--){
1818     nz  = adiag[i] - adiag[i+1] - 1;
1819     SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz);
1820     x[*c--] = tmp[i] = tmp[i]*aa[adiag[i]];
1821     v += nz+1; vi += nz+1;
1822   }
1823 
1824   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1825   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1826   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1827   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1828   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
1834 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
1835 {
1836   Mat                B = *fact;
1837   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
1838   IS                 isicol;
1839   PetscErrorCode     ierr;
1840   const PetscInt     *r,*ic;
1841   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
1842   PetscInt           *bi,*bj,*bdiag;
1843   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au;
1844   PetscInt           nlnk,*lnk;
1845   PetscBT            lnkbt;
1846   PetscTruth         row_identity,icol_identity,both_identity;
1847   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp;
1848   const PetscInt     *ics;
1849   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
1850   PetscReal          dt=info->dt,shift=info->shiftinblocks;
1851   PetscInt           nnz_max;
1852   PetscTruth         missing;
1853 
1854   PetscFunctionBegin;
1855   /* ------- symbolic factorization, can be reused ---------*/
1856   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1857   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1858   adiag=a->diag;
1859 
1860   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1861 
1862   /* bdiag is location of diagonal in factor */
1863   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1864 
1865   /* allocate row pointers bi */
1866   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1867 
1868   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
1869   dtcount = (PetscInt)info->dtcount;
1870   if (dtcount > n/2) dtcount = n/2;
1871   nnz_max  = ai[n]+2*n*dtcount+2;
1872   ierr = PetscMalloc(nnz_max*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1873   ierr = PetscMalloc(nnz_max*sizeof(MatScalar),&ba);CHKERRQ(ierr);
1874 
1875   /* put together the new matrix */
1876   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1877   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
1878   b    = (Mat_SeqAIJ*)(B)->data;
1879   b->free_a       = PETSC_TRUE;
1880   b->free_ij      = PETSC_TRUE;
1881   b->singlemalloc = PETSC_FALSE;
1882   b->a          = ba;
1883   b->j          = bj;
1884   b->i          = bi;
1885   b->diag       = bdiag;
1886   b->ilen       = 0;
1887   b->imax       = 0;
1888   b->row        = isrow;
1889   b->col        = iscol;
1890   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1891   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1892   b->icol       = isicol;
1893   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1894 
1895   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1896   b->maxnz = nnz_max;
1897 
1898   (B)->factor                = MAT_FACTOR_ILUDT;
1899   (B)->info.factor_mallocs   = 0;
1900   (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
1901   CHKMEMQ;
1902   /* ------- end of symbolic factorization ---------*/
1903 
1904   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1905   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1906   ics  = ic;
1907 
1908   /* linked list for storing column indices of the active row */
1909   nlnk = n + 1;
1910   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1911 
1912   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
1913   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
1914   jtmp = im + n;
1915   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
1916   ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1917   vtmp = rtmp + n;
1918 
1919   bi[0]    = 0;
1920   bdiag[0] = nnz_max-1; /* location of diagonal in factor B */
1921   for (i=0; i<n; i++) {
1922     /* copy initial fill into linked list */
1923     nzi = 0; /* nonzeros for active row i */
1924     nzi = ai[r[i]+1] - ai[r[i]];
1925     if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1926     nzi_al = adiag[r[i]] - ai[r[i]];
1927     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
1928     ajtmp = aj + ai[r[i]];
1929     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1930 
1931     /* load in initial (unfactored row) */
1932     ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1933     aatmp = a->a + ai[r[i]];
1934     for (j=0; j<nzi; j++) {
1935       rtmp[ics[*ajtmp++]] = *aatmp++;
1936     }
1937 
1938     /* add pivot rows into linked list */
1939     row = lnk[n];
1940     while (row < i) {
1941       nzi_bl = bi[row+1] - bi[row] + 1;
1942       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
1943       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
1944       nzi  += nlnk;
1945       row   = lnk[row];
1946     }
1947 
1948     /* copy data from lnk into jtmp, then initialize lnk */
1949     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
1950 
1951     /* numerical factorization */
1952     bjtmp = jtmp;
1953     row   = *bjtmp++; /* 1st pivot row */
1954     while  (row < i) {
1955       pc         = rtmp + row;
1956       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
1957       multiplier = (*pc) * (*pv);
1958       *pc        = multiplier;
1959       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
1960         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
1961         pv         = ba + bdiag[row+1] + 1;
1962         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
1963         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
1964         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
1965         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
1966       }
1967       row = *bjtmp++;
1968     }
1969 
1970     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
1971     nzi_bl = 0; j = 0;
1972     while (jtmp[j] < i){ /* Note: jtmp is sorted */
1973       vtmp[j] = rtmp[jtmp[j]];
1974       nzi_bl++; j++;
1975     }
1976     nzi_bu = nzi - nzi_bl -1;
1977     while (j < nzi){
1978       vtmp[j] = rtmp[jtmp[j]];
1979       j++;
1980     }
1981 
1982     bjtmp = bj + bi[i];
1983     batmp = ba + bi[i];
1984     /* apply level dropping rule to L part */
1985     ncut = nzi_al + dtcount;
1986     if (ncut < nzi_bl){
1987       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
1988       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
1989     } else {
1990       ncut = nzi_bl;
1991     }
1992     for (j=0; j<ncut; j++){
1993       bjtmp[j] = jtmp[j];
1994       batmp[j] = vtmp[j];
1995       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
1996     }
1997     bi[i+1] = bi[i] + ncut;
1998     nzi = ncut + 1;
1999 
2000     /* apply level dropping rule to U part */
2001     ncut = nzi_au + dtcount;
2002     if (ncut < nzi_bu){
2003       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
2004       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
2005     } else {
2006       ncut = nzi_bu;
2007     }
2008     nzi += ncut;
2009 
2010     /* mark bdiagonal */
2011     bdiag[i+1]   = bdiag[i] - (ncut + 1);
2012     bjtmp = bj + bdiag[i];
2013     batmp = ba + bdiag[i];
2014     *bjtmp = i;
2015     *batmp = rtmp[i];
2016     if (*batmp == 0.0) *batmp = dt+shift;
2017     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
2018     /* printf(" (%d,%g),",*bjtmp,*batmp); */
2019 
2020     bjtmp = bj + bdiag[i+1]+1;
2021     batmp = ba + bdiag[i+1]+1;
2022     for (k=0; k<ncut; k++){
2023       bjtmp[k] = jtmp[nzi_bl+1+k];
2024       batmp[k] = vtmp[nzi_bl+1+k];
2025       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
2026     }
2027     /* printf("\n"); */
2028 
2029     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
2030     /*
2031     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
2032     printf(" ----------------------------\n");
2033     */
2034   } /* for (i=0; i<n; i++) */
2035   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
2036   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
2037 
2038   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2039   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2040 
2041   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2042   ierr = PetscFree(im);CHKERRQ(ierr);
2043   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2044 
2045   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
2046   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
2047 
2048   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
2049   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
2050   both_identity = (PetscTruth) (row_identity && icol_identity);
2051   if (row_identity && icol_identity) {
2052     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_iludt;
2053   } else {
2054     B->ops->solve = MatSolve_SeqAIJ_iludt;
2055   }
2056 
2057   B->ops->lufactorsymbolic  = MatILUDTFactorSymbolic_SeqAIJ;
2058   B->ops->lufactornumeric   = MatILUDTFactorNumeric_SeqAIJ;
2059   B->ops->solveadd          = 0;
2060   B->ops->solvetranspose    = 0;
2061   B->ops->solvetransposeadd = 0;
2062   B->ops->matsolve          = 0;
2063   B->assembled              = PETSC_TRUE;
2064   B->preallocated           = PETSC_TRUE;
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /* a wraper of MatILUDTFactor_SeqAIJ() */
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
2071 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
2072 {
2073   PetscErrorCode     ierr;
2074 
2075   PetscFunctionBegin;
2076   /* printf("MatILUDTFactorSymbolic_SeqAIJ is called...\n"); */
2077   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
2078 
2079   fact->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ;
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 /*
2084    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
2085    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
2086 */
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
2089 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
2090 {
2091   Mat            C=fact;
2092   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
2093   IS             isrow = b->row,isicol = b->icol;
2094   PetscErrorCode ierr;
2095   const PetscInt *r,*ic,*ics;
2096   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
2097   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
2098   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
2099   PetscReal      dt=info->dt,shift=info->shiftinblocks;
2100   PetscTruth     row_identity, col_identity;
2101 
2102   PetscFunctionBegin;
2103   /* printf("MatILUDTFactorNumeric_SeqAIJ is called...\n"); */
2104   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2105   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
2106   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
2107   ics  = ic;
2108 
2109   for (i=0; i<n; i++){
2110     /* initialize rtmp array */
2111     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
2112     bjtmp = bj + bi[i];
2113     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
2114     rtmp[i] = 0.0;
2115     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
2116     bjtmp = bj + bdiag[i+1] + 1;
2117     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
2118 
2119     /* load in initial unfactored row of A */
2120     /* printf("row %d\n",i); */
2121     nz    = ai[r[i]+1] - ai[r[i]];
2122     ajtmp = aj + ai[r[i]];
2123     v     = aa + ai[r[i]];
2124     for (j=0; j<nz; j++) {
2125       rtmp[ics[*ajtmp++]] = v[j];
2126       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
2127     }
2128     /* printf("\n"); */
2129 
2130     /* numerical factorization */
2131     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
2132     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
2133     k = 0;
2134     while (k < nzl){
2135       row   = *bjtmp++;
2136       /* printf("  prow %d\n",row); */
2137       pc         = rtmp + row;
2138       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
2139       multiplier = (*pc) * (*pv);
2140       *pc        = multiplier;
2141       if (PetscAbsScalar(multiplier) > dt){
2142         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
2143         pv         = b->a + bdiag[row+1] + 1;
2144         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
2145         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
2146         /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */
2147       }
2148       k++;
2149     }
2150 
2151     /* finished row so stick it into b->a */
2152     /* L-part */
2153     pv = b->a + bi[i] ;
2154     pj = bj + bi[i] ;
2155     nzl = bi[i+1] - bi[i];
2156     for (j=0; j<nzl; j++) {
2157       pv[j] = rtmp[pj[j]];
2158       /* printf(" (%d,%g),",pj[j],pv[j]); */
2159     }
2160 
2161     /* diagonal: invert diagonal entries for simplier triangular solves */
2162     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
2163     b->a[bdiag[i]] = 1.0/rtmp[i];
2164     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
2165 
2166     /* U-part */
2167     pv = b->a + bdiag[i+1] + 1;
2168     pj = bj + bdiag[i+1] + 1;
2169     nzu = bdiag[i] - bdiag[i+1] - 1;
2170     for (j=0; j<nzu; j++) {
2171       pv[j] = rtmp[pj[j]];
2172       /* printf(" (%d,%g),",pj[j],pv[j]); */
2173     }
2174     /* printf("\n"); */
2175   }
2176 
2177   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2178   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2179   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2180 
2181   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
2182   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
2183   if (row_identity && col_identity) {
2184     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_iludt;
2185   } else {
2186     C->ops->solve   = MatSolve_SeqAIJ_iludt;
2187   }
2188   C->ops->solveadd           = 0;
2189   C->ops->solvetranspose     = 0;
2190   C->ops->solvetransposeadd  = 0;
2191   C->ops->matsolve           = 0;
2192   C->assembled    = PETSC_TRUE;
2193   C->preallocated = PETSC_TRUE;
2194   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
2195   PetscFunctionReturn(0);
2196 }
2197