xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision fddf0933058fbad754b3e36ebcce44b378b6bc5d)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/seq/aij.h"
4 #include "../src/inline/dot.h"
5 #include "../src/inline/spops.h"
6 #include "petscbt.h"
7 #include "../src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 EXTERN_C_BEGIN
22 #undef __FUNCT__
23 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
24 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
25 {
26   PetscFunctionBegin;
27   *flg = PETSC_TRUE;
28   PetscFunctionReturn(0);
29 }
30 EXTERN_C_END
31 
32 EXTERN_C_BEGIN
33 #undef __FUNCT__
34 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
35 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
36 {
37   PetscInt           n = A->rmap->n;
38   PetscErrorCode     ierr;
39 
40   PetscFunctionBegin;
41   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
42   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
43   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
44     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
45     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
46     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
47     (*B)->ops->iludtfactor       = MatILUDTFactor_SeqAIJ;
48   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
49     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
50     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
51     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
52     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
53   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
54   (*B)->factor = ftype;
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
61 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
62 {
63   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
64   IS                 isicol;
65   PetscErrorCode     ierr;
66   const PetscInt     *r,*ic;
67   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
68   PetscInt           *bi,*bj,*ajtmp;
69   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
70   PetscReal          f;
71   PetscInt           nlnk,*lnk,k,**bi_ptr;
72   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
73   PetscBT            lnkbt;
74 
75   PetscFunctionBegin;
76   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
77   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
78   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
79   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
80 
81   /* get new row pointers */
82   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
83   bi[0] = 0;
84 
85   /* bdiag is location of diagonal in factor */
86   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
87   bdiag[0] = 0;
88 
89   /* linked list for storing column indices of the active row */
90   nlnk = n + 1;
91   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
92 
93   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
94 
95   /* initial FreeSpace size is f*(ai[n]+1) */
96   f = info->fill;
97   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
98   current_space = free_space;
99 
100   for (i=0; i<n; i++) {
101     /* copy previous fill into linked list */
102     nzi = 0;
103     nnz = ai[r[i]+1] - ai[r[i]];
104     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
105     ajtmp = aj + ai[r[i]];
106     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
107     nzi += nlnk;
108 
109     /* add pivot rows into linked list */
110     row = lnk[n];
111     while (row < i) {
112       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
113       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
114       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
115       nzi += nlnk;
116       row  = lnk[row];
117     }
118     bi[i+1] = bi[i] + nzi;
119     im[i]   = nzi;
120 
121     /* mark bdiag */
122     nzbd = 0;
123     nnz  = nzi;
124     k    = lnk[n];
125     while (nnz-- && k < i){
126       nzbd++;
127       k = lnk[k];
128     }
129     bdiag[i] = bi[i] + nzbd;
130 
131     /* if free space is not available, make more free space */
132     if (current_space->local_remaining<nzi) {
133       nnz = (n - i)*nzi; /* estimated and max additional space needed */
134       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
135       reallocs++;
136     }
137 
138     /* copy data into free space, then initialize lnk */
139     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
140     bi_ptr[i] = current_space->array;
141     current_space->array           += nzi;
142     current_space->local_used      += nzi;
143     current_space->local_remaining -= nzi;
144   }
145 #if defined(PETSC_USE_INFO)
146   if (ai[n] != 0) {
147     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
148     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
149     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
150     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
151     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
152   } else {
153     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
154   }
155 #endif
156 
157   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
158   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
159 
160   /* destroy list of free space and other temporary array(s) */
161   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
162   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
163   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
164   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
165 
166   /* put together the new matrix */
167   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
168   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
169   b    = (Mat_SeqAIJ*)(B)->data;
170   b->free_a       = PETSC_TRUE;
171   b->free_ij      = PETSC_TRUE;
172   b->singlemalloc = PETSC_FALSE;
173   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
174   b->j          = bj;
175   b->i          = bi;
176   b->diag       = bdiag;
177   b->ilen       = 0;
178   b->imax       = 0;
179   b->row        = isrow;
180   b->col        = iscol;
181   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
182   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
183   b->icol       = isicol;
184   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
185 
186   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
187   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
188   b->maxnz = b->nz = bi[n] ;
189 
190   (B)->factor                = MAT_FACTOR_LU;
191   (B)->info.factor_mallocs   = reallocs;
192   (B)->info.fill_ratio_given = f;
193 
194   if (ai[n]) {
195     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
196   } else {
197     (B)->info.fill_ratio_needed = 0.0;
198   }
199   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
200   (B)->ops->solve            = MatSolve_SeqAIJ;
201   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
202   /* switch to inodes if appropriate */
203   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 /*
208     Trouble in factorization, should we dump the original matrix?
209 */
210 #undef __FUNCT__
211 #define __FUNCT__ "MatFactorDumpMatrix"
212 PetscErrorCode MatFactorDumpMatrix(Mat A)
213 {
214   PetscErrorCode ierr;
215   PetscTruth     flg = PETSC_FALSE;
216 
217   PetscFunctionBegin;
218   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
219   if (flg) {
220     PetscViewer viewer;
221     char        filename[PETSC_MAX_PATH_LEN];
222 
223     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
224     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
225     ierr = MatView(A,viewer);CHKERRQ(ierr);
226     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
227   }
228   PetscFunctionReturn(0);
229 }
230 
231 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
232 
233 /* ----------------------------------------------------------- */
234 #undef __FUNCT__
235 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
236 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
237 {
238   Mat            C=B;
239   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
240   IS             isrow = b->row,isicol = b->icol;
241   PetscErrorCode ierr;
242   const PetscInt  *r,*ic,*ics;
243   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
244   PetscInt       *ajtmp,*bjtmp,nz,row,*diag_offset = b->diag,diag,*pj;
245   MatScalar      *rtmp,*pc,multiplier,*v,*pv,d,*aa=a->a;
246   PetscReal      rs=0.0;
247   LUShift_Ctx    sctx;
248   PetscInt       newshift,*ddiag;
249 
250   PetscFunctionBegin;
251   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
252   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
253   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
254   ics  = ic;
255 
256   sctx.shift_top      = 0;
257   sctx.nshift_max     = 0;
258   sctx.shift_lo       = 0;
259   sctx.shift_hi       = 0;
260   sctx.shift_fraction = 0;
261 
262   /* if both shift schemes are chosen by user, only use info->shiftpd */
263   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
264     ddiag          = a->diag;
265     sctx.shift_top = info->zeropivot;
266     for (i=0; i<n; i++) {
267       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
268       d  = (aa)[ddiag[i]];
269       rs = -PetscAbsScalar(d) - PetscRealPart(d);
270       v  = aa+ai[i];
271       nz = ai[i+1] - ai[i];
272       for (j=0; j<nz; j++)
273 	rs += PetscAbsScalar(v[j]);
274       if (rs>sctx.shift_top) sctx.shift_top = rs;
275     }
276     sctx.shift_top   *= 1.1;
277     sctx.nshift_max   = 5;
278     sctx.shift_lo     = 0.;
279     sctx.shift_hi     = 1.;
280   }
281 
282   sctx.shift_amount = 0.0;
283   sctx.nshift       = 0;
284   do {
285     sctx.lushift = PETSC_FALSE;
286     for (i=0; i<n; i++){
287       nz    = bi[i+1] - bi[i];
288       bjtmp = bj + bi[i];
289       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
290 
291       /* load in initial (unfactored row) */
292       nz    = ai[r[i]+1] - ai[r[i]];
293       ajtmp = aj + ai[r[i]];
294       v     = aa + ai[r[i]];
295       for (j=0; j<nz; j++) {
296         rtmp[ics[ajtmp[j]]] = v[j];
297       }
298       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
299 
300       row = *bjtmp++;
301       while  (row < i) {
302         pc = rtmp + row;
303         if (*pc != 0.0) {
304           pv         = b->a + diag_offset[row];
305           pj         = b->j + diag_offset[row] + 1;
306           multiplier = *pc / *pv++;
307           *pc        = multiplier;
308           nz         = bi[row+1] - diag_offset[row] - 1;
309           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
310           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
311         }
312         row = *bjtmp++;
313       }
314       /* finished row so stick it into b->a */
315       pv   = b->a + bi[i] ;
316       pj   = b->j + bi[i] ;
317       nz   = bi[i+1] - bi[i];
318       diag = diag_offset[i] - bi[i];
319       rs   = -PetscAbsScalar(pv[diag]);
320       for (j=0; j<nz; j++) {
321         pv[j] = rtmp[pj[j]];
322         rs   += PetscAbsScalar(pv[j]);
323       }
324 
325       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
326       sctx.rs  = rs;
327       sctx.pv  = pv[diag];
328       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
329       if (newshift == 1) break;
330     }
331 
332     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
333       /*
334        * if no shift in this attempt & shifting & started shifting & can refine,
335        * then try lower shift
336        */
337       sctx.shift_hi       = sctx.shift_fraction;
338       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
339       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
340       sctx.lushift        = PETSC_TRUE;
341       sctx.nshift++;
342     }
343   } while (sctx.lushift);
344 
345   /* invert diagonal entries for simplier triangular solves */
346   for (i=0; i<n; i++) {
347     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
348   }
349   ierr = PetscFree(rtmp);CHKERRQ(ierr);
350   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
351   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
352   if (b->inode.use) {
353     C->ops->solve   = MatSolve_Inode;
354   } else {
355     PetscTruth row_identity, col_identity;
356     ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
357     ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
358     if (row_identity && col_identity) {
359       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
360     } else {
361       C->ops->solve   = MatSolve_SeqAIJ;
362     }
363   }
364   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
365   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
366   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
367   C->ops->matsolve           = MatMatSolve_SeqAIJ;
368   C->assembled    = PETSC_TRUE;
369   C->preallocated = PETSC_TRUE;
370   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
371   if (sctx.nshift){
372      if (info->shiftpd) {
373       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
374     } else if (info->shiftnz) {
375       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
376     }
377   }
378   PetscFunctionReturn(0);
379 }
380 
381 /*
382    This routine implements inplace ILU(0) with row or/and column permutations.
383    Input:
384      A - original matrix
385    Output;
386      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
387          a->j (col index) is permuted by the inverse of colperm, then sorted
388          a->a reordered accordingly with a->j
389          a->diag (ptr to diagonal elements) is updated.
390 */
391 #undef __FUNCT__
392 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
393 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
394 {
395   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
396   IS             isrow = a->row,isicol = a->icol;
397   PetscErrorCode ierr;
398   const PetscInt *r,*ic,*ics;
399   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
400   PetscInt       *ajtmp,nz,row;
401   PetscInt       *diag = a->diag,nbdiag,*pj;
402   PetscScalar    *rtmp,*pc,multiplier,d;
403   MatScalar      *v,*pv;
404   PetscReal      rs;
405   LUShift_Ctx    sctx;
406   PetscInt       newshift;
407 
408   PetscFunctionBegin;
409   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
410   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
411   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
412   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
413   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
414   ics = ic;
415 
416   sctx.shift_top      = 0;
417   sctx.nshift_max     = 0;
418   sctx.shift_lo       = 0;
419   sctx.shift_hi       = 0;
420   sctx.shift_fraction = 0;
421 
422   /* if both shift schemes are chosen by user, only use info->shiftpd */
423   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
424     sctx.shift_top = 0;
425     for (i=0; i<n; i++) {
426       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
427       d  = (a->a)[diag[i]];
428       rs = -PetscAbsScalar(d) - PetscRealPart(d);
429       v  = a->a+ai[i];
430       nz = ai[i+1] - ai[i];
431       for (j=0; j<nz; j++)
432 	rs += PetscAbsScalar(v[j]);
433       if (rs>sctx.shift_top) sctx.shift_top = rs;
434     }
435     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
436     sctx.shift_top    *= 1.1;
437     sctx.nshift_max   = 5;
438     sctx.shift_lo     = 0.;
439     sctx.shift_hi     = 1.;
440   }
441 
442   sctx.shift_amount = 0;
443   sctx.nshift       = 0;
444   do {
445     sctx.lushift = PETSC_FALSE;
446     for (i=0; i<n; i++){
447       /* load in initial unfactored row */
448       nz    = ai[r[i]+1] - ai[r[i]];
449       ajtmp = aj + ai[r[i]];
450       v     = a->a + ai[r[i]];
451       /* sort permuted ajtmp and values v accordingly */
452       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
453       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
454 
455       diag[r[i]] = ai[r[i]];
456       for (j=0; j<nz; j++) {
457         rtmp[ajtmp[j]] = v[j];
458         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
459       }
460       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
461 
462       row = *ajtmp++;
463       while  (row < i) {
464         pc = rtmp + row;
465         if (*pc != 0.0) {
466           pv         = a->a + diag[r[row]];
467           pj         = aj + diag[r[row]] + 1;
468 
469           multiplier = *pc / *pv++;
470           *pc        = multiplier;
471           nz         = ai[r[row]+1] - diag[r[row]] - 1;
472           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
473           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
474         }
475         row = *ajtmp++;
476       }
477       /* finished row so overwrite it onto a->a */
478       pv   = a->a + ai[r[i]] ;
479       pj   = aj + ai[r[i]] ;
480       nz   = ai[r[i]+1] - ai[r[i]];
481       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
482 
483       rs   = 0.0;
484       for (j=0; j<nz; j++) {
485         pv[j] = rtmp[pj[j]];
486         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
487       }
488 
489       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
490       sctx.rs  = rs;
491       sctx.pv  = pv[nbdiag];
492       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
493       if (newshift == 1) break;
494     }
495 
496     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
497       /*
498        * if no shift in this attempt & shifting & started shifting & can refine,
499        * then try lower shift
500        */
501       sctx.shift_hi        = sctx.shift_fraction;
502       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
503       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
504       sctx.lushift         = PETSC_TRUE;
505       sctx.nshift++;
506     }
507   } while (sctx.lushift);
508 
509   /* invert diagonal entries for simplier triangular solves */
510   for (i=0; i<n; i++) {
511     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
512   }
513 
514   ierr = PetscFree(rtmp);CHKERRQ(ierr);
515   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
516   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
517   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
518   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
519   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
520   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
521   A->assembled = PETSC_TRUE;
522   A->preallocated = PETSC_TRUE;
523   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
524   if (sctx.nshift){
525     if (info->shiftpd) {
526       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
527     } else if (info->shiftnz) {
528       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
529     }
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 /* ----------------------------------------------------------- */
535 #undef __FUNCT__
536 #define __FUNCT__ "MatLUFactor_SeqAIJ"
537 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
538 {
539   PetscErrorCode ierr;
540   Mat            C;
541 
542   PetscFunctionBegin;
543   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
544   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
545   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
546   A->ops->solve            = C->ops->solve;
547   A->ops->solvetranspose   = C->ops->solvetranspose;
548   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
549   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 /* ----------------------------------------------------------- */
553 
554 #undef __FUNCT__
555 #define __FUNCT__ "MatSolve_SeqAIJ"
556 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
557 {
558   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
559   IS                iscol = a->col,isrow = a->row;
560   PetscErrorCode    ierr;
561   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
562   PetscInt          nz;
563   const PetscInt    *rout,*cout,*r,*c;
564   PetscScalar       *x,*tmp,*tmps,sum;
565   const PetscScalar *b;
566   const MatScalar   *aa = a->a,*v;
567 
568   PetscFunctionBegin;
569   if (!n) PetscFunctionReturn(0);
570 
571   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
572   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
573   tmp  = a->solve_work;
574 
575   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
576   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
577 
578   /* forward solve the lower triangular */
579   tmp[0] = b[*r++];
580   tmps   = tmp;
581   for (i=1; i<n; i++) {
582     v   = aa + ai[i] ;
583     vi  = aj + ai[i] ;
584     nz  = a->diag[i] - ai[i];
585     sum = b[*r++];
586     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
587     tmp[i] = sum;
588   }
589 
590   /* backward solve the upper triangular */
591   for (i=n-1; i>=0; i--){
592     v   = aa + a->diag[i] + 1;
593     vi  = aj + a->diag[i] + 1;
594     nz  = ai[i+1] - a->diag[i] - 1;
595     sum = tmp[i];
596     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
597     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
598   }
599 
600   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
601   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
602   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
603   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
604   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMatSolve_SeqAIJ"
610 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
611 {
612   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
613   IS              iscol = a->col,isrow = a->row;
614   PetscErrorCode  ierr;
615   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
616   PetscInt        nz,neq;
617   const PetscInt  *rout,*cout,*r,*c;
618   PetscScalar     *x,*b,*tmp,*tmps,sum;
619   const MatScalar *aa = a->a,*v;
620   PetscTruth      bisdense,xisdense;
621 
622   PetscFunctionBegin;
623   if (!n) PetscFunctionReturn(0);
624 
625   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
626   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
627   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
628   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
629 
630   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
631   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
632 
633   tmp  = a->solve_work;
634   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
635   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
636 
637   for (neq=0; neq<B->cmap->n; neq++){
638     /* forward solve the lower triangular */
639     tmp[0] = b[r[0]];
640     tmps   = tmp;
641     for (i=1; i<n; i++) {
642       v   = aa + ai[i] ;
643       vi  = aj + ai[i] ;
644       nz  = a->diag[i] - ai[i];
645       sum = b[r[i]];
646       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
647       tmp[i] = sum;
648     }
649     /* backward solve the upper triangular */
650     for (i=n-1; i>=0; i--){
651       v   = aa + a->diag[i] + 1;
652       vi  = aj + a->diag[i] + 1;
653       nz  = ai[i+1] - a->diag[i] - 1;
654       sum = tmp[i];
655       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
656       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
657     }
658 
659     b += n;
660     x += n;
661   }
662   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
663   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
664   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
665   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
666   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
672 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
673 {
674   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
675   IS              iscol = a->col,isrow = a->row;
676   PetscErrorCode  ierr;
677   const PetscInt  *r,*c,*rout,*cout;
678   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
679   PetscInt        nz,row;
680   PetscScalar     *x,*b,*tmp,*tmps,sum;
681   const MatScalar *aa = a->a,*v;
682 
683   PetscFunctionBegin;
684   if (!n) PetscFunctionReturn(0);
685 
686   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
687   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
688   tmp  = a->solve_work;
689 
690   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
691   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
692 
693   /* forward solve the lower triangular */
694   tmp[0] = b[*r++];
695   tmps   = tmp;
696   for (row=1; row<n; row++) {
697     i   = rout[row]; /* permuted row */
698     v   = aa + ai[i] ;
699     vi  = aj + ai[i] ;
700     nz  = a->diag[i] - ai[i];
701     sum = b[*r++];
702     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
703     tmp[row] = sum;
704   }
705 
706   /* backward solve the upper triangular */
707   for (row=n-1; row>=0; row--){
708     i   = rout[row]; /* permuted row */
709     v   = aa + a->diag[i] + 1;
710     vi  = aj + a->diag[i] + 1;
711     nz  = ai[i+1] - a->diag[i] - 1;
712     sum = tmp[row];
713     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
714     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
715   }
716 
717   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
718   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
719   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
720   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
721   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
725 /* ----------------------------------------------------------- */
726 #undef __FUNCT__
727 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
728 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
729 {
730   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
731   PetscErrorCode    ierr;
732   PetscInt          n = A->rmap->n;
733   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
734   PetscScalar       *x;
735   const PetscScalar *b;
736   const MatScalar   *aa = a->a;
737 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
738   PetscInt          adiag_i,i,nz,ai_i;
739   const MatScalar   *v;
740   PetscScalar       sum;
741 #endif
742 
743   PetscFunctionBegin;
744   if (!n) PetscFunctionReturn(0);
745 
746   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
747   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
748 
749 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
750   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
751 #else
752   /* forward solve the lower triangular */
753   x[0] = b[0];
754   for (i=1; i<n; i++) {
755     ai_i = ai[i];
756     v    = aa + ai_i;
757     vi   = aj + ai_i;
758     nz   = adiag[i] - ai_i;
759     sum  = b[i];
760     while (nz--) sum -= *v++ * x[*vi++];
761     x[i] = sum;
762   }
763 
764   /* backward solve the upper triangular */
765   for (i=n-1; i>=0; i--){
766     adiag_i = adiag[i];
767     v       = aa + adiag_i + 1;
768     vi      = aj + adiag_i + 1;
769     nz      = ai[i+1] - adiag_i - 1;
770     sum     = x[i];
771     while (nz--) sum -= *v++ * x[*vi++];
772     x[i]    = sum*aa[adiag_i];
773   }
774 #endif
775   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
776   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
777   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
783 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
784 {
785   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
786   IS              iscol = a->col,isrow = a->row;
787   PetscErrorCode  ierr;
788   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
789   PetscInt        nz;
790   const PetscInt  *rout,*cout,*r,*c;
791   PetscScalar     *x,*b,*tmp,sum;
792   const MatScalar *aa = a->a,*v;
793 
794   PetscFunctionBegin;
795   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
796 
797   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
798   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
799   tmp  = a->solve_work;
800 
801   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
802   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
803 
804   /* forward solve the lower triangular */
805   tmp[0] = b[*r++];
806   for (i=1; i<n; i++) {
807     v   = aa + ai[i] ;
808     vi  = aj + ai[i] ;
809     nz  = a->diag[i] - ai[i];
810     sum = b[*r++];
811     while (nz--) sum -= *v++ * tmp[*vi++ ];
812     tmp[i] = sum;
813   }
814 
815   /* backward solve the upper triangular */
816   for (i=n-1; i>=0; i--){
817     v   = aa + a->diag[i] + 1;
818     vi  = aj + a->diag[i] + 1;
819     nz  = ai[i+1] - a->diag[i] - 1;
820     sum = tmp[i];
821     while (nz--) sum -= *v++ * tmp[*vi++ ];
822     tmp[i] = sum*aa[a->diag[i]];
823     x[*c--] += tmp[i];
824   }
825 
826   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
827   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
828   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
829   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
830   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
831 
832   PetscFunctionReturn(0);
833 }
834 /* -------------------------------------------------------------------*/
835 #undef __FUNCT__
836 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
837 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
838 {
839   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
840   IS              iscol = a->col,isrow = a->row;
841   PetscErrorCode  ierr;
842   const PetscInt  *rout,*cout,*r,*c;
843   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
844   PetscInt        nz,*diag = a->diag;
845   PetscScalar     *x,*b,*tmp,s1;
846   const MatScalar *aa = a->a,*v;
847 
848   PetscFunctionBegin;
849   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
850   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
851   tmp  = a->solve_work;
852 
853   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
854   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
855 
856   /* copy the b into temp work space according to permutation */
857   for (i=0; i<n; i++) tmp[i] = b[c[i]];
858 
859   /* forward solve the U^T */
860   for (i=0; i<n; i++) {
861     v   = aa + diag[i] ;
862     vi  = aj + diag[i] + 1;
863     nz  = ai[i+1] - diag[i] - 1;
864     s1  = tmp[i];
865     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
866     while (nz--) {
867       tmp[*vi++ ] -= (*v++)*s1;
868     }
869     tmp[i] = s1;
870   }
871 
872   /* backward solve the L^T */
873   for (i=n-1; i>=0; i--){
874     v   = aa + diag[i] - 1 ;
875     vi  = aj + diag[i] - 1 ;
876     nz  = diag[i] - ai[i];
877     s1  = tmp[i];
878     while (nz--) {
879       tmp[*vi-- ] -= (*v--)*s1;
880     }
881   }
882 
883   /* copy tmp into x according to permutation */
884   for (i=0; i<n; i++) x[r[i]] = tmp[i];
885 
886   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
887   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
888   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
889   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
890 
891   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
897 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
898 {
899   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
900   IS              iscol = a->col,isrow = a->row;
901   PetscErrorCode  ierr;
902   const PetscInt  *r,*c,*rout,*cout;
903   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
904   PetscInt        nz,*diag = a->diag;
905   PetscScalar     *x,*b,*tmp;
906   const MatScalar *aa = a->a,*v;
907 
908   PetscFunctionBegin;
909   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
910 
911   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
912   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
913   tmp = a->solve_work;
914 
915   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
916   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
917 
918   /* copy the b into temp work space according to permutation */
919   for (i=0; i<n; i++) tmp[i] = b[c[i]];
920 
921   /* forward solve the U^T */
922   for (i=0; i<n; i++) {
923     v   = aa + diag[i] ;
924     vi  = aj + diag[i] + 1;
925     nz  = ai[i+1] - diag[i] - 1;
926     tmp[i] *= *v++;
927     while (nz--) {
928       tmp[*vi++ ] -= (*v++)*tmp[i];
929     }
930   }
931 
932   /* backward solve the L^T */
933   for (i=n-1; i>=0; i--){
934     v   = aa + diag[i] - 1 ;
935     vi  = aj + diag[i] - 1 ;
936     nz  = diag[i] - ai[i];
937     while (nz--) {
938       tmp[*vi-- ] -= (*v--)*tmp[i];
939     }
940   }
941 
942   /* copy tmp into x according to permutation */
943   for (i=0; i<n; i++) x[r[i]] += tmp[i];
944 
945   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
946   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
947   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
948   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
949 
950   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 /* ----------------------------------------------------------------*/
954 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
955 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
956 
957 #undef __FUNCT__
958 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
959 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
960 {
961   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
962   IS                 isicol;
963   PetscErrorCode     ierr;
964   const PetscInt     *r,*ic;
965   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
966   PetscInt           *bi,*cols,nnz,*cols_lvl;
967   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
968   PetscInt           i,levels,diagonal_fill;
969   PetscTruth         col_identity,row_identity;
970   PetscReal          f;
971   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
972   PetscBT            lnkbt;
973   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
974   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
975   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
976   PetscTruth         missing;
977 
978   PetscFunctionBegin;
979   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
980   f             = info->fill;
981   levels        = (PetscInt)info->levels;
982   diagonal_fill = (PetscInt)info->diagonal_fill;
983   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
984 
985   /* special case that simply copies fill pattern */
986   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
987   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
988   if (!levels && row_identity && col_identity) {
989     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr);
990     fact->factor = MAT_FACTOR_ILU;
991     (fact)->info.factor_mallocs    = 0;
992     (fact)->info.fill_ratio_given  = info->fill;
993     (fact)->info.fill_ratio_needed = 1.0;
994     b               = (Mat_SeqAIJ*)(fact)->data;
995     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
996     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
997     b->row              = isrow;
998     b->col              = iscol;
999     b->icol             = isicol;
1000     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1001     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1002     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1003     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1004     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1005     PetscFunctionReturn(0);
1006   }
1007 
1008   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1009   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1010 
1011   /* get new row pointers */
1012   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1013   bi[0] = 0;
1014   /* bdiag is location of diagonal in factor */
1015   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1016   bdiag[0]  = 0;
1017 
1018   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1019   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1020 
1021   /* create a linked list for storing column indices of the active row */
1022   nlnk = n + 1;
1023   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1024 
1025   /* initial FreeSpace size is f*(ai[n]+1) */
1026   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1027   current_space = free_space;
1028   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1029   current_space_lvl = free_space_lvl;
1030 
1031   for (i=0; i<n; i++) {
1032     nzi = 0;
1033     /* copy current row into linked list */
1034     nnz  = ai[r[i]+1] - ai[r[i]];
1035     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1036     cols = aj + ai[r[i]];
1037     lnk[i] = -1; /* marker to indicate if diagonal exists */
1038     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1039     nzi += nlnk;
1040 
1041     /* make sure diagonal entry is included */
1042     if (diagonal_fill && lnk[i] == -1) {
1043       fm = n;
1044       while (lnk[fm] < i) fm = lnk[fm];
1045       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1046       lnk[fm]    = i;
1047       lnk_lvl[i] = 0;
1048       nzi++; dcount++;
1049     }
1050 
1051     /* add pivot rows into the active row */
1052     nzbd = 0;
1053     prow = lnk[n];
1054     while (prow < i) {
1055       nnz      = bdiag[prow];
1056       cols     = bj_ptr[prow] + nnz + 1;
1057       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1058       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1059       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1060       nzi += nlnk;
1061       prow = lnk[prow];
1062       nzbd++;
1063     }
1064     bdiag[i] = nzbd;
1065     bi[i+1]  = bi[i] + nzi;
1066 
1067     /* if free space is not available, make more free space */
1068     if (current_space->local_remaining<nzi) {
1069       nnz = nzi*(n - i); /* estimated and max additional space needed */
1070       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1071       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1072       reallocs++;
1073     }
1074 
1075     /* copy data into free_space and free_space_lvl, then initialize lnk */
1076     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1077     bj_ptr[i]    = current_space->array;
1078     bjlvl_ptr[i] = current_space_lvl->array;
1079 
1080     /* make sure the active row i has diagonal entry */
1081     if (*(bj_ptr[i]+bdiag[i]) != i) {
1082       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1083     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1084     }
1085 
1086     current_space->array           += nzi;
1087     current_space->local_used      += nzi;
1088     current_space->local_remaining -= nzi;
1089     current_space_lvl->array           += nzi;
1090     current_space_lvl->local_used      += nzi;
1091     current_space_lvl->local_remaining -= nzi;
1092   }
1093 
1094   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1095   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1096 
1097   /* destroy list of free space and other temporary arrays */
1098   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1099   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1100   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1101   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1102   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1103 
1104 #if defined(PETSC_USE_INFO)
1105   {
1106     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1107     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1108     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1109     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1110     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1111     if (diagonal_fill) {
1112       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1113     }
1114   }
1115 #endif
1116 
1117   /* put together the new matrix */
1118   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1119   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1120   b = (Mat_SeqAIJ*)(fact)->data;
1121   b->free_a       = PETSC_TRUE;
1122   b->free_ij      = PETSC_TRUE;
1123   b->singlemalloc = PETSC_FALSE;
1124   ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1125   b->j          = bj;
1126   b->i          = bi;
1127   for (i=0; i<n; i++) bdiag[i] += bi[i];
1128   b->diag       = bdiag;
1129   b->ilen       = 0;
1130   b->imax       = 0;
1131   b->row        = isrow;
1132   b->col        = iscol;
1133   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1134   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1135   b->icol       = isicol;
1136   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1137   /* In b structure:  Free imax, ilen, old a, old j.
1138      Allocate bdiag, solve_work, new a, new j */
1139   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1140   b->maxnz             = b->nz = bi[n] ;
1141   (fact)->info.factor_mallocs    = reallocs;
1142   (fact)->info.fill_ratio_given  = f;
1143   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1144   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1145   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #include "../src/mat/impls/sbaij/seq/sbaij.h"
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1152 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
1153 {
1154   Mat            C = B;
1155   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1156   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1157   IS             ip=b->row,iip = b->icol;
1158   PetscErrorCode ierr;
1159   const PetscInt *rip,*riip;
1160   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1161   PetscInt       *ai=a->i,*aj=a->j;
1162   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1163   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1164   PetscReal      zeropivot,rs,shiftnz;
1165   PetscReal      shiftpd;
1166   ChShift_Ctx    sctx;
1167   PetscInt       newshift;
1168   PetscTruth     perm_identity;
1169 
1170   PetscFunctionBegin;
1171 
1172   shiftnz   = info->shiftnz;
1173   shiftpd   = info->shiftpd;
1174   zeropivot = info->zeropivot;
1175 
1176   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1177   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1178 
1179   /* initialization */
1180   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1181   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1182   jl   = il + mbs;
1183   rtmp = (MatScalar*)(jl + mbs);
1184 
1185   sctx.shift_amount = 0;
1186   sctx.nshift       = 0;
1187   do {
1188     sctx.chshift = PETSC_FALSE;
1189     for (i=0; i<mbs; i++) {
1190       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1191     }
1192 
1193     for (k = 0; k<mbs; k++){
1194       bval = ba + bi[k];
1195       /* initialize k-th row by the perm[k]-th row of A */
1196       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1197       for (j = jmin; j < jmax; j++){
1198         col = riip[aj[j]];
1199         if (col >= k){ /* only take upper triangular entry */
1200           rtmp[col] = aa[j];
1201           *bval++  = 0.0; /* for in-place factorization */
1202         }
1203       }
1204       /* shift the diagonal of the matrix */
1205       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1206 
1207       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1208       dk = rtmp[k];
1209       i = jl[k]; /* first row to be added to k_th row  */
1210 
1211       while (i < k){
1212         nexti = jl[i]; /* next row to be added to k_th row */
1213 
1214         /* compute multiplier, update diag(k) and U(i,k) */
1215         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1216         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1217         dk += uikdi*ba[ili];
1218         ba[ili] = uikdi; /* -U(i,k) */
1219 
1220         /* add multiple of row i to k-th row */
1221         jmin = ili + 1; jmax = bi[i+1];
1222         if (jmin < jmax){
1223           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1224           /* update il and jl for row i */
1225           il[i] = jmin;
1226           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1227         }
1228         i = nexti;
1229       }
1230 
1231       /* shift the diagonals when zero pivot is detected */
1232       /* compute rs=sum of abs(off-diagonal) */
1233       rs   = 0.0;
1234       jmin = bi[k]+1;
1235       nz   = bi[k+1] - jmin;
1236       bcol = bj + jmin;
1237       while (nz--){
1238         rs += PetscAbsScalar(rtmp[*bcol]);
1239         bcol++;
1240       }
1241 
1242       sctx.rs = rs;
1243       sctx.pv = dk;
1244       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1245 
1246       if (newshift == 1) {
1247         if (!sctx.shift_amount) {
1248           sctx.shift_amount = 1e-5;
1249         }
1250         break;
1251       }
1252 
1253       /* copy data into U(k,:) */
1254       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1255       jmin = bi[k]+1; jmax = bi[k+1];
1256       if (jmin < jmax) {
1257         for (j=jmin; j<jmax; j++){
1258           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1259         }
1260         /* add the k-th row into il and jl */
1261         il[k] = jmin;
1262         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1263       }
1264     }
1265   } while (sctx.chshift);
1266   ierr = PetscFree(il);CHKERRQ(ierr);
1267 
1268   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1269   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1270 
1271   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1272   if (perm_identity){
1273     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1274     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1275     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1276     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1277   } else {
1278     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1279     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1280     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1281     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1282   }
1283 
1284   C->assembled    = PETSC_TRUE;
1285   C->preallocated = PETSC_TRUE;
1286   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1287   if (sctx.nshift){
1288     if (shiftnz) {
1289       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1290     } else if (shiftpd) {
1291       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1292     }
1293   }
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1299 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1300 {
1301   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1302   Mat_SeqSBAIJ       *b;
1303   PetscErrorCode     ierr;
1304   PetscTruth         perm_identity,missing;
1305   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1306   const PetscInt     *rip,*riip;
1307   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1308   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1309   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1310   PetscReal          fill=info->fill,levels=info->levels;
1311   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1312   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1313   PetscBT            lnkbt;
1314   IS                 iperm;
1315 
1316   PetscFunctionBegin;
1317   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1318   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1319   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1320   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1321   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1322 
1323   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1324   ui[0] = 0;
1325 
1326   /* ICC(0) without matrix ordering: simply copies fill pattern */
1327   if (!levels && perm_identity) {
1328 
1329     for (i=0; i<am; i++) {
1330       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1331     }
1332     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1333     cols = uj;
1334     for (i=0; i<am; i++) {
1335       aj    = a->j + a->diag[i];
1336       ncols = ui[i+1] - ui[i];
1337       for (j=0; j<ncols; j++) *cols++ = *aj++;
1338     }
1339   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1340     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1341     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1342 
1343     /* initialization */
1344     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1345 
1346     /* jl: linked list for storing indices of the pivot rows
1347        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1348     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1349     il         = jl + am;
1350     uj_ptr     = (PetscInt**)(il + am);
1351     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1352     for (i=0; i<am; i++){
1353       jl[i] = am; il[i] = 0;
1354     }
1355 
1356     /* create and initialize a linked list for storing column indices of the active row k */
1357     nlnk = am + 1;
1358     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1359 
1360     /* initial FreeSpace size is fill*(ai[am]+1) */
1361     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1362     current_space = free_space;
1363     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1364     current_space_lvl = free_space_lvl;
1365 
1366     for (k=0; k<am; k++){  /* for each active row k */
1367       /* initialize lnk by the column indices of row rip[k] of A */
1368       nzk   = 0;
1369       ncols = ai[rip[k]+1] - ai[rip[k]];
1370       if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1371       ncols_upper = 0;
1372       for (j=0; j<ncols; j++){
1373         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1374         if (riip[i] >= k){ /* only take upper triangular entry */
1375           ajtmp[ncols_upper] = i;
1376           ncols_upper++;
1377         }
1378       }
1379       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1380       nzk += nlnk;
1381 
1382       /* update lnk by computing fill-in for each pivot row to be merged in */
1383       prow = jl[k]; /* 1st pivot row */
1384 
1385       while (prow < k){
1386         nextprow = jl[prow];
1387 
1388         /* merge prow into k-th row */
1389         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1390         jmax = ui[prow+1];
1391         ncols = jmax-jmin;
1392         i     = jmin - ui[prow];
1393         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1394         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1395         j     = *(uj - 1);
1396         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1397         nzk += nlnk;
1398 
1399         /* update il and jl for prow */
1400         if (jmin < jmax){
1401           il[prow] = jmin;
1402           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1403         }
1404         prow = nextprow;
1405       }
1406 
1407       /* if free space is not available, make more free space */
1408       if (current_space->local_remaining<nzk) {
1409         i = am - k + 1; /* num of unfactored rows */
1410         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1411         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1412         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1413         reallocs++;
1414       }
1415 
1416       /* copy data into free_space and free_space_lvl, then initialize lnk */
1417       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1418       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1419 
1420       /* add the k-th row into il and jl */
1421       if (nzk > 1){
1422         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1423         jl[k] = jl[i]; jl[i] = k;
1424         il[k] = ui[k] + 1;
1425       }
1426       uj_ptr[k]     = current_space->array;
1427       uj_lvl_ptr[k] = current_space_lvl->array;
1428 
1429       current_space->array           += nzk;
1430       current_space->local_used      += nzk;
1431       current_space->local_remaining -= nzk;
1432 
1433       current_space_lvl->array           += nzk;
1434       current_space_lvl->local_used      += nzk;
1435       current_space_lvl->local_remaining -= nzk;
1436 
1437       ui[k+1] = ui[k] + nzk;
1438     }
1439 
1440 #if defined(PETSC_USE_INFO)
1441     if (ai[am] != 0) {
1442       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1443       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1444       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1445       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1446     } else {
1447       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1448     }
1449 #endif
1450 
1451     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1452     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1453     ierr = PetscFree(jl);CHKERRQ(ierr);
1454     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1455 
1456     /* destroy list of free space and other temporary array(s) */
1457     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1458     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1459     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1460     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1461 
1462   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1463 
1464   /* put together the new matrix in MATSEQSBAIJ format */
1465 
1466   b    = (Mat_SeqSBAIJ*)(fact)->data;
1467   b->singlemalloc = PETSC_FALSE;
1468   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1469   b->j    = uj;
1470   b->i    = ui;
1471   b->diag = 0;
1472   b->ilen = 0;
1473   b->imax = 0;
1474   b->row  = perm;
1475   b->col  = perm;
1476   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1477   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1478   b->icol = iperm;
1479   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1480   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1481   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1482   b->maxnz   = b->nz = ui[am];
1483   b->free_a  = PETSC_TRUE;
1484   b->free_ij = PETSC_TRUE;
1485 
1486   (fact)->info.factor_mallocs    = reallocs;
1487   (fact)->info.fill_ratio_given  = fill;
1488   if (ai[am] != 0) {
1489     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1490   } else {
1491     (fact)->info.fill_ratio_needed = 0.0;
1492   }
1493   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1499 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
1500 {
1501   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1502   Mat_SeqSBAIJ       *b;
1503   PetscErrorCode     ierr;
1504   PetscTruth         perm_identity;
1505   PetscReal          fill = info->fill;
1506   const PetscInt     *rip,*riip;
1507   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1508   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1509   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1510   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1511   PetscBT            lnkbt;
1512   IS                 iperm;
1513 
1514   PetscFunctionBegin;
1515   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1516   /* check whether perm is the identity mapping */
1517   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1518   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1519   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1520   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1521 
1522   /* initialization */
1523   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1524   ui[0] = 0;
1525 
1526   /* jl: linked list for storing indices of the pivot rows
1527      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1528   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1529   il     = jl + am;
1530   cols   = il + am;
1531   ui_ptr = (PetscInt**)(cols + am);
1532   for (i=0; i<am; i++){
1533     jl[i] = am; il[i] = 0;
1534   }
1535 
1536   /* create and initialize a linked list for storing column indices of the active row k */
1537   nlnk = am + 1;
1538   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1539 
1540   /* initial FreeSpace size is fill*(ai[am]+1) */
1541   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1542   current_space = free_space;
1543 
1544   for (k=0; k<am; k++){  /* for each active row k */
1545     /* initialize lnk by the column indices of row rip[k] of A */
1546     nzk   = 0;
1547     ncols = ai[rip[k]+1] - ai[rip[k]];
1548     if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
1549     ncols_upper = 0;
1550     for (j=0; j<ncols; j++){
1551       i = riip[*(aj + ai[rip[k]] + j)];
1552       if (i >= k){ /* only take upper triangular entry */
1553         cols[ncols_upper] = i;
1554         ncols_upper++;
1555       }
1556     }
1557     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1558     nzk += nlnk;
1559 
1560     /* update lnk by computing fill-in for each pivot row to be merged in */
1561     prow = jl[k]; /* 1st pivot row */
1562 
1563     while (prow < k){
1564       nextprow = jl[prow];
1565       /* merge prow into k-th row */
1566       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1567       jmax = ui[prow+1];
1568       ncols = jmax-jmin;
1569       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1570       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1571       nzk += nlnk;
1572 
1573       /* update il and jl for prow */
1574       if (jmin < jmax){
1575         il[prow] = jmin;
1576         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1577       }
1578       prow = nextprow;
1579     }
1580 
1581     /* if free space is not available, make more free space */
1582     if (current_space->local_remaining<nzk) {
1583       i = am - k + 1; /* num of unfactored rows */
1584       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1585       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1586       reallocs++;
1587     }
1588 
1589     /* copy data into free space, then initialize lnk */
1590     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1591 
1592     /* add the k-th row into il and jl */
1593     if (nzk-1 > 0){
1594       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1595       jl[k] = jl[i]; jl[i] = k;
1596       il[k] = ui[k] + 1;
1597     }
1598     ui_ptr[k] = current_space->array;
1599     current_space->array           += nzk;
1600     current_space->local_used      += nzk;
1601     current_space->local_remaining -= nzk;
1602 
1603     ui[k+1] = ui[k] + nzk;
1604   }
1605 
1606 #if defined(PETSC_USE_INFO)
1607   if (ai[am] != 0) {
1608     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1609     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1610     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1611     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1612   } else {
1613      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1614   }
1615 #endif
1616 
1617   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1618   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1619   ierr = PetscFree(jl);CHKERRQ(ierr);
1620 
1621   /* destroy list of free space and other temporary array(s) */
1622   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1623   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1624   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1625 
1626   /* put together the new matrix in MATSEQSBAIJ format */
1627 
1628   b = (Mat_SeqSBAIJ*)(fact)->data;
1629   b->singlemalloc = PETSC_FALSE;
1630   b->free_a       = PETSC_TRUE;
1631   b->free_ij      = PETSC_TRUE;
1632   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1633   b->j    = uj;
1634   b->i    = ui;
1635   b->diag = 0;
1636   b->ilen = 0;
1637   b->imax = 0;
1638   b->row  = perm;
1639   b->col  = perm;
1640   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1641   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1642   b->icol = iperm;
1643   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1644   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1645   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1646   b->maxnz = b->nz = ui[am];
1647 
1648   (fact)->info.factor_mallocs    = reallocs;
1649   (fact)->info.fill_ratio_given  = fill;
1650   if (ai[am] != 0) {
1651     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1652   } else {
1653     (fact)->info.fill_ratio_needed = 0.0;
1654   }
1655   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 #undef __FUNCT__
1660 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_iludt"
1661 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_iludt(Mat A,Vec bb,Vec xx)
1662 {
1663   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1664   PetscErrorCode    ierr;
1665   PetscInt          n = A->rmap->n;
1666   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
1667   PetscScalar       *x,sum;
1668   const PetscScalar *b;
1669   const MatScalar   *aa = a->a,*v;
1670   PetscInt          i,nz;
1671 
1672   PetscFunctionBegin;
1673   if (!n) PetscFunctionReturn(0);
1674 
1675   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1676   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1677 
1678   /* forward solve the lower triangular */
1679   x[0] = b[0];
1680   v    = aa;
1681   vi   = aj;
1682   for (i=1; i<n; i++) {
1683     nz  = ai[i+1] - ai[i];
1684     sum = b[i];
1685     while (nz--) sum -= *v++ * x[*vi++];
1686     x[i] = sum;
1687   }
1688 
1689   /* backward solve the upper triangular */
1690   v   = aa + adiag[n] + 1;
1691   vi  = aj + adiag[n] + 1;
1692   for (i=n-1; i>=0; i--){
1693     nz  = adiag[i] - adiag[i+1] - 1;
1694     sum = x[i];
1695     while (nz--) sum -= *v++ * x[*vi++];
1696     x[i] = sum*aa[adiag[i]];
1697     v++; vi++;
1698   }
1699 
1700   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1701   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1702   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatSolve_SeqAIJ_iludt"
1708 PetscErrorCode MatSolve_SeqAIJ_iludt(Mat A,Vec bb,Vec xx)
1709 {
1710   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1711   IS                iscol = a->col,isrow = a->row;
1712   PetscErrorCode    ierr;
1713   PetscInt          i,n=A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag=a->diag;
1714   PetscInt          nz;
1715   const PetscInt    *rout,*cout,*r,*c;
1716   PetscScalar       *x,*tmp,*tmps;
1717   const PetscScalar *b;
1718   const MatScalar   *aa = a->a,*v;
1719 
1720   PetscFunctionBegin;
1721   if (!n) PetscFunctionReturn(0);
1722 
1723   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1724   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1725   tmp  = a->solve_work;
1726 
1727   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1728   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1729 
1730   /* forward solve the lower triangular */
1731   tmp[0] = b[*r++];
1732   tmps   = tmp;
1733   v      = aa;
1734   vi     = aj;
1735   for (i=1; i<n; i++) {
1736     nz  = ai[i+1] - ai[i];
1737     tmp[i] = b[*r++];
1738     SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz);
1739     v += nz; vi += nz;
1740   }
1741 
1742   /* backward solve the upper triangular */
1743   v   = aa + adiag[n] + 1;
1744   vi  = aj + adiag[n] + 1;
1745   for (i=n-1; i>=0; i--){
1746     nz  = adiag[i] - adiag[i+1] - 1;
1747     SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz);
1748     x[*c--] = tmp[i] = tmp[i]*aa[adiag[i]];
1749     v += nz+1; vi += nz+1;
1750   }
1751 
1752   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1753   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1754   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1755   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1756   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
1762 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
1763 {
1764   Mat                B = *fact;
1765   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
1766   IS                 isicol;
1767   PetscErrorCode     ierr;
1768   const PetscInt     *r,*ic;
1769   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
1770   PetscInt           *bi,*bj,*bdiag;
1771   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au;
1772   PetscInt           nlnk,*lnk;
1773   PetscBT            lnkbt;
1774   PetscTruth         row_identity,icol_identity,both_identity;
1775   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp;
1776   const PetscInt     *ics;
1777   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
1778   PetscReal          dt=info->dt,shift=info->shiftinblocks;
1779   PetscInt           nnz_max;
1780   PetscTruth         missing;
1781 
1782   PetscFunctionBegin;
1783   /* ------- symbolic factorization, can be reused ---------*/
1784   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1785   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1786   adiag=a->diag;
1787 
1788   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1789 
1790   /* bdiag is location of diagonal in factor */
1791   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1792 
1793   /* allocate row pointers bi */
1794   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1795 
1796   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
1797   dtcount = (PetscInt)info->dtcount;
1798   if (dtcount > n/2) dtcount = n/2;
1799   nnz_max  = ai[n]+2*n*dtcount+2;
1800   ierr = PetscMalloc(nnz_max*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1801   ierr = PetscMalloc(nnz_max*sizeof(MatScalar),&ba);CHKERRQ(ierr);
1802 
1803   /* put together the new matrix */
1804   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1805   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
1806   b    = (Mat_SeqAIJ*)(B)->data;
1807   b->free_a       = PETSC_TRUE;
1808   b->free_ij      = PETSC_TRUE;
1809   b->singlemalloc = PETSC_FALSE;
1810   b->a          = ba;
1811   b->j          = bj;
1812   b->i          = bi;
1813   b->diag       = bdiag;
1814   b->ilen       = 0;
1815   b->imax       = 0;
1816   b->row        = isrow;
1817   b->col        = iscol;
1818   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1819   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1820   b->icol       = isicol;
1821   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1822 
1823   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1824   b->maxnz = nnz_max;
1825 
1826   (B)->factor                = MAT_FACTOR_ILUDT;
1827   (B)->info.factor_mallocs   = 0;
1828   (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
1829   CHKMEMQ;
1830   /* ------- end of symbolic factorization ---------*/
1831 
1832   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1833   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1834   ics  = ic;
1835 
1836   /* linked list for storing column indices of the active row */
1837   nlnk = n + 1;
1838   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1839 
1840   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
1841   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
1842   jtmp = im + n;
1843   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
1844   ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1845   vtmp = rtmp + n;
1846 
1847   bi[0]    = 0;
1848   bdiag[0] = nnz_max-1; /* location of diagonal in factor B */
1849   for (i=0; i<n; i++) {
1850     /* copy initial fill into linked list */
1851     nzi = 0; /* nonzeros for active row i */
1852     nzi = ai[r[i]+1] - ai[r[i]];
1853     if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1854     nzi_al = adiag[r[i]] - ai[r[i]];
1855     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
1856     ajtmp = aj + ai[r[i]];
1857     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1858 
1859     /* load in initial (unfactored row) */
1860     ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1861     aatmp = a->a + ai[r[i]];
1862     for (j=0; j<nzi; j++) {
1863       rtmp[ics[*ajtmp++]] = *aatmp++;
1864     }
1865 
1866     /* add pivot rows into linked list */
1867     row = lnk[n];
1868     while (row < i) {
1869       nzi_bl = bi[row+1] - bi[row] + 1;
1870       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
1871       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
1872       nzi  += nlnk;
1873       row   = lnk[row];
1874     }
1875 
1876     /* copy data from lnk into jtmp, then initialize lnk */
1877     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
1878 
1879     /* numerical factorization */
1880     bjtmp = jtmp;
1881     row   = *bjtmp++; /* 1st pivot row */
1882     while  (row < i) {
1883       pc         = rtmp + row;
1884       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
1885       multiplier = (*pc) * (*pv);
1886       *pc        = multiplier;
1887       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
1888         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
1889         pv         = ba + bdiag[row+1] + 1;
1890         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
1891         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
1892         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
1893         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
1894       }
1895       row = *bjtmp++;
1896     }
1897 
1898     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
1899     nzi_bl = 0; j = 0;
1900     while (jtmp[j] < i){ /* Note: jtmp is sorted */
1901       vtmp[j] = rtmp[jtmp[j]];
1902       nzi_bl++; j++;
1903     }
1904     nzi_bu = nzi - nzi_bl -1;
1905     while (j < nzi){
1906       vtmp[j] = rtmp[jtmp[j]];
1907       j++;
1908     }
1909 
1910     bjtmp = bj + bi[i];
1911     batmp = ba + bi[i];
1912     /* apply level dropping rule to L part */
1913     ncut = nzi_al + dtcount;
1914     if (ncut < nzi_bl){
1915       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
1916       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
1917     } else {
1918       ncut = nzi_bl;
1919     }
1920     for (j=0; j<ncut; j++){
1921       bjtmp[j] = jtmp[j];
1922       batmp[j] = vtmp[j];
1923       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
1924     }
1925     bi[i+1] = bi[i] + ncut;
1926     nzi = ncut + 1;
1927 
1928     /* apply level dropping rule to U part */
1929     ncut = nzi_au + dtcount;
1930     if (ncut < nzi_bu){
1931       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
1932       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
1933     } else {
1934       ncut = nzi_bu;
1935     }
1936     nzi += ncut;
1937 
1938     /* mark bdiagonal */
1939     bdiag[i+1]   = bdiag[i] - (ncut + 1);
1940     bjtmp = bj + bdiag[i];
1941     batmp = ba + bdiag[i];
1942     *bjtmp = i;
1943     *batmp = rtmp[i];
1944     if (*batmp == 0.0) *batmp = dt+shift;
1945     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
1946     /* printf(" (%d,%g),",*bjtmp,*batmp); */
1947 
1948     bjtmp = bj + bdiag[i+1]+1;
1949     batmp = ba + bdiag[i+1]+1;
1950     for (k=0; k<ncut; k++){
1951       bjtmp[k] = jtmp[nzi_bl+1+k];
1952       batmp[k] = vtmp[nzi_bl+1+k];
1953       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
1954     }
1955     /* printf("\n"); */
1956 
1957     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
1958     /*
1959     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
1960     printf(" ----------------------------\n");
1961     */
1962   } /* for (i=0; i<n; i++) */
1963   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
1964   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
1965 
1966   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1967   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1968 
1969   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1970   ierr = PetscFree(im);CHKERRQ(ierr);
1971   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1972 
1973   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
1974   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
1975 
1976   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1977   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
1978   both_identity = (PetscTruth) (row_identity && icol_identity);
1979   if (row_identity && icol_identity) {
1980     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_iludt;
1981   } else {
1982     B->ops->solve = MatSolve_SeqAIJ_iludt;
1983   }
1984 
1985   B->ops->lufactorsymbolic  = MatILUDTFactorSymbolic_SeqAIJ;
1986   B->ops->lufactornumeric   = MatILUDTFactorNumeric_SeqAIJ;
1987   B->ops->solveadd          = 0;
1988   B->ops->solvetranspose    = 0;
1989   B->ops->solvetransposeadd = 0;
1990   B->ops->matsolve          = 0;
1991   B->assembled              = PETSC_TRUE;
1992   B->preallocated           = PETSC_TRUE;
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 /* a wraper of MatILUDTFactor_SeqAIJ() */
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
1999 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
2000 {
2001   PetscErrorCode     ierr;
2002 
2003   PetscFunctionBegin;
2004   /* printf("MatILUDTFactorSymbolic_SeqAIJ is called...\n"); */
2005   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
2006 
2007   fact->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ;
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 /*
2012    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
2013    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
2014 */
2015 #undef __FUNCT__
2016 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
2017 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
2018 {
2019   Mat            C=fact;
2020   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
2021   IS             isrow = b->row,isicol = b->icol;
2022   PetscErrorCode ierr;
2023   const PetscInt *r,*ic,*ics;
2024   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
2025   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
2026   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
2027   PetscReal      dt=info->dt,shift=info->shiftinblocks;
2028   PetscTruth     row_identity, col_identity;
2029 
2030   PetscFunctionBegin;
2031   /* printf("MatILUDTFactorNumeric_SeqAIJ is called...\n"); */
2032   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2033   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
2034   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
2035   ics  = ic;
2036 
2037   for (i=0; i<n; i++){
2038     /* initialize rtmp array */
2039     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
2040     bjtmp = bj + bi[i];
2041     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
2042     rtmp[i] = 0.0;
2043     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
2044     bjtmp = bj + bdiag[i+1] + 1;
2045     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
2046 
2047     /* load in initial unfactored row of A */
2048     /* printf("row %d\n",i); */
2049     nz    = ai[r[i]+1] - ai[r[i]];
2050     ajtmp = aj + ai[r[i]];
2051     v     = aa + ai[r[i]];
2052     for (j=0; j<nz; j++) {
2053       rtmp[ics[*ajtmp++]] = v[j];
2054       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
2055     }
2056     /* printf("\n"); */
2057 
2058     /* numerical factorization */
2059     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
2060     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
2061     k = 0;
2062     while (k < nzl){
2063       row   = *bjtmp++;
2064       /* printf("  prow %d\n",row); */
2065       pc         = rtmp + row;
2066       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
2067       multiplier = (*pc) * (*pv);
2068       *pc        = multiplier;
2069       if (PetscAbsScalar(multiplier) > dt){
2070         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
2071         pv         = b->a + bdiag[row+1] + 1;
2072         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
2073         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
2074         /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */
2075       }
2076       k++;
2077     }
2078 
2079     /* finished row so stick it into b->a */
2080     /* L-part */
2081     pv = b->a + bi[i] ;
2082     pj = bj + bi[i] ;
2083     nzl = bi[i+1] - bi[i];
2084     for (j=0; j<nzl; j++) {
2085       pv[j] = rtmp[pj[j]];
2086       /* printf(" (%d,%g),",pj[j],pv[j]); */
2087     }
2088 
2089     /* diagonal: invert diagonal entries for simplier triangular solves */
2090     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
2091     b->a[bdiag[i]] = 1.0/rtmp[i];
2092     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
2093 
2094     /* U-part */
2095     pv = b->a + bdiag[i+1] + 1;
2096     pj = bj + bdiag[i+1] + 1;
2097     nzu = bdiag[i] - bdiag[i+1] - 1;
2098     for (j=0; j<nzu; j++) {
2099       pv[j] = rtmp[pj[j]];
2100       /* printf(" (%d,%g),",pj[j],pv[j]); */
2101     }
2102     /* printf("\n"); */
2103   }
2104 
2105   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2106   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2107   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2108 
2109   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
2110   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
2111   if (row_identity && col_identity) {
2112     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_iludt;
2113   } else {
2114     C->ops->solve   = MatSolve_SeqAIJ_iludt;
2115   }
2116   C->ops->solveadd           = 0;
2117   C->ops->solvetranspose     = 0;
2118   C->ops->solvetransposeadd  = 0;
2119   C->ops->matsolve           = 0;
2120   C->assembled    = PETSC_TRUE;
2121   C->preallocated = PETSC_TRUE;
2122   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
2123   PetscFunctionReturn(0);
2124 }
2125