xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision d25a37e130ea49ebd07f0cfbda24aaa07a3ed83a)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
10 /*
11       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12 
13       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14 */
15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16 {
17   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
18   PetscErrorCode    ierr;
19   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20   const PetscInt    *ai = a->i, *aj = a->j;
21   const PetscScalar *aa = a->a;
22   PetscBool         *done;
23   PetscReal         best,past = 0,future;
24 
25   PetscFunctionBegin;
26   /* pick initial row */
27   best = -1;
28   for (i=0; i<n; i++) {
29     future = 0.0;
30     for (j=ai[i]; j<ai[i+1]; j++) {
31       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best = past/future;
36       current = i;
37     }
38   }
39 
40   ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr);
41   ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
42   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0.0;
69         past   = 0.0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk]) past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future  += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current = newcurrent;
84     order[i+1] = current;
85   }
86   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr = PetscFree(done);CHKERRQ(ierr);
90   ierr = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 EXTERN_C_BEGIN
96 #undef __FUNCT__
97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
99 {
100   PetscFunctionBegin;
101   *flg = PETSC_TRUE;
102   PetscFunctionReturn(0);
103 }
104 EXTERN_C_END
105 
106 EXTERN_C_BEGIN
107 #undef __FUNCT__
108 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110 {
111   PetscInt           n = A->rmap->n;
112   PetscErrorCode     ierr;
113 
114   PetscFunctionBegin;
115 #if defined(PETSC_USE_COMPLEX)
116   if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
117 #endif
118   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
119   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
120   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
121     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
122     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
123     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
124     ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
125   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
126     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
127     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
128     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
131   (*B)->factortype = ftype;
132   PetscFunctionReturn(0);
133 }
134 EXTERN_C_END
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
138 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
139 {
140   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
141   IS                 isicol;
142   PetscErrorCode     ierr;
143   const PetscInt     *r,*ic;
144   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
145   PetscInt           *bi,*bj,*ajtmp;
146   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
147   PetscReal          f;
148   PetscInt           nlnk,*lnk,k,**bi_ptr;
149   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
150   PetscBT            lnkbt;
151 
152   PetscFunctionBegin;
153   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
154   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
155   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
156   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
157 
158   /* get new row pointers */
159   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
160   bi[0] = 0;
161 
162   /* bdiag is location of diagonal in factor */
163   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
164   bdiag[0] = 0;
165 
166   /* linked list for storing column indices of the active row */
167   nlnk = n + 1;
168   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
169 
170   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
171 
172   /* initial FreeSpace size is f*(ai[n]+1) */
173   f = info->fill;
174   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
175   current_space = free_space;
176 
177   for (i=0; i<n; i++) {
178     /* copy previous fill into linked list */
179     nzi = 0;
180     nnz = ai[r[i]+1] - ai[r[i]];
181     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
182     ajtmp = aj + ai[r[i]];
183     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
184     nzi += nlnk;
185 
186     /* add pivot rows into linked list */
187     row = lnk[n];
188     while (row < i) {
189       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
190       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
191       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
192       nzi += nlnk;
193       row  = lnk[row];
194     }
195     bi[i+1] = bi[i] + nzi;
196     im[i]   = nzi;
197 
198     /* mark bdiag */
199     nzbd = 0;
200     nnz  = nzi;
201     k    = lnk[n];
202     while (nnz-- && k < i){
203       nzbd++;
204       k = lnk[k];
205     }
206     bdiag[i] = bi[i] + nzbd;
207 
208     /* if free space is not available, make more free space */
209     if (current_space->local_remaining<nzi) {
210       nnz = (n - i)*nzi; /* estimated and max additional space needed */
211       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
212       reallocs++;
213     }
214 
215     /* copy data into free space, then initialize lnk */
216     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
217     bi_ptr[i] = current_space->array;
218     current_space->array           += nzi;
219     current_space->local_used      += nzi;
220     current_space->local_remaining -= nzi;
221   }
222 #if defined(PETSC_USE_INFO)
223   if (ai[n] != 0) {
224     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
225     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
226     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
227     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
228     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
229   } else {
230     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
231   }
232 #endif
233 
234   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
235   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
236 
237   /* destroy list of free space and other temporary array(s) */
238   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
239   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
240   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
241   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
242 
243   /* put together the new matrix */
244   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
245   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
246   b    = (Mat_SeqAIJ*)(B)->data;
247   b->free_a       = PETSC_TRUE;
248   b->free_ij      = PETSC_TRUE;
249   b->singlemalloc = PETSC_FALSE;
250   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
251   b->j          = bj;
252   b->i          = bi;
253   b->diag       = bdiag;
254   b->ilen       = 0;
255   b->imax       = 0;
256   b->row        = isrow;
257   b->col        = iscol;
258   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
259   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
260   b->icol       = isicol;
261   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
262 
263   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
264   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
265   b->maxnz = b->nz = bi[n] ;
266 
267   (B)->factortype            = MAT_FACTOR_LU;
268   (B)->info.factor_mallocs   = reallocs;
269   (B)->info.fill_ratio_given = f;
270 
271   if (ai[n]) {
272     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
273   } else {
274     (B)->info.fill_ratio_needed = 0.0;
275   }
276   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
277   if (a->inode.size) {
278     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
285 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
286 {
287   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
288   IS                 isicol;
289   PetscErrorCode     ierr;
290   const PetscInt     *r,*ic;
291   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
292   PetscInt           *bi,*bj,*ajtmp;
293   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
294   PetscReal          f;
295   PetscInt           nlnk,*lnk,k,**bi_ptr;
296   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
297   PetscBT            lnkbt;
298 
299   PetscFunctionBegin;
300   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
301   /*
302   PetscBool          olddatastruct=PETSC_FALSE;
303   ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
304   if(olddatastruct){
305     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
306     PetscFunctionReturn(0);
307   }
308   */
309   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
310   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
311   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
312   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
313 
314   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
315   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
316   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
317   bi[0] = bdiag[0] = 0;
318 
319   /* linked list for storing column indices of the active row */
320   nlnk = n + 1;
321   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
322 
323   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
324 
325   /* initial FreeSpace size is f*(ai[n]+1) */
326   f = info->fill;
327   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
328   current_space = free_space;
329 
330   for (i=0; i<n; i++) {
331     /* copy previous fill into linked list */
332     nzi = 0;
333     nnz = ai[r[i]+1] - ai[r[i]];
334     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
335     ajtmp = aj + ai[r[i]];
336     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
337     nzi += nlnk;
338 
339     /* add pivot rows into linked list */
340     row = lnk[n];
341     while (row < i){
342       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
343       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
344       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
345       nzi  += nlnk;
346       row   = lnk[row];
347     }
348     bi[i+1] = bi[i] + nzi;
349     im[i]   = nzi;
350 
351     /* mark bdiag */
352     nzbd = 0;
353     nnz  = nzi;
354     k    = lnk[n];
355     while (nnz-- && k < i){
356       nzbd++;
357       k = lnk[k];
358     }
359     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
360 
361     /* if free space is not available, make more free space */
362     if (current_space->local_remaining<nzi) {
363       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
364       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
365       reallocs++;
366     }
367 
368     /* copy data into free space, then initialize lnk */
369     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
370     bi_ptr[i] = current_space->array;
371     current_space->array           += nzi;
372     current_space->local_used      += nzi;
373     current_space->local_remaining -= nzi;
374   }
375 
376   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
377   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
378 
379   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
380   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
381   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
382   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
383   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
384 
385   /* put together the new matrix */
386   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
387   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
388   b    = (Mat_SeqAIJ*)(B)->data;
389   b->free_a       = PETSC_TRUE;
390   b->free_ij      = PETSC_TRUE;
391   b->singlemalloc = PETSC_FALSE;
392   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
393   b->j          = bj;
394   b->i          = bi;
395   b->diag       = bdiag;
396   b->ilen       = 0;
397   b->imax       = 0;
398   b->row        = isrow;
399   b->col        = iscol;
400   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
401   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
402   b->icol       = isicol;
403   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
404 
405   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
406   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
407   b->maxnz = b->nz = bdiag[0]+1;
408   B->factortype            = MAT_FACTOR_LU;
409   B->info.factor_mallocs   = reallocs;
410   B->info.fill_ratio_given = f;
411 
412   if (ai[n]) {
413     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
414   } else {
415     B->info.fill_ratio_needed = 0.0;
416   }
417 #if defined(PETSC_USE_INFO)
418   if (ai[n] != 0) {
419     PetscReal af = B->info.fill_ratio_needed;
420     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
421     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
422     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
423     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
424   } else {
425     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
426   }
427 #endif
428   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
429   if (a->inode.size) {
430     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
431   }
432   PetscFunctionReturn(0);
433 }
434 
435 /*
436     Trouble in factorization, should we dump the original matrix?
437 */
438 #undef __FUNCT__
439 #define __FUNCT__ "MatFactorDumpMatrix"
440 PetscErrorCode MatFactorDumpMatrix(Mat A)
441 {
442   PetscErrorCode ierr;
443   PetscBool      flg = PETSC_FALSE;
444 
445   PetscFunctionBegin;
446   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
447   if (flg) {
448     PetscViewer viewer;
449     char        filename[PETSC_MAX_PATH_LEN];
450 
451     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
452     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
453     ierr = MatView(A,viewer);CHKERRQ(ierr);
454     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
455   }
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
461 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
462 {
463   Mat              C=B;
464   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
465   IS               isrow = b->row,isicol = b->icol;
466   PetscErrorCode   ierr;
467   const PetscInt   *r,*ic,*ics;
468   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
469   PetscInt         i,j,k,nz,nzL,row,*pj;
470   const PetscInt   *ajtmp,*bjtmp;
471   MatScalar        *rtmp,*pc,multiplier,*pv;
472   const  MatScalar *aa=a->a,*v;
473   PetscBool        row_identity,col_identity;
474   FactorShiftCtx   sctx;
475   const PetscInt   *ddiag;
476   PetscReal        rs;
477   MatScalar        d;
478 
479   PetscFunctionBegin;
480   /* MatPivotSetUp(): initialize shift context sctx */
481   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
482 
483   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
484     ddiag          = a->diag;
485     sctx.shift_top = info->zeropivot;
486     for (i=0; i<n; i++) {
487       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
488       d  = (aa)[ddiag[i]];
489       rs = -PetscAbsScalar(d) - PetscRealPart(d);
490       v  = aa+ai[i];
491       nz = ai[i+1] - ai[i];
492       for (j=0; j<nz; j++)
493 	rs += PetscAbsScalar(v[j]);
494       if (rs>sctx.shift_top) sctx.shift_top = rs;
495     }
496     sctx.shift_top   *= 1.1;
497     sctx.nshift_max   = 5;
498     sctx.shift_lo     = 0.;
499     sctx.shift_hi     = 1.;
500   }
501 
502   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
503   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
504   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
505   ics  = ic;
506 
507   do {
508     sctx.newshift = PETSC_FALSE;
509     for (i=0; i<n; i++){
510       /* zero rtmp */
511       /* L part */
512       nz    = bi[i+1] - bi[i];
513       bjtmp = bj + bi[i];
514       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
515 
516       /* U part */
517       nz = bdiag[i]-bdiag[i+1];
518       bjtmp = bj + bdiag[i+1]+1;
519       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
520 
521       /* load in initial (unfactored row) */
522       nz    = ai[r[i]+1] - ai[r[i]];
523       ajtmp = aj + ai[r[i]];
524       v     = aa + ai[r[i]];
525       for (j=0; j<nz; j++) {
526         rtmp[ics[ajtmp[j]]] = v[j];
527       }
528       /* ZeropivotApply() */
529       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
530 
531       /* elimination */
532       bjtmp = bj + bi[i];
533       row   = *bjtmp++;
534       nzL   = bi[i+1] - bi[i];
535       for(k=0; k < nzL;k++) {
536         pc = rtmp + row;
537         if (*pc != 0.0) {
538           pv         = b->a + bdiag[row];
539           multiplier = *pc * (*pv);
540           *pc        = multiplier;
541           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
542 	  pv = b->a + bdiag[row+1]+1;
543 	  nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
544           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
545           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
546         }
547         row = *bjtmp++;
548       }
549 
550       /* finished row so stick it into b->a */
551       rs = 0.0;
552       /* L part */
553       pv   = b->a + bi[i] ;
554       pj   = b->j + bi[i] ;
555       nz   = bi[i+1] - bi[i];
556       for (j=0; j<nz; j++) {
557         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
558       }
559 
560       /* U part */
561       pv = b->a + bdiag[i+1]+1;
562       pj = b->j + bdiag[i+1]+1;
563       nz = bdiag[i] - bdiag[i+1]-1;
564       for (j=0; j<nz; j++) {
565         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
566       }
567 
568       sctx.rs  = rs;
569       sctx.pv  = rtmp[i];
570       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
571       if(sctx.newshift) break; /* break for-loop */
572       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
573 
574       /* Mark diagonal and invert diagonal for simplier triangular solves */
575       pv  = b->a + bdiag[i];
576       *pv = 1.0/rtmp[i];
577 
578     } /* endof for (i=0; i<n; i++){ */
579 
580     /* MatPivotRefine() */
581     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
582       /*
583        * if no shift in this attempt & shifting & started shifting & can refine,
584        * then try lower shift
585        */
586       sctx.shift_hi       = sctx.shift_fraction;
587       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
588       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
589       sctx.newshift       = PETSC_TRUE;
590       sctx.nshift++;
591     }
592   } while (sctx.newshift);
593 
594   ierr = PetscFree(rtmp);CHKERRQ(ierr);
595   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
596   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
597 
598   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
599   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
600   if (row_identity && col_identity) {
601     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
602   } else {
603     C->ops->solve = MatSolve_SeqAIJ;
604   }
605   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
606   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
607   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
608   C->ops->matsolve           = MatMatSolve_SeqAIJ;
609   C->assembled    = PETSC_TRUE;
610   C->preallocated = PETSC_TRUE;
611   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
612 
613   /* MatShiftView(A,info,&sctx) */
614   if (sctx.nshift){
615     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
616       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
617     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
618       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
619     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
620       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
621     }
622   }
623   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 #undef __FUNCT__
628 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
629 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
630 {
631   Mat             C=B;
632   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
633   IS              isrow = b->row,isicol = b->icol;
634   PetscErrorCode  ierr;
635   const PetscInt   *r,*ic,*ics;
636   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
637   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
638   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
639   MatScalar       *pv,*rtmp,*pc,multiplier,d;
640   const MatScalar *v,*aa=a->a;
641   PetscReal       rs=0.0;
642   FactorShiftCtx  sctx;
643   const PetscInt  *ddiag;
644   PetscBool       row_identity, col_identity;
645 
646   PetscFunctionBegin;
647   /* MatPivotSetUp(): initialize shift context sctx */
648   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
649 
650   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
651     ddiag          = a->diag;
652     sctx.shift_top = info->zeropivot;
653     for (i=0; i<n; i++) {
654       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
655       d  = (aa)[ddiag[i]];
656       rs = -PetscAbsScalar(d) - PetscRealPart(d);
657       v  = aa+ai[i];
658       nz = ai[i+1] - ai[i];
659       for (j=0; j<nz; j++)
660 	rs += PetscAbsScalar(v[j]);
661       if (rs>sctx.shift_top) sctx.shift_top = rs;
662     }
663     sctx.shift_top   *= 1.1;
664     sctx.nshift_max   = 5;
665     sctx.shift_lo     = 0.;
666     sctx.shift_hi     = 1.;
667   }
668 
669   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
670   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
671   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
672   ics  = ic;
673 
674   do {
675     sctx.newshift = PETSC_FALSE;
676     for (i=0; i<n; i++){
677       nz    = bi[i+1] - bi[i];
678       bjtmp = bj + bi[i];
679       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
680 
681       /* load in initial (unfactored row) */
682       nz    = ai[r[i]+1] - ai[r[i]];
683       ajtmp = aj + ai[r[i]];
684       v     = aa + ai[r[i]];
685       for (j=0; j<nz; j++) {
686         rtmp[ics[ajtmp[j]]] = v[j];
687       }
688       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
689 
690       row = *bjtmp++;
691       while  (row < i) {
692         pc = rtmp + row;
693         if (*pc != 0.0) {
694           pv         = b->a + diag_offset[row];
695           pj         = b->j + diag_offset[row] + 1;
696           multiplier = *pc / *pv++;
697           *pc        = multiplier;
698           nz         = bi[row+1] - diag_offset[row] - 1;
699           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
700           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
701         }
702         row = *bjtmp++;
703       }
704       /* finished row so stick it into b->a */
705       pv   = b->a + bi[i] ;
706       pj   = b->j + bi[i] ;
707       nz   = bi[i+1] - bi[i];
708       diag = diag_offset[i] - bi[i];
709       rs   = 0.0;
710       for (j=0; j<nz; j++) {
711         pv[j] = rtmp[pj[j]];
712         rs   += PetscAbsScalar(pv[j]);
713       }
714       rs   -= PetscAbsScalar(pv[diag]);
715 
716       sctx.rs  = rs;
717       sctx.pv  = pv[diag];
718       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
719       if (sctx.newshift) break;
720       pv[diag] = sctx.pv;
721     }
722 
723     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
724       /*
725        * if no shift in this attempt & shifting & started shifting & can refine,
726        * then try lower shift
727        */
728       sctx.shift_hi       = sctx.shift_fraction;
729       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
730       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
731       sctx.newshift       = PETSC_TRUE;
732       sctx.nshift++;
733     }
734   } while (sctx.newshift);
735 
736   /* invert diagonal entries for simplier triangular solves */
737   for (i=0; i<n; i++) {
738     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
739   }
740   ierr = PetscFree(rtmp);CHKERRQ(ierr);
741   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
742   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
743 
744   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
745   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
746   if (row_identity && col_identity) {
747     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
748   } else {
749     C->ops->solve   = MatSolve_SeqAIJ_inplace;
750   }
751   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
752   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
753   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
754   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
755   C->assembled    = PETSC_TRUE;
756   C->preallocated = PETSC_TRUE;
757   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
758   if (sctx.nshift){
759      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
760       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
761     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
762       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
763     }
764   }
765   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
766   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
767   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
768   PetscFunctionReturn(0);
769 }
770 
771 /*
772    This routine implements inplace ILU(0) with row or/and column permutations.
773    Input:
774      A - original matrix
775    Output;
776      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
777          a->j (col index) is permuted by the inverse of colperm, then sorted
778          a->a reordered accordingly with a->j
779          a->diag (ptr to diagonal elements) is updated.
780 */
781 #undef __FUNCT__
782 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
783 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
784 {
785   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
786   IS             isrow = a->row,isicol = a->icol;
787   PetscErrorCode ierr;
788   const PetscInt *r,*ic,*ics;
789   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
790   PetscInt       *ajtmp,nz,row;
791   PetscInt       *diag = a->diag,nbdiag,*pj;
792   PetscScalar    *rtmp,*pc,multiplier,d;
793   MatScalar      *pv,*v;
794   PetscReal      rs;
795   FactorShiftCtx sctx;
796   const  MatScalar *aa=a->a,*vtmp;
797 
798   PetscFunctionBegin;
799   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
800 
801   /* MatPivotSetUp(): initialize shift context sctx */
802   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
803 
804   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
805     const PetscInt   *ddiag = a->diag;
806     sctx.shift_top = info->zeropivot;
807     for (i=0; i<n; i++) {
808       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
809       d  = (aa)[ddiag[i]];
810       rs = -PetscAbsScalar(d) - PetscRealPart(d);
811       vtmp  = aa+ai[i];
812       nz = ai[i+1] - ai[i];
813       for (j=0; j<nz; j++)
814 	rs += PetscAbsScalar(vtmp[j]);
815       if (rs>sctx.shift_top) sctx.shift_top = rs;
816     }
817     sctx.shift_top   *= 1.1;
818     sctx.nshift_max   = 5;
819     sctx.shift_lo     = 0.;
820     sctx.shift_hi     = 1.;
821   }
822 
823   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
824   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
825   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
826   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
827   ics = ic;
828 
829 #if defined(MV)
830   sctx.shift_top      = 0.;
831   sctx.nshift_max     = 0;
832   sctx.shift_lo       = 0.;
833   sctx.shift_hi       = 0.;
834   sctx.shift_fraction = 0.;
835 
836   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
837     sctx.shift_top = 0.;
838     for (i=0; i<n; i++) {
839       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
840       d  = (a->a)[diag[i]];
841       rs = -PetscAbsScalar(d) - PetscRealPart(d);
842       v  = a->a+ai[i];
843       nz = ai[i+1] - ai[i];
844       for (j=0; j<nz; j++)
845 	rs += PetscAbsScalar(v[j]);
846       if (rs>sctx.shift_top) sctx.shift_top = rs;
847     }
848     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
849     sctx.shift_top    *= 1.1;
850     sctx.nshift_max   = 5;
851     sctx.shift_lo     = 0.;
852     sctx.shift_hi     = 1.;
853   }
854 
855   sctx.shift_amount = 0.;
856   sctx.nshift       = 0;
857 #endif
858 
859   do {
860     sctx.newshift = PETSC_FALSE;
861     for (i=0; i<n; i++){
862       /* load in initial unfactored row */
863       nz    = ai[r[i]+1] - ai[r[i]];
864       ajtmp = aj + ai[r[i]];
865       v     = a->a + ai[r[i]];
866       /* sort permuted ajtmp and values v accordingly */
867       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
868       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
869 
870       diag[r[i]] = ai[r[i]];
871       for (j=0; j<nz; j++) {
872         rtmp[ajtmp[j]] = v[j];
873         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
874       }
875       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
876 
877       row = *ajtmp++;
878       while  (row < i) {
879         pc = rtmp + row;
880         if (*pc != 0.0) {
881           pv         = a->a + diag[r[row]];
882           pj         = aj + diag[r[row]] + 1;
883 
884           multiplier = *pc / *pv++;
885           *pc        = multiplier;
886           nz         = ai[r[row]+1] - diag[r[row]] - 1;
887           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
888           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
889         }
890         row = *ajtmp++;
891       }
892       /* finished row so overwrite it onto a->a */
893       pv   = a->a + ai[r[i]] ;
894       pj   = aj + ai[r[i]] ;
895       nz   = ai[r[i]+1] - ai[r[i]];
896       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
897 
898       rs   = 0.0;
899       for (j=0; j<nz; j++) {
900         pv[j] = rtmp[pj[j]];
901         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
902       }
903 
904       sctx.rs  = rs;
905       sctx.pv  = pv[nbdiag];
906       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
907       if (sctx.newshift) break;
908       pv[nbdiag] = sctx.pv;
909     }
910 
911     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
912       /*
913        * if no shift in this attempt & shifting & started shifting & can refine,
914        * then try lower shift
915        */
916       sctx.shift_hi        = sctx.shift_fraction;
917       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
918       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
919       sctx.newshift         = PETSC_TRUE;
920       sctx.nshift++;
921     }
922   } while (sctx.newshift);
923 
924   /* invert diagonal entries for simplier triangular solves */
925   for (i=0; i<n; i++) {
926     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
927   }
928 
929   ierr = PetscFree(rtmp);CHKERRQ(ierr);
930   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
931   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
932   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
933   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
934   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
935   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
936   A->assembled = PETSC_TRUE;
937   A->preallocated = PETSC_TRUE;
938   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
939   if (sctx.nshift){
940     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
941       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
942     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
943       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
944     }
945   }
946   PetscFunctionReturn(0);
947 }
948 
949 /* ----------------------------------------------------------- */
950 #undef __FUNCT__
951 #define __FUNCT__ "MatLUFactor_SeqAIJ"
952 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
953 {
954   PetscErrorCode ierr;
955   Mat            C;
956 
957   PetscFunctionBegin;
958   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
959   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
960   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
961   A->ops->solve            = C->ops->solve;
962   A->ops->solvetranspose   = C->ops->solvetranspose;
963   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
964   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 /* ----------------------------------------------------------- */
968 
969 
970 #undef __FUNCT__
971 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
972 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
973 {
974   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
975   IS                iscol = a->col,isrow = a->row;
976   PetscErrorCode    ierr;
977   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
978   PetscInt          nz;
979   const PetscInt    *rout,*cout,*r,*c;
980   PetscScalar       *x,*tmp,*tmps,sum;
981   const PetscScalar *b;
982   const MatScalar   *aa = a->a,*v;
983 
984   PetscFunctionBegin;
985   if (!n) PetscFunctionReturn(0);
986 
987   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
988   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
989   tmp  = a->solve_work;
990 
991   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
992   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
993 
994   /* forward solve the lower triangular */
995   tmp[0] = b[*r++];
996   tmps   = tmp;
997   for (i=1; i<n; i++) {
998     v   = aa + ai[i] ;
999     vi  = aj + ai[i] ;
1000     nz  = a->diag[i] - ai[i];
1001     sum = b[*r++];
1002     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1003     tmp[i] = sum;
1004   }
1005 
1006   /* backward solve the upper triangular */
1007   for (i=n-1; i>=0; i--){
1008     v   = aa + a->diag[i] + 1;
1009     vi  = aj + a->diag[i] + 1;
1010     nz  = ai[i+1] - a->diag[i] - 1;
1011     sum = tmp[i];
1012     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1013     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1014   }
1015 
1016   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1017   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1018   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1019   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1020   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1026 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1027 {
1028   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1029   IS              iscol = a->col,isrow = a->row;
1030   PetscErrorCode  ierr;
1031   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1032   PetscInt        nz,neq;
1033   const PetscInt  *rout,*cout,*r,*c;
1034   PetscScalar     *x,*b,*tmp,*tmps,sum;
1035   const MatScalar *aa = a->a,*v;
1036   PetscBool       bisdense,xisdense;
1037 
1038   PetscFunctionBegin;
1039   if (!n) PetscFunctionReturn(0);
1040 
1041   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1042   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1043   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1044   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1045 
1046   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1047   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1048 
1049   tmp  = a->solve_work;
1050   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1051   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1052 
1053   for (neq=0; neq<B->cmap->n; neq++){
1054     /* forward solve the lower triangular */
1055     tmp[0] = b[r[0]];
1056     tmps   = tmp;
1057     for (i=1; i<n; i++) {
1058       v   = aa + ai[i] ;
1059       vi  = aj + ai[i] ;
1060       nz  = a->diag[i] - ai[i];
1061       sum = b[r[i]];
1062       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1063       tmp[i] = sum;
1064     }
1065     /* backward solve the upper triangular */
1066     for (i=n-1; i>=0; i--){
1067       v   = aa + a->diag[i] + 1;
1068       vi  = aj + a->diag[i] + 1;
1069       nz  = ai[i+1] - a->diag[i] - 1;
1070       sum = tmp[i];
1071       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1072       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1073     }
1074 
1075     b += n;
1076     x += n;
1077   }
1078   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1079   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1080   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1081   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1082   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1088 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1089 {
1090   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1091   IS              iscol = a->col,isrow = a->row;
1092   PetscErrorCode  ierr;
1093   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1094   PetscInt        nz,neq;
1095   const PetscInt  *rout,*cout,*r,*c;
1096   PetscScalar     *x,*b,*tmp,sum;
1097   const MatScalar *aa = a->a,*v;
1098   PetscBool       bisdense,xisdense;
1099 
1100   PetscFunctionBegin;
1101   if (!n) PetscFunctionReturn(0);
1102 
1103   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1104   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1105   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1106   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1107 
1108   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1109   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1110 
1111   tmp  = a->solve_work;
1112   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1113   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1114 
1115   for (neq=0; neq<B->cmap->n; neq++){
1116     /* forward solve the lower triangular */
1117     tmp[0] = b[r[0]];
1118     v      = aa;
1119     vi     = aj;
1120     for (i=1; i<n; i++) {
1121       nz  = ai[i+1] - ai[i];
1122       sum = b[r[i]];
1123       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1124       tmp[i] = sum;
1125       v += nz; vi += nz;
1126     }
1127 
1128     /* backward solve the upper triangular */
1129     for (i=n-1; i>=0; i--){
1130       v   = aa + adiag[i+1]+1;
1131       vi  = aj + adiag[i+1]+1;
1132       nz  = adiag[i]-adiag[i+1]-1;
1133       sum = tmp[i];
1134       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1135       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1136     }
1137 
1138     b += n;
1139     x += n;
1140   }
1141   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1142   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1143   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1144   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1145   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1151 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1152 {
1153   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1154   IS              iscol = a->col,isrow = a->row;
1155   PetscErrorCode  ierr;
1156   const PetscInt  *r,*c,*rout,*cout;
1157   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1158   PetscInt        nz,row;
1159   PetscScalar     *x,*b,*tmp,*tmps,sum;
1160   const MatScalar *aa = a->a,*v;
1161 
1162   PetscFunctionBegin;
1163   if (!n) PetscFunctionReturn(0);
1164 
1165   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1166   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1167   tmp  = a->solve_work;
1168 
1169   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1170   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1171 
1172   /* forward solve the lower triangular */
1173   tmp[0] = b[*r++];
1174   tmps   = tmp;
1175   for (row=1; row<n; row++) {
1176     i   = rout[row]; /* permuted row */
1177     v   = aa + ai[i] ;
1178     vi  = aj + ai[i] ;
1179     nz  = a->diag[i] - ai[i];
1180     sum = b[*r++];
1181     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1182     tmp[row] = sum;
1183   }
1184 
1185   /* backward solve the upper triangular */
1186   for (row=n-1; row>=0; row--){
1187     i   = rout[row]; /* permuted row */
1188     v   = aa + a->diag[i] + 1;
1189     vi  = aj + a->diag[i] + 1;
1190     nz  = ai[i+1] - a->diag[i] - 1;
1191     sum = tmp[row];
1192     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1193     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1194   }
1195 
1196   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1197   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1198   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1199   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1200   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 /* ----------------------------------------------------------- */
1205 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1208 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1209 {
1210   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1211   PetscErrorCode    ierr;
1212   PetscInt          n = A->rmap->n;
1213   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1214   PetscScalar       *x;
1215   const PetscScalar *b;
1216   const MatScalar   *aa = a->a;
1217 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1218   PetscInt          adiag_i,i,nz,ai_i;
1219   const PetscInt    *vi;
1220   const MatScalar   *v;
1221   PetscScalar       sum;
1222 #endif
1223 
1224   PetscFunctionBegin;
1225   if (!n) PetscFunctionReturn(0);
1226 
1227   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1228   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1229 
1230 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1231   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1232 #else
1233   /* forward solve the lower triangular */
1234   x[0] = b[0];
1235   for (i=1; i<n; i++) {
1236     ai_i = ai[i];
1237     v    = aa + ai_i;
1238     vi   = aj + ai_i;
1239     nz   = adiag[i] - ai_i;
1240     sum  = b[i];
1241     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1242     x[i] = sum;
1243   }
1244 
1245   /* backward solve the upper triangular */
1246   for (i=n-1; i>=0; i--){
1247     adiag_i = adiag[i];
1248     v       = aa + adiag_i + 1;
1249     vi      = aj + adiag_i + 1;
1250     nz      = ai[i+1] - adiag_i - 1;
1251     sum     = x[i];
1252     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1253     x[i]    = sum*aa[adiag_i];
1254   }
1255 #endif
1256   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1257   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1258   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1264 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1265 {
1266   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1267   IS                iscol = a->col,isrow = a->row;
1268   PetscErrorCode    ierr;
1269   PetscInt          i, n = A->rmap->n,j;
1270   PetscInt          nz;
1271   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1272   PetscScalar       *x,*tmp,sum;
1273   const PetscScalar *b;
1274   const MatScalar   *aa = a->a,*v;
1275 
1276   PetscFunctionBegin;
1277   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1278 
1279   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1280   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1281   tmp  = a->solve_work;
1282 
1283   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1284   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1285 
1286   /* forward solve the lower triangular */
1287   tmp[0] = b[*r++];
1288   for (i=1; i<n; i++) {
1289     v   = aa + ai[i] ;
1290     vi  = aj + ai[i] ;
1291     nz  = a->diag[i] - ai[i];
1292     sum = b[*r++];
1293     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1294     tmp[i] = sum;
1295   }
1296 
1297   /* backward solve the upper triangular */
1298   for (i=n-1; i>=0; i--){
1299     v   = aa + a->diag[i] + 1;
1300     vi  = aj + a->diag[i] + 1;
1301     nz  = ai[i+1] - a->diag[i] - 1;
1302     sum = tmp[i];
1303     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1304     tmp[i] = sum*aa[a->diag[i]];
1305     x[*c--] += tmp[i];
1306   }
1307 
1308   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1309   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1310   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1311   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1312   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1313 
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1319 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1320 {
1321   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1322   IS                iscol = a->col,isrow = a->row;
1323   PetscErrorCode    ierr;
1324   PetscInt          i, n = A->rmap->n,j;
1325   PetscInt          nz;
1326   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1327   PetscScalar       *x,*tmp,sum;
1328   const PetscScalar *b;
1329   const MatScalar   *aa = a->a,*v;
1330 
1331   PetscFunctionBegin;
1332   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1333 
1334   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1335   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1336   tmp  = a->solve_work;
1337 
1338   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1339   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1340 
1341   /* forward solve the lower triangular */
1342   tmp[0] = b[r[0]];
1343   v      = aa;
1344   vi     = aj;
1345   for (i=1; i<n; i++) {
1346     nz  = ai[i+1] - ai[i];
1347     sum = b[r[i]];
1348     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1349     tmp[i] = sum;
1350     v += nz; vi += nz;
1351   }
1352 
1353   /* backward solve the upper triangular */
1354   v  = aa + adiag[n-1];
1355   vi = aj + adiag[n-1];
1356   for (i=n-1; i>=0; i--){
1357     nz  = adiag[i] - adiag[i+1] - 1;
1358     sum = tmp[i];
1359     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1360     tmp[i] = sum*v[nz];
1361     x[c[i]] += tmp[i];
1362     v += nz+1; vi += nz+1;
1363   }
1364 
1365   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1366   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1367   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1368   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1369   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1370 
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1376 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1377 {
1378   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1379   IS                iscol = a->col,isrow = a->row;
1380   PetscErrorCode    ierr;
1381   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1382   PetscInt          i,n = A->rmap->n,j;
1383   PetscInt          nz;
1384   PetscScalar       *x,*tmp,s1;
1385   const MatScalar   *aa = a->a,*v;
1386   const PetscScalar *b;
1387 
1388   PetscFunctionBegin;
1389   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1390   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1391   tmp  = a->solve_work;
1392 
1393   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1394   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1395 
1396   /* copy the b into temp work space according to permutation */
1397   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1398 
1399   /* forward solve the U^T */
1400   for (i=0; i<n; i++) {
1401     v   = aa + diag[i] ;
1402     vi  = aj + diag[i] + 1;
1403     nz  = ai[i+1] - diag[i] - 1;
1404     s1  = tmp[i];
1405     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1406     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1407     tmp[i] = s1;
1408   }
1409 
1410   /* backward solve the L^T */
1411   for (i=n-1; i>=0; i--){
1412     v   = aa + diag[i] - 1 ;
1413     vi  = aj + diag[i] - 1 ;
1414     nz  = diag[i] - ai[i];
1415     s1  = tmp[i];
1416     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1417   }
1418 
1419   /* copy tmp into x according to permutation */
1420   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1421 
1422   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1423   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1424   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1425   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1426 
1427   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1433 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1434 {
1435   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1436   IS                iscol = a->col,isrow = a->row;
1437   PetscErrorCode    ierr;
1438   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1439   PetscInt          i,n = A->rmap->n,j;
1440   PetscInt          nz;
1441   PetscScalar       *x,*tmp,s1;
1442   const MatScalar   *aa = a->a,*v;
1443   const PetscScalar *b;
1444 
1445   PetscFunctionBegin;
1446   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1447   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1448   tmp  = a->solve_work;
1449 
1450   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1451   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1452 
1453   /* copy the b into temp work space according to permutation */
1454   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1455 
1456   /* forward solve the U^T */
1457   for (i=0; i<n; i++) {
1458     v   = aa + adiag[i+1] + 1;
1459     vi  = aj + adiag[i+1] + 1;
1460     nz  = adiag[i] - adiag[i+1] - 1;
1461     s1  = tmp[i];
1462     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1463     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1464     tmp[i] = s1;
1465   }
1466 
1467   /* backward solve the L^T */
1468   for (i=n-1; i>=0; i--){
1469     v   = aa + ai[i];
1470     vi  = aj + ai[i];
1471     nz  = ai[i+1] - ai[i];
1472     s1  = tmp[i];
1473     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1474   }
1475 
1476   /* copy tmp into x according to permutation */
1477   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1478 
1479   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1480   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1481   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1482   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1483 
1484   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1490 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1491 {
1492   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1493   IS                iscol = a->col,isrow = a->row;
1494   PetscErrorCode    ierr;
1495   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1496   PetscInt          i,n = A->rmap->n,j;
1497   PetscInt          nz;
1498   PetscScalar       *x,*tmp,s1;
1499   const MatScalar   *aa = a->a,*v;
1500   const PetscScalar *b;
1501 
1502   PetscFunctionBegin;
1503   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1504   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1505   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1506   tmp  = a->solve_work;
1507 
1508   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1509   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1510 
1511   /* copy the b into temp work space according to permutation */
1512   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1513 
1514   /* forward solve the U^T */
1515   for (i=0; i<n; i++) {
1516     v   = aa + diag[i] ;
1517     vi  = aj + diag[i] + 1;
1518     nz  = ai[i+1] - diag[i] - 1;
1519     s1  = tmp[i];
1520     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1521     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1522     tmp[i] = s1;
1523   }
1524 
1525   /* backward solve the L^T */
1526   for (i=n-1; i>=0; i--){
1527     v   = aa + diag[i] - 1 ;
1528     vi  = aj + diag[i] - 1 ;
1529     nz  = diag[i] - ai[i];
1530     s1  = tmp[i];
1531     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1532   }
1533 
1534   /* copy tmp into x according to permutation */
1535   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1536 
1537   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1538   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1539   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1540   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1541 
1542   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1548 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1549 {
1550   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1551   IS                iscol = a->col,isrow = a->row;
1552   PetscErrorCode    ierr;
1553   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1554   PetscInt          i,n = A->rmap->n,j;
1555   PetscInt          nz;
1556   PetscScalar       *x,*tmp,s1;
1557   const MatScalar   *aa = a->a,*v;
1558   const PetscScalar *b;
1559 
1560   PetscFunctionBegin;
1561   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1562   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1563   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1564   tmp  = a->solve_work;
1565 
1566   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1567   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1568 
1569   /* copy the b into temp work space according to permutation */
1570   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1571 
1572   /* forward solve the U^T */
1573   for (i=0; i<n; i++) {
1574     v   = aa + adiag[i+1] + 1;
1575     vi  = aj + adiag[i+1] + 1;
1576     nz  = adiag[i] - adiag[i+1] - 1;
1577     s1  = tmp[i];
1578     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1579     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1580     tmp[i] = s1;
1581   }
1582 
1583 
1584   /* backward solve the L^T */
1585   for (i=n-1; i>=0; i--){
1586     v   = aa + ai[i] ;
1587     vi  = aj + ai[i];
1588     nz  = ai[i+1] - ai[i];
1589     s1  = tmp[i];
1590     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1591   }
1592 
1593   /* copy tmp into x according to permutation */
1594   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1595 
1596   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1597   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1598   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1599   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1600 
1601   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 /* ----------------------------------------------------------------*/
1606 
1607 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool );
1608 
1609 /*
1610    ilu() under revised new data structure.
1611    Factored arrays bj and ba are stored as
1612      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1613 
1614    bi=fact->i is an array of size n+1, in which
1615    bi+
1616      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1617      bi[n]:  points to L(n-1,n-1)+1
1618 
1619   bdiag=fact->diag is an array of size n+1,in which
1620      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1621      bdiag[n]: points to entry of U(n-1,0)-1
1622 
1623    U(i,:) contains bdiag[i] as its last entry, i.e.,
1624     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1625 */
1626 #undef __FUNCT__
1627 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1628 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1629 {
1630 
1631   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1632   PetscErrorCode     ierr;
1633   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1634   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1635   PetscBool          missing;
1636   IS                 isicol;
1637 
1638   PetscFunctionBegin;
1639   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1640   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1641   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1642   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1643 
1644   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1645   b    = (Mat_SeqAIJ*)(fact)->data;
1646 
1647   /* allocate matrix arrays for new data structure */
1648   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1649   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1650   b->singlemalloc = PETSC_TRUE;
1651   if (!b->diag){
1652     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1653     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1654   }
1655   bdiag = b->diag;
1656 
1657   if (n > 0) {
1658     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1659   }
1660 
1661   /* set bi and bj with new data structure */
1662   bi = b->i;
1663   bj = b->j;
1664 
1665   /* L part */
1666   bi[0] = 0;
1667   for (i=0; i<n; i++){
1668     nz = adiag[i] - ai[i];
1669     bi[i+1] = bi[i] + nz;
1670     aj = a->j + ai[i];
1671     for (j=0; j<nz; j++){
1672       /*   *bj = aj[j]; bj++; */
1673       bj[k++] = aj[j];
1674     }
1675   }
1676 
1677   /* U part */
1678   bdiag[n] = bi[n]-1;
1679   for (i=n-1; i>=0; i--){
1680     nz = ai[i+1] - adiag[i] - 1;
1681     aj = a->j + adiag[i] + 1;
1682     for (j=0; j<nz; j++){
1683       /*      *bj = aj[j]; bj++; */
1684       bj[k++] = aj[j];
1685     }
1686     /* diag[i] */
1687     /*    *bj = i; bj++; */
1688     bj[k++] = i;
1689     bdiag[i] = bdiag[i+1] + nz + 1;
1690   }
1691 
1692   fact->factortype             = MAT_FACTOR_ILU;
1693   fact->info.factor_mallocs    = 0;
1694   fact->info.fill_ratio_given  = info->fill;
1695   fact->info.fill_ratio_needed = 1.0;
1696   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1697 
1698   b       = (Mat_SeqAIJ*)(fact)->data;
1699   b->row  = isrow;
1700   b->col  = iscol;
1701   b->icol = isicol;
1702   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1703   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1704   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1710 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1711 {
1712   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1713   IS                 isicol;
1714   PetscErrorCode     ierr;
1715   const PetscInt     *r,*ic;
1716   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1717   PetscInt           *bi,*cols,nnz,*cols_lvl;
1718   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1719   PetscInt           i,levels,diagonal_fill;
1720   PetscBool          col_identity,row_identity;
1721   PetscReal          f;
1722   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1723   PetscBT            lnkbt;
1724   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1725   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1726   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1727 
1728   PetscFunctionBegin;
1729   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1730   /*
1731   PetscBool          olddatastruct=PETSC_FALSE;
1732   ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1733   if(olddatastruct){
1734     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1735     PetscFunctionReturn(0);
1736   }
1737   */
1738 
1739   levels = (PetscInt)info->levels;
1740   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1741   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1742 
1743   if (!levels && row_identity && col_identity) {
1744     /* special case: ilu(0) with natural ordering */
1745     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1746     if (a->inode.size) {
1747       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1748     }
1749     PetscFunctionReturn(0);
1750   }
1751 
1752   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1753   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1754   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1755   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1756 
1757   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1758   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1759   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1760   bi[0] = bdiag[0] = 0;
1761 
1762   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1763 
1764   /* create a linked list for storing column indices of the active row */
1765   nlnk = n + 1;
1766   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1767 
1768   /* initial FreeSpace size is f*(ai[n]+1) */
1769   f             = info->fill;
1770   diagonal_fill = (PetscInt)info->diagonal_fill;
1771   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1772   current_space = free_space;
1773   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1774   current_space_lvl = free_space_lvl;
1775 
1776   for (i=0; i<n; i++) {
1777     nzi = 0;
1778     /* copy current row into linked list */
1779     nnz  = ai[r[i]+1] - ai[r[i]];
1780     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1781     cols = aj + ai[r[i]];
1782     lnk[i] = -1; /* marker to indicate if diagonal exists */
1783     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1784     nzi += nlnk;
1785 
1786     /* make sure diagonal entry is included */
1787     if (diagonal_fill && lnk[i] == -1) {
1788       fm = n;
1789       while (lnk[fm] < i) fm = lnk[fm];
1790       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1791       lnk[fm]    = i;
1792       lnk_lvl[i] = 0;
1793       nzi++; dcount++;
1794     }
1795 
1796     /* add pivot rows into the active row */
1797     nzbd = 0;
1798     prow = lnk[n];
1799     while (prow < i) {
1800       nnz      = bdiag[prow];
1801       cols     = bj_ptr[prow] + nnz + 1;
1802       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1803       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1804       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1805       nzi += nlnk;
1806       prow = lnk[prow];
1807       nzbd++;
1808     }
1809     bdiag[i] = nzbd;
1810     bi[i+1]  = bi[i] + nzi;
1811 
1812     /* if free space is not available, make more free space */
1813     if (current_space->local_remaining<nzi) {
1814       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1815       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1816       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1817       reallocs++;
1818     }
1819 
1820     /* copy data into free_space and free_space_lvl, then initialize lnk */
1821     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1822     bj_ptr[i]    = current_space->array;
1823     bjlvl_ptr[i] = current_space_lvl->array;
1824 
1825     /* make sure the active row i has diagonal entry */
1826     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1827 
1828     current_space->array           += nzi;
1829     current_space->local_used      += nzi;
1830     current_space->local_remaining -= nzi;
1831     current_space_lvl->array           += nzi;
1832     current_space_lvl->local_used      += nzi;
1833     current_space_lvl->local_remaining -= nzi;
1834   }
1835 
1836   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1837   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1838 
1839   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1840   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1841   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1842 
1843   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1844   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1845   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1846 
1847 #if defined(PETSC_USE_INFO)
1848   {
1849     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1850     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1851     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1852     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1853     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1854     if (diagonal_fill) {
1855       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1856     }
1857   }
1858 #endif
1859 
1860   /* put together the new matrix */
1861   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1862   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1863   b = (Mat_SeqAIJ*)(fact)->data;
1864   b->free_a       = PETSC_TRUE;
1865   b->free_ij      = PETSC_TRUE;
1866   b->singlemalloc = PETSC_FALSE;
1867   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1868   b->j          = bj;
1869   b->i          = bi;
1870   b->diag       = bdiag;
1871   b->ilen       = 0;
1872   b->imax       = 0;
1873   b->row        = isrow;
1874   b->col        = iscol;
1875   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1876   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1877   b->icol       = isicol;
1878   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1879   /* In b structure:  Free imax, ilen, old a, old j.
1880      Allocate bdiag, solve_work, new a, new j */
1881   ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1882   b->maxnz = b->nz = bdiag[0]+1;
1883   (fact)->info.factor_mallocs    = reallocs;
1884   (fact)->info.fill_ratio_given  = f;
1885   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1886   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1887   if (a->inode.size) {
1888     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1895 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1896 {
1897   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1898   IS                 isicol;
1899   PetscErrorCode     ierr;
1900   const PetscInt     *r,*ic;
1901   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1902   PetscInt           *bi,*cols,nnz,*cols_lvl;
1903   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1904   PetscInt           i,levels,diagonal_fill;
1905   PetscBool          col_identity,row_identity;
1906   PetscReal          f;
1907   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1908   PetscBT            lnkbt;
1909   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1910   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1911   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1912   PetscBool          missing;
1913 
1914   PetscFunctionBegin;
1915   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1916   f             = info->fill;
1917   levels        = (PetscInt)info->levels;
1918   diagonal_fill = (PetscInt)info->diagonal_fill;
1919   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1920 
1921   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1922   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1923   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1924     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1925     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1926     if (a->inode.size) {
1927       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1928     }
1929     fact->factortype = MAT_FACTOR_ILU;
1930     (fact)->info.factor_mallocs    = 0;
1931     (fact)->info.fill_ratio_given  = info->fill;
1932     (fact)->info.fill_ratio_needed = 1.0;
1933     b               = (Mat_SeqAIJ*)(fact)->data;
1934     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1935     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1936     b->row              = isrow;
1937     b->col              = iscol;
1938     b->icol             = isicol;
1939     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1940     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1941     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1942     PetscFunctionReturn(0);
1943   }
1944 
1945   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1946   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1947 
1948   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1949   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1950   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1951   bi[0] = bdiag[0] = 0;
1952 
1953   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1954 
1955   /* create a linked list for storing column indices of the active row */
1956   nlnk = n + 1;
1957   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1958 
1959   /* initial FreeSpace size is f*(ai[n]+1) */
1960   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1961   current_space = free_space;
1962   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1963   current_space_lvl = free_space_lvl;
1964 
1965   for (i=0; i<n; i++) {
1966     nzi = 0;
1967     /* copy current row into linked list */
1968     nnz  = ai[r[i]+1] - ai[r[i]];
1969     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1970     cols = aj + ai[r[i]];
1971     lnk[i] = -1; /* marker to indicate if diagonal exists */
1972     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1973     nzi += nlnk;
1974 
1975     /* make sure diagonal entry is included */
1976     if (diagonal_fill && lnk[i] == -1) {
1977       fm = n;
1978       while (lnk[fm] < i) fm = lnk[fm];
1979       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1980       lnk[fm]    = i;
1981       lnk_lvl[i] = 0;
1982       nzi++; dcount++;
1983     }
1984 
1985     /* add pivot rows into the active row */
1986     nzbd = 0;
1987     prow = lnk[n];
1988     while (prow < i) {
1989       nnz      = bdiag[prow];
1990       cols     = bj_ptr[prow] + nnz + 1;
1991       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1992       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1993       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1994       nzi += nlnk;
1995       prow = lnk[prow];
1996       nzbd++;
1997     }
1998     bdiag[i] = nzbd;
1999     bi[i+1]  = bi[i] + nzi;
2000 
2001     /* if free space is not available, make more free space */
2002     if (current_space->local_remaining<nzi) {
2003       nnz = nzi*(n - i); /* estimated and max additional space needed */
2004       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2005       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2006       reallocs++;
2007     }
2008 
2009     /* copy data into free_space and free_space_lvl, then initialize lnk */
2010     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2011     bj_ptr[i]    = current_space->array;
2012     bjlvl_ptr[i] = current_space_lvl->array;
2013 
2014     /* make sure the active row i has diagonal entry */
2015     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2016 
2017     current_space->array           += nzi;
2018     current_space->local_used      += nzi;
2019     current_space->local_remaining -= nzi;
2020     current_space_lvl->array           += nzi;
2021     current_space_lvl->local_used      += nzi;
2022     current_space_lvl->local_remaining -= nzi;
2023   }
2024 
2025   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2026   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2027 
2028   /* destroy list of free space and other temporary arrays */
2029   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2030   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2031   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2032   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2033   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2034 
2035 #if defined(PETSC_USE_INFO)
2036   {
2037     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2038     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2039     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2040     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2041     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2042     if (diagonal_fill) {
2043       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2044     }
2045   }
2046 #endif
2047 
2048   /* put together the new matrix */
2049   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2050   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2051   b = (Mat_SeqAIJ*)(fact)->data;
2052   b->free_a       = PETSC_TRUE;
2053   b->free_ij      = PETSC_TRUE;
2054   b->singlemalloc = PETSC_FALSE;
2055   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2056   b->j          = bj;
2057   b->i          = bi;
2058   for (i=0; i<n; i++) bdiag[i] += bi[i];
2059   b->diag       = bdiag;
2060   b->ilen       = 0;
2061   b->imax       = 0;
2062   b->row        = isrow;
2063   b->col        = iscol;
2064   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2065   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2066   b->icol       = isicol;
2067   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2068   /* In b structure:  Free imax, ilen, old a, old j.
2069      Allocate bdiag, solve_work, new a, new j */
2070   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2071   b->maxnz             = b->nz = bi[n] ;
2072   (fact)->info.factor_mallocs    = reallocs;
2073   (fact)->info.fill_ratio_given  = f;
2074   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2075   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2076   if (a->inode.size) {
2077     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2078   }
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 #undef __FUNCT__
2083 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2084 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2085 {
2086   Mat            C = B;
2087   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2088   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2089   IS             ip=b->row,iip = b->icol;
2090   PetscErrorCode ierr;
2091   const PetscInt *rip,*riip;
2092   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2093   PetscInt       *ai=a->i,*aj=a->j;
2094   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2095   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2096   PetscBool      perm_identity;
2097   FactorShiftCtx sctx;
2098   PetscReal      rs;
2099   MatScalar      d,*v;
2100 
2101   PetscFunctionBegin;
2102   /* MatPivotSetUp(): initialize shift context sctx */
2103   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2104 
2105   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2106     sctx.shift_top = info->zeropivot;
2107     for (i=0; i<mbs; i++) {
2108       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2109       d  = (aa)[a->diag[i]];
2110       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2111       v  = aa+ai[i];
2112       nz = ai[i+1] - ai[i];
2113       for (j=0; j<nz; j++)
2114 	rs += PetscAbsScalar(v[j]);
2115       if (rs>sctx.shift_top) sctx.shift_top = rs;
2116     }
2117     sctx.shift_top   *= 1.1;
2118     sctx.nshift_max   = 5;
2119     sctx.shift_lo     = 0.;
2120     sctx.shift_hi     = 1.;
2121   }
2122 
2123   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2124   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2125 
2126   /* allocate working arrays
2127      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2128      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2129   */
2130   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2131 
2132   do {
2133     sctx.newshift = PETSC_FALSE;
2134 
2135     for (i=0; i<mbs; i++) c2r[i] = mbs;
2136     if (mbs) il[0] = 0;
2137 
2138     for (k = 0; k<mbs; k++){
2139       /* zero rtmp */
2140       nz = bi[k+1] - bi[k];
2141       bjtmp = bj + bi[k];
2142       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2143 
2144       /* load in initial unfactored row */
2145       bval = ba + bi[k];
2146       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2147       for (j = jmin; j < jmax; j++){
2148         col = riip[aj[j]];
2149         if (col >= k){ /* only take upper triangular entry */
2150           rtmp[col] = aa[j];
2151           *bval++   = 0.0; /* for in-place factorization */
2152         }
2153       }
2154       /* shift the diagonal of the matrix: ZeropivotApply() */
2155       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2156 
2157       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2158       dk = rtmp[k];
2159       i  = c2r[k]; /* first row to be added to k_th row  */
2160 
2161       while (i < k){
2162         nexti = c2r[i]; /* next row to be added to k_th row */
2163 
2164         /* compute multiplier, update diag(k) and U(i,k) */
2165         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2166         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2167         dk   += uikdi*ba[ili]; /* update diag[k] */
2168         ba[ili] = uikdi; /* -U(i,k) */
2169 
2170         /* add multiple of row i to k-th row */
2171         jmin = ili + 1; jmax = bi[i+1];
2172         if (jmin < jmax){
2173           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2174           /* update il and c2r for row i */
2175           il[i] = jmin;
2176           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2177         }
2178         i = nexti;
2179       }
2180 
2181       /* copy data into U(k,:) */
2182       rs   = 0.0;
2183       jmin = bi[k]; jmax = bi[k+1]-1;
2184       if (jmin < jmax) {
2185         for (j=jmin; j<jmax; j++){
2186           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2187         }
2188         /* add the k-th row into il and c2r */
2189         il[k] = jmin;
2190         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2191       }
2192 
2193       /* MatPivotCheck() */
2194       sctx.rs  = rs;
2195       sctx.pv  = dk;
2196       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2197       if(sctx.newshift) break;
2198       dk = sctx.pv;
2199 
2200       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2201     }
2202   } while (sctx.newshift);
2203 
2204   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2205   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2206   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2207 
2208   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2209   if (perm_identity){
2210     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2211     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2212     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2213     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2214   } else {
2215     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2216     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2217     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2218     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2219   }
2220 
2221   C->assembled    = PETSC_TRUE;
2222   C->preallocated = PETSC_TRUE;
2223   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2224 
2225   /* MatPivotView() */
2226   if (sctx.nshift){
2227     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2228       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2229     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2230       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2231     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2232       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2233     }
2234   }
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2240 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2241 {
2242   Mat            C = B;
2243   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2244   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2245   IS             ip=b->row,iip = b->icol;
2246   PetscErrorCode ierr;
2247   const PetscInt *rip,*riip;
2248   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2249   PetscInt       *ai=a->i,*aj=a->j;
2250   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2251   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2252   PetscBool      perm_identity;
2253   FactorShiftCtx sctx;
2254   PetscReal      rs;
2255   MatScalar      d,*v;
2256 
2257   PetscFunctionBegin;
2258   /* MatPivotSetUp(): initialize shift context sctx */
2259   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2260 
2261   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2262     sctx.shift_top = info->zeropivot;
2263     for (i=0; i<mbs; i++) {
2264       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2265       d  = (aa)[a->diag[i]];
2266       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2267       v  = aa+ai[i];
2268       nz = ai[i+1] - ai[i];
2269       for (j=0; j<nz; j++)
2270 	rs += PetscAbsScalar(v[j]);
2271       if (rs>sctx.shift_top) sctx.shift_top = rs;
2272     }
2273     sctx.shift_top   *= 1.1;
2274     sctx.nshift_max   = 5;
2275     sctx.shift_lo     = 0.;
2276     sctx.shift_hi     = 1.;
2277   }
2278 
2279   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2280   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2281 
2282   /* initialization */
2283   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2284 
2285   do {
2286     sctx.newshift = PETSC_FALSE;
2287 
2288     for (i=0; i<mbs; i++) jl[i] = mbs;
2289     il[0] = 0;
2290 
2291     for (k = 0; k<mbs; k++){
2292       /* zero rtmp */
2293       nz = bi[k+1] - bi[k];
2294       bjtmp = bj + bi[k];
2295       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2296 
2297       bval = ba + bi[k];
2298       /* initialize k-th row by the perm[k]-th row of A */
2299       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2300       for (j = jmin; j < jmax; j++){
2301         col = riip[aj[j]];
2302         if (col >= k){ /* only take upper triangular entry */
2303           rtmp[col] = aa[j];
2304           *bval++  = 0.0; /* for in-place factorization */
2305         }
2306       }
2307       /* shift the diagonal of the matrix */
2308       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2309 
2310       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2311       dk = rtmp[k];
2312       i = jl[k]; /* first row to be added to k_th row  */
2313 
2314       while (i < k){
2315         nexti = jl[i]; /* next row to be added to k_th row */
2316 
2317         /* compute multiplier, update diag(k) and U(i,k) */
2318         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2319         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2320         dk += uikdi*ba[ili];
2321         ba[ili] = uikdi; /* -U(i,k) */
2322 
2323         /* add multiple of row i to k-th row */
2324         jmin = ili + 1; jmax = bi[i+1];
2325         if (jmin < jmax){
2326           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2327           /* update il and jl for row i */
2328           il[i] = jmin;
2329           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2330         }
2331         i = nexti;
2332       }
2333 
2334       /* shift the diagonals when zero pivot is detected */
2335       /* compute rs=sum of abs(off-diagonal) */
2336       rs   = 0.0;
2337       jmin = bi[k]+1;
2338       nz   = bi[k+1] - jmin;
2339       bcol = bj + jmin;
2340       for (j=0; j<nz; j++) {
2341         rs += PetscAbsScalar(rtmp[bcol[j]]);
2342       }
2343 
2344       sctx.rs = rs;
2345       sctx.pv = dk;
2346       ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2347       if (sctx.newshift) break;
2348       dk = sctx.pv;
2349 
2350       /* copy data into U(k,:) */
2351       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2352       jmin = bi[k]+1; jmax = bi[k+1];
2353       if (jmin < jmax) {
2354         for (j=jmin; j<jmax; j++){
2355           col = bj[j]; ba[j] = rtmp[col];
2356         }
2357         /* add the k-th row into il and jl */
2358         il[k] = jmin;
2359         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2360       }
2361     }
2362   } while (sctx.newshift);
2363 
2364   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2365   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2366   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2367 
2368   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2369   if (perm_identity){
2370     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2371     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2372     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2373     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2374   } else {
2375     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2376     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2377     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2378     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2379   }
2380 
2381   C->assembled    = PETSC_TRUE;
2382   C->preallocated = PETSC_TRUE;
2383   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2384   if (sctx.nshift){
2385     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2386       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2387     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2388       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2389     }
2390   }
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 /*
2395    icc() under revised new data structure.
2396    Factored arrays bj and ba are stored as
2397      U(0,:),...,U(i,:),U(n-1,:)
2398 
2399    ui=fact->i is an array of size n+1, in which
2400    ui+
2401      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2402      ui[n]:  points to U(n-1,n-1)+1
2403 
2404   udiag=fact->diag is an array of size n,in which
2405      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2406 
2407    U(i,:) contains udiag[i] as its last entry, i.e.,
2408     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2409 */
2410 
2411 #undef __FUNCT__
2412 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2413 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2414 {
2415   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2416   Mat_SeqSBAIJ       *b;
2417   PetscErrorCode     ierr;
2418   PetscBool          perm_identity,missing;
2419   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2420   const PetscInt     *rip,*riip;
2421   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2422   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2423   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2424   PetscReal          fill=info->fill,levels=info->levels;
2425   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2426   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2427   PetscBT            lnkbt;
2428   IS                 iperm;
2429 
2430   PetscFunctionBegin;
2431   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2432   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2433   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2434   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2435   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2436 
2437   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2438   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2439   ui[0] = 0;
2440 
2441   /* ICC(0) without matrix ordering: simply rearrange column indices */
2442   if (!levels && perm_identity) {
2443     for (i=0; i<am; i++) {
2444       ncols    = ai[i+1] - a->diag[i];
2445       ui[i+1]  = ui[i] + ncols;
2446       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2447     }
2448     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2449     cols = uj;
2450     for (i=0; i<am; i++) {
2451       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2452       ncols = ai[i+1] - a->diag[i] -1;
2453       for (j=0; j<ncols; j++) *cols++ = aj[j];
2454       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2455     }
2456   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2457     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2458     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2459 
2460     /* initialization */
2461     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2462 
2463     /* jl: linked list for storing indices of the pivot rows
2464        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2465     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2466     for (i=0; i<am; i++){
2467       jl[i] = am; il[i] = 0;
2468     }
2469 
2470     /* create and initialize a linked list for storing column indices of the active row k */
2471     nlnk = am + 1;
2472     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2473 
2474     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2475     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2476     current_space = free_space;
2477     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2478     current_space_lvl = free_space_lvl;
2479 
2480     for (k=0; k<am; k++){  /* for each active row k */
2481       /* initialize lnk by the column indices of row rip[k] of A */
2482       nzk   = 0;
2483       ncols = ai[rip[k]+1] - ai[rip[k]];
2484       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2485       ncols_upper = 0;
2486       for (j=0; j<ncols; j++){
2487         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2488         if (riip[i] >= k){ /* only take upper triangular entry */
2489           ajtmp[ncols_upper] = i;
2490           ncols_upper++;
2491         }
2492       }
2493       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2494       nzk += nlnk;
2495 
2496       /* update lnk by computing fill-in for each pivot row to be merged in */
2497       prow = jl[k]; /* 1st pivot row */
2498 
2499       while (prow < k){
2500         nextprow = jl[prow];
2501 
2502         /* merge prow into k-th row */
2503         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2504         jmax = ui[prow+1];
2505         ncols = jmax-jmin;
2506         i     = jmin - ui[prow];
2507         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2508         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2509         j     = *(uj - 1);
2510         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2511         nzk += nlnk;
2512 
2513         /* update il and jl for prow */
2514         if (jmin < jmax){
2515           il[prow] = jmin;
2516           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2517         }
2518         prow = nextprow;
2519       }
2520 
2521       /* if free space is not available, make more free space */
2522       if (current_space->local_remaining<nzk) {
2523         i  = am - k + 1; /* num of unfactored rows */
2524         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2525         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2526         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2527         reallocs++;
2528       }
2529 
2530       /* copy data into free_space and free_space_lvl, then initialize lnk */
2531       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2532       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2533 
2534       /* add the k-th row into il and jl */
2535       if (nzk > 1){
2536         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2537         jl[k] = jl[i]; jl[i] = k;
2538         il[k] = ui[k] + 1;
2539       }
2540       uj_ptr[k]     = current_space->array;
2541       uj_lvl_ptr[k] = current_space_lvl->array;
2542 
2543       current_space->array           += nzk;
2544       current_space->local_used      += nzk;
2545       current_space->local_remaining -= nzk;
2546 
2547       current_space_lvl->array           += nzk;
2548       current_space_lvl->local_used      += nzk;
2549       current_space_lvl->local_remaining -= nzk;
2550 
2551       ui[k+1] = ui[k] + nzk;
2552     }
2553 
2554     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2555     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2556     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2557     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2558 
2559     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2560     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2561     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2562     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2563     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2564 
2565   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2566 
2567   /* put together the new matrix in MATSEQSBAIJ format */
2568   b    = (Mat_SeqSBAIJ*)(fact)->data;
2569   b->singlemalloc = PETSC_FALSE;
2570   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2571   b->j    = uj;
2572   b->i    = ui;
2573   b->diag = udiag;
2574   b->free_diag = PETSC_TRUE;
2575   b->ilen = 0;
2576   b->imax = 0;
2577   b->row  = perm;
2578   b->col  = perm;
2579   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2580   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2581   b->icol = iperm;
2582   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2583   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2584   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2585   b->maxnz   = b->nz = ui[am];
2586   b->free_a  = PETSC_TRUE;
2587   b->free_ij = PETSC_TRUE;
2588 
2589   fact->info.factor_mallocs   = reallocs;
2590   fact->info.fill_ratio_given = fill;
2591   if (ai[am] != 0) {
2592     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2593     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2594   } else {
2595     fact->info.fill_ratio_needed = 0.0;
2596   }
2597 #if defined(PETSC_USE_INFO)
2598     if (ai[am] != 0) {
2599       PetscReal af = fact->info.fill_ratio_needed;
2600       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2601       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2602       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2603     } else {
2604       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2605     }
2606 #endif
2607   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2613 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2614 {
2615   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2616   Mat_SeqSBAIJ       *b;
2617   PetscErrorCode     ierr;
2618   PetscBool          perm_identity,missing;
2619   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2620   const PetscInt     *rip,*riip;
2621   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2622   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2623   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2624   PetscReal          fill=info->fill,levels=info->levels;
2625   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2626   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2627   PetscBT            lnkbt;
2628   IS                 iperm;
2629 
2630   PetscFunctionBegin;
2631   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2632   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2633   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2634   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2635   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2636 
2637   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2638   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2639   ui[0] = 0;
2640 
2641   /* ICC(0) without matrix ordering: simply copies fill pattern */
2642   if (!levels && perm_identity) {
2643 
2644     for (i=0; i<am; i++) {
2645       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2646       udiag[i] = ui[i];
2647     }
2648     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2649     cols = uj;
2650     for (i=0; i<am; i++) {
2651       aj    = a->j + a->diag[i];
2652       ncols = ui[i+1] - ui[i];
2653       for (j=0; j<ncols; j++) *cols++ = *aj++;
2654     }
2655   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2656     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2657     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2658 
2659     /* initialization */
2660     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2661 
2662     /* jl: linked list for storing indices of the pivot rows
2663        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2664     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2665     for (i=0; i<am; i++){
2666       jl[i] = am; il[i] = 0;
2667     }
2668 
2669     /* create and initialize a linked list for storing column indices of the active row k */
2670     nlnk = am + 1;
2671     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2672 
2673     /* initial FreeSpace size is fill*(ai[am]+1) */
2674     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2675     current_space = free_space;
2676     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2677     current_space_lvl = free_space_lvl;
2678 
2679     for (k=0; k<am; k++){  /* for each active row k */
2680       /* initialize lnk by the column indices of row rip[k] of A */
2681       nzk   = 0;
2682       ncols = ai[rip[k]+1] - ai[rip[k]];
2683       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2684       ncols_upper = 0;
2685       for (j=0; j<ncols; j++){
2686         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2687         if (riip[i] >= k){ /* only take upper triangular entry */
2688           ajtmp[ncols_upper] = i;
2689           ncols_upper++;
2690         }
2691       }
2692       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2693       nzk += nlnk;
2694 
2695       /* update lnk by computing fill-in for each pivot row to be merged in */
2696       prow = jl[k]; /* 1st pivot row */
2697 
2698       while (prow < k){
2699         nextprow = jl[prow];
2700 
2701         /* merge prow into k-th row */
2702         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2703         jmax = ui[prow+1];
2704         ncols = jmax-jmin;
2705         i     = jmin - ui[prow];
2706         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2707         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2708         j     = *(uj - 1);
2709         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2710         nzk += nlnk;
2711 
2712         /* update il and jl for prow */
2713         if (jmin < jmax){
2714           il[prow] = jmin;
2715           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2716         }
2717         prow = nextprow;
2718       }
2719 
2720       /* if free space is not available, make more free space */
2721       if (current_space->local_remaining<nzk) {
2722         i = am - k + 1; /* num of unfactored rows */
2723         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2724         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2725         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2726         reallocs++;
2727       }
2728 
2729       /* copy data into free_space and free_space_lvl, then initialize lnk */
2730       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2731       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2732 
2733       /* add the k-th row into il and jl */
2734       if (nzk > 1){
2735         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2736         jl[k] = jl[i]; jl[i] = k;
2737         il[k] = ui[k] + 1;
2738       }
2739       uj_ptr[k]     = current_space->array;
2740       uj_lvl_ptr[k] = current_space_lvl->array;
2741 
2742       current_space->array           += nzk;
2743       current_space->local_used      += nzk;
2744       current_space->local_remaining -= nzk;
2745 
2746       current_space_lvl->array           += nzk;
2747       current_space_lvl->local_used      += nzk;
2748       current_space_lvl->local_remaining -= nzk;
2749 
2750       ui[k+1] = ui[k] + nzk;
2751     }
2752 
2753 #if defined(PETSC_USE_INFO)
2754     if (ai[am] != 0) {
2755       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2756       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2757       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2758       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2759     } else {
2760       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2761     }
2762 #endif
2763 
2764     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2765     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2766     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2767     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2768 
2769     /* destroy list of free space and other temporary array(s) */
2770     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2771     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2772     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2773     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2774 
2775   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2776 
2777   /* put together the new matrix in MATSEQSBAIJ format */
2778 
2779   b    = (Mat_SeqSBAIJ*)fact->data;
2780   b->singlemalloc = PETSC_FALSE;
2781   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2782   b->j    = uj;
2783   b->i    = ui;
2784   b->diag = udiag;
2785   b->free_diag = PETSC_TRUE;
2786   b->ilen = 0;
2787   b->imax = 0;
2788   b->row  = perm;
2789   b->col  = perm;
2790   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2791   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2792   b->icol = iperm;
2793   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2794   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2795   ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2796   b->maxnz   = b->nz = ui[am];
2797   b->free_a  = PETSC_TRUE;
2798   b->free_ij = PETSC_TRUE;
2799 
2800   fact->info.factor_mallocs    = reallocs;
2801   fact->info.fill_ratio_given  = fill;
2802   if (ai[am] != 0) {
2803     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2804   } else {
2805     fact->info.fill_ratio_needed = 0.0;
2806   }
2807   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2808   PetscFunctionReturn(0);
2809 }
2810 
2811 #undef __FUNCT__
2812 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2813 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2814 {
2815   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2816   Mat_SeqSBAIJ       *b;
2817   PetscErrorCode     ierr;
2818   PetscBool          perm_identity;
2819   PetscReal          fill = info->fill;
2820   const PetscInt     *rip,*riip;
2821   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2822   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2823   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2824   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2825   PetscBT            lnkbt;
2826   IS                 iperm;
2827 
2828   PetscFunctionBegin;
2829   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2830   /* check whether perm is the identity mapping */
2831   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2832   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2833   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2834   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2835 
2836   /* initialization */
2837   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2838   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2839   ui[0] = 0;
2840 
2841   /* jl: linked list for storing indices of the pivot rows
2842      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2843   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2844   for (i=0; i<am; i++){
2845     jl[i] = am; il[i] = 0;
2846   }
2847 
2848   /* create and initialize a linked list for storing column indices of the active row k */
2849   nlnk = am + 1;
2850   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2851 
2852   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2853   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2854   current_space = free_space;
2855 
2856   for (k=0; k<am; k++){  /* for each active row k */
2857     /* initialize lnk by the column indices of row rip[k] of A */
2858     nzk   = 0;
2859     ncols = ai[rip[k]+1] - ai[rip[k]];
2860     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2861     ncols_upper = 0;
2862     for (j=0; j<ncols; j++){
2863       i = riip[*(aj + ai[rip[k]] + j)];
2864       if (i >= k){ /* only take upper triangular entry */
2865         cols[ncols_upper] = i;
2866         ncols_upper++;
2867       }
2868     }
2869     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2870     nzk += nlnk;
2871 
2872     /* update lnk by computing fill-in for each pivot row to be merged in */
2873     prow = jl[k]; /* 1st pivot row */
2874 
2875     while (prow < k){
2876       nextprow = jl[prow];
2877       /* merge prow into k-th row */
2878       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2879       jmax = ui[prow+1];
2880       ncols = jmax-jmin;
2881       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2882       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2883       nzk += nlnk;
2884 
2885       /* update il and jl for prow */
2886       if (jmin < jmax){
2887         il[prow] = jmin;
2888         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2889       }
2890       prow = nextprow;
2891     }
2892 
2893     /* if free space is not available, make more free space */
2894     if (current_space->local_remaining<nzk) {
2895       i  = am - k + 1; /* num of unfactored rows */
2896       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2897       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2898       reallocs++;
2899     }
2900 
2901     /* copy data into free space, then initialize lnk */
2902     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2903 
2904     /* add the k-th row into il and jl */
2905     if (nzk > 1){
2906       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2907       jl[k] = jl[i]; jl[i] = k;
2908       il[k] = ui[k] + 1;
2909     }
2910     ui_ptr[k] = current_space->array;
2911     current_space->array           += nzk;
2912     current_space->local_used      += nzk;
2913     current_space->local_remaining -= nzk;
2914 
2915     ui[k+1] = ui[k] + nzk;
2916   }
2917 
2918   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2919   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2920   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2921 
2922   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2923   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2924   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2925   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2926 
2927   /* put together the new matrix in MATSEQSBAIJ format */
2928 
2929   b = (Mat_SeqSBAIJ*)fact->data;
2930   b->singlemalloc = PETSC_FALSE;
2931   b->free_a       = PETSC_TRUE;
2932   b->free_ij      = PETSC_TRUE;
2933   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2934   b->j    = uj;
2935   b->i    = ui;
2936   b->diag = udiag;
2937   b->free_diag = PETSC_TRUE;
2938   b->ilen = 0;
2939   b->imax = 0;
2940   b->row  = perm;
2941   b->col  = perm;
2942   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2943   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2944   b->icol = iperm;
2945   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2946   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2947   ierr    = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2948   b->maxnz = b->nz = ui[am];
2949 
2950   fact->info.factor_mallocs    = reallocs;
2951   fact->info.fill_ratio_given  = fill;
2952   if (ai[am] != 0) {
2953     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2954     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2955   } else {
2956     fact->info.fill_ratio_needed = 0.0;
2957   }
2958 #if defined(PETSC_USE_INFO)
2959   if (ai[am] != 0) {
2960     PetscReal af = fact->info.fill_ratio_needed;
2961     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2962     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2963     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2964   } else {
2965      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2966   }
2967 #endif
2968   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2974 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2975 {
2976   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2977   Mat_SeqSBAIJ       *b;
2978   PetscErrorCode     ierr;
2979   PetscBool          perm_identity;
2980   PetscReal          fill = info->fill;
2981   const PetscInt     *rip,*riip;
2982   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2983   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2984   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2985   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2986   PetscBT            lnkbt;
2987   IS                 iperm;
2988 
2989   PetscFunctionBegin;
2990   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2991   /* check whether perm is the identity mapping */
2992   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2993   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2994   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2995   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2996 
2997   /* initialization */
2998   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2999   ui[0] = 0;
3000 
3001   /* jl: linked list for storing indices of the pivot rows
3002      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3003   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
3004   for (i=0; i<am; i++){
3005     jl[i] = am; il[i] = 0;
3006   }
3007 
3008   /* create and initialize a linked list for storing column indices of the active row k */
3009   nlnk = am + 1;
3010   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3011 
3012   /* initial FreeSpace size is fill*(ai[am]+1) */
3013   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3014   current_space = free_space;
3015 
3016   for (k=0; k<am; k++){  /* for each active row k */
3017     /* initialize lnk by the column indices of row rip[k] of A */
3018     nzk   = 0;
3019     ncols = ai[rip[k]+1] - ai[rip[k]];
3020     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3021     ncols_upper = 0;
3022     for (j=0; j<ncols; j++){
3023       i = riip[*(aj + ai[rip[k]] + j)];
3024       if (i >= k){ /* only take upper triangular entry */
3025         cols[ncols_upper] = i;
3026         ncols_upper++;
3027       }
3028     }
3029     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3030     nzk += nlnk;
3031 
3032     /* update lnk by computing fill-in for each pivot row to be merged in */
3033     prow = jl[k]; /* 1st pivot row */
3034 
3035     while (prow < k){
3036       nextprow = jl[prow];
3037       /* merge prow into k-th row */
3038       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3039       jmax = ui[prow+1];
3040       ncols = jmax-jmin;
3041       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3042       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3043       nzk += nlnk;
3044 
3045       /* update il and jl for prow */
3046       if (jmin < jmax){
3047         il[prow] = jmin;
3048         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3049       }
3050       prow = nextprow;
3051     }
3052 
3053     /* if free space is not available, make more free space */
3054     if (current_space->local_remaining<nzk) {
3055       i = am - k + 1; /* num of unfactored rows */
3056       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3057       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3058       reallocs++;
3059     }
3060 
3061     /* copy data into free space, then initialize lnk */
3062     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3063 
3064     /* add the k-th row into il and jl */
3065     if (nzk-1 > 0){
3066       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3067       jl[k] = jl[i]; jl[i] = k;
3068       il[k] = ui[k] + 1;
3069     }
3070     ui_ptr[k] = current_space->array;
3071     current_space->array           += nzk;
3072     current_space->local_used      += nzk;
3073     current_space->local_remaining -= nzk;
3074 
3075     ui[k+1] = ui[k] + nzk;
3076   }
3077 
3078 #if defined(PETSC_USE_INFO)
3079   if (ai[am] != 0) {
3080     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3081     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3082     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3083     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3084   } else {
3085      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3086   }
3087 #endif
3088 
3089   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3090   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3091   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3092 
3093   /* destroy list of free space and other temporary array(s) */
3094   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3095   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3096   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3097 
3098   /* put together the new matrix in MATSEQSBAIJ format */
3099 
3100   b = (Mat_SeqSBAIJ*)fact->data;
3101   b->singlemalloc = PETSC_FALSE;
3102   b->free_a       = PETSC_TRUE;
3103   b->free_ij      = PETSC_TRUE;
3104   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3105   b->j    = uj;
3106   b->i    = ui;
3107   b->diag = 0;
3108   b->ilen = 0;
3109   b->imax = 0;
3110   b->row  = perm;
3111   b->col  = perm;
3112   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3113   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3114   b->icol = iperm;
3115   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3116   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3117   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3118   b->maxnz = b->nz = ui[am];
3119 
3120   fact->info.factor_mallocs    = reallocs;
3121   fact->info.fill_ratio_given  = fill;
3122   if (ai[am] != 0) {
3123     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3124   } else {
3125     fact->info.fill_ratio_needed = 0.0;
3126   }
3127   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3128   PetscFunctionReturn(0);
3129 }
3130 
3131 #undef __FUNCT__
3132 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3133 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3134 {
3135   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3136   PetscErrorCode    ierr;
3137   PetscInt          n = A->rmap->n;
3138   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3139   PetscScalar       *x,sum;
3140   const PetscScalar *b;
3141   const MatScalar   *aa = a->a,*v;
3142   PetscInt          i,nz;
3143 
3144   PetscFunctionBegin;
3145   if (!n) PetscFunctionReturn(0);
3146 
3147   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3148   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3149 
3150   /* forward solve the lower triangular */
3151   x[0] = b[0];
3152   v    = aa;
3153   vi   = aj;
3154   for (i=1; i<n; i++) {
3155     nz  = ai[i+1] - ai[i];
3156     sum = b[i];
3157     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3158     v  += nz;
3159     vi += nz;
3160     x[i] = sum;
3161   }
3162 
3163   /* backward solve the upper triangular */
3164   for (i=n-1; i>=0; i--){
3165     v   = aa + adiag[i+1] + 1;
3166     vi  = aj + adiag[i+1] + 1;
3167     nz = adiag[i] - adiag[i+1]-1;
3168     sum = x[i];
3169     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3170     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3171   }
3172 
3173   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3174   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3175   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3176   PetscFunctionReturn(0);
3177 }
3178 
3179 #undef __FUNCT__
3180 #define __FUNCT__ "MatSolve_SeqAIJ"
3181 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3182 {
3183   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3184   IS                iscol = a->col,isrow = a->row;
3185   PetscErrorCode    ierr;
3186   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3187   const PetscInt    *rout,*cout,*r,*c;
3188   PetscScalar       *x,*tmp,sum;
3189   const PetscScalar *b;
3190   const MatScalar   *aa = a->a,*v;
3191 
3192   PetscFunctionBegin;
3193   if (!n) PetscFunctionReturn(0);
3194 
3195   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3196   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3197   tmp  = a->solve_work;
3198 
3199   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3200   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3201 
3202   /* forward solve the lower triangular */
3203   tmp[0] = b[r[0]];
3204   v      = aa;
3205   vi     = aj;
3206   for (i=1; i<n; i++) {
3207     nz  = ai[i+1] - ai[i];
3208     sum = b[r[i]];
3209     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3210     tmp[i] = sum;
3211     v += nz; vi += nz;
3212   }
3213 
3214   /* backward solve the upper triangular */
3215   for (i=n-1; i>=0; i--){
3216     v   = aa + adiag[i+1]+1;
3217     vi  = aj + adiag[i+1]+1;
3218     nz  = adiag[i]-adiag[i+1]-1;
3219     sum = tmp[i];
3220     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3221     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3222   }
3223 
3224   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3225   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3226   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3227   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3228   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3229   PetscFunctionReturn(0);
3230 }
3231 
3232 #undef __FUNCT__
3233 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3234 /*
3235     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3236 */
3237 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3238 {
3239   Mat                B = *fact;
3240   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3241   IS                 isicol;
3242   PetscErrorCode     ierr;
3243   const PetscInt     *r,*ic;
3244   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3245   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3246   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3247   PetscInt           nlnk,*lnk;
3248   PetscBT            lnkbt;
3249   PetscBool          row_identity,icol_identity;
3250   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3251   const PetscInt     *ics;
3252   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3253   PetscReal          dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3254   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3255   PetscBool          missing;
3256 
3257   PetscFunctionBegin;
3258 
3259   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3260   if (dtcol   == PETSC_DEFAULT) dtcol   = 0.01; /* XXX unused! */
3261   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3262 
3263   /* ------- symbolic factorization, can be reused ---------*/
3264   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3265   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3266   adiag=a->diag;
3267 
3268   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3269 
3270   /* bdiag is location of diagonal in factor */
3271   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3272   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3273 
3274   /* allocate row pointers bi */
3275   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3276 
3277   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3278   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3279   nnz_max  = ai[n]+2*n*dtcount+2;
3280 
3281   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3282   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3283 
3284   /* put together the new matrix */
3285   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3286   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3287   b    = (Mat_SeqAIJ*)B->data;
3288   b->free_a       = PETSC_TRUE;
3289   b->free_ij      = PETSC_TRUE;
3290   b->singlemalloc = PETSC_FALSE;
3291   b->a          = ba;
3292   b->j          = bj;
3293   b->i          = bi;
3294   b->diag       = bdiag;
3295   b->ilen       = 0;
3296   b->imax       = 0;
3297   b->row        = isrow;
3298   b->col        = iscol;
3299   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3300   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3301   b->icol       = isicol;
3302   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3303 
3304   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3305   b->maxnz = nnz_max;
3306 
3307   B->factortype            = MAT_FACTOR_ILUDT;
3308   B->info.factor_mallocs   = 0;
3309   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3310   CHKMEMQ;
3311   /* ------- end of symbolic factorization ---------*/
3312 
3313   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3314   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3315   ics  = ic;
3316 
3317   /* linked list for storing column indices of the active row */
3318   nlnk = n + 1;
3319   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3320 
3321   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3322   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3323   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3324   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3325   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3326 
3327   bi[0]    = 0;
3328   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3329   bdiag_rev[n] = bdiag[0];
3330   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3331   for (i=0; i<n; i++) {
3332     /* copy initial fill into linked list */
3333     nzi = 0; /* nonzeros for active row i */
3334     nzi = ai[r[i]+1] - ai[r[i]];
3335     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3336     nzi_al = adiag[r[i]] - ai[r[i]];
3337     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3338     ajtmp = aj + ai[r[i]];
3339     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3340 
3341     /* load in initial (unfactored row) */
3342     aatmp = a->a + ai[r[i]];
3343     for (j=0; j<nzi; j++) {
3344       rtmp[ics[*ajtmp++]] = *aatmp++;
3345     }
3346 
3347     /* add pivot rows into linked list */
3348     row = lnk[n];
3349     while (row < i ) {
3350       nzi_bl = bi[row+1] - bi[row] + 1;
3351       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3352       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3353       nzi  += nlnk;
3354       row   = lnk[row];
3355     }
3356 
3357     /* copy data from lnk into jtmp, then initialize lnk */
3358     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3359 
3360     /* numerical factorization */
3361     bjtmp = jtmp;
3362     row   = *bjtmp++; /* 1st pivot row */
3363     while  ( row < i ) {
3364       pc         = rtmp + row;
3365       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3366       multiplier = (*pc) * (*pv);
3367       *pc        = multiplier;
3368       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3369         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3370         pv         = ba + bdiag[row+1] + 1;
3371         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3372         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3373         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3374         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3375       }
3376       row = *bjtmp++;
3377     }
3378 
3379     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3380     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3381     nzi_bl = 0; j = 0;
3382     while (jtmp[j] < i){ /* Note: jtmp is sorted */
3383       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3384       nzi_bl++; j++;
3385     }
3386     nzi_bu = nzi - nzi_bl -1;
3387     while (j < nzi){
3388       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3389       j++;
3390     }
3391 
3392     bjtmp = bj + bi[i];
3393     batmp = ba + bi[i];
3394     /* apply level dropping rule to L part */
3395     ncut = nzi_al + dtcount;
3396     if (ncut < nzi_bl){
3397       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3398       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3399     } else {
3400       ncut = nzi_bl;
3401     }
3402     for (j=0; j<ncut; j++){
3403       bjtmp[j] = jtmp[j];
3404       batmp[j] = vtmp[j];
3405       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3406     }
3407     bi[i+1] = bi[i] + ncut;
3408     nzi = ncut + 1;
3409 
3410     /* apply level dropping rule to U part */
3411     ncut = nzi_au + dtcount;
3412     if (ncut < nzi_bu){
3413       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3414       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3415     } else {
3416       ncut = nzi_bu;
3417     }
3418     nzi += ncut;
3419 
3420     /* mark bdiagonal */
3421     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3422     bdiag_rev[n-i-1] = bdiag[i+1];
3423     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3424     bjtmp = bj + bdiag[i];
3425     batmp = ba + bdiag[i];
3426     *bjtmp = i;
3427     *batmp = diag_tmp; /* rtmp[i]; */
3428     if (*batmp == 0.0) {
3429       *batmp = dt+shift;
3430       /* printf(" row %d add shift %g\n",i,shift); */
3431     }
3432     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3433     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3434 
3435     bjtmp = bj + bdiag[i+1]+1;
3436     batmp = ba + bdiag[i+1]+1;
3437     for (k=0; k<ncut; k++){
3438       bjtmp[k] = jtmp[nzi_bl+1+k];
3439       batmp[k] = vtmp[nzi_bl+1+k];
3440       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3441     }
3442     /* printf("\n"); */
3443 
3444     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3445     /*
3446     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3447     printf(" ----------------------------\n");
3448     */
3449   } /* for (i=0; i<n; i++) */
3450   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3451   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3452 
3453   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3454   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3455 
3456   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3457   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3458   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3459   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3460 
3461   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3462   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3463 
3464   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3465   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3466   if (row_identity && icol_identity) {
3467     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3468   } else {
3469     B->ops->solve = MatSolve_SeqAIJ;
3470   }
3471 
3472   B->ops->solveadd          = 0;
3473   B->ops->solvetranspose    = 0;
3474   B->ops->solvetransposeadd = 0;
3475   B->ops->matsolve          = 0;
3476   B->assembled              = PETSC_TRUE;
3477   B->preallocated           = PETSC_TRUE;
3478   PetscFunctionReturn(0);
3479 }
3480 
3481 /* a wraper of MatILUDTFactor_SeqAIJ() */
3482 #undef __FUNCT__
3483 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3484 /*
3485     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3486 */
3487 
3488 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3489 {
3490   PetscErrorCode     ierr;
3491 
3492   PetscFunctionBegin;
3493   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3494   PetscFunctionReturn(0);
3495 }
3496 
3497 /*
3498    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3499    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3500 */
3501 #undef __FUNCT__
3502 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3503 /*
3504     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3505 */
3506 
3507 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3508 {
3509   Mat            C=fact;
3510   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3511   IS             isrow = b->row,isicol = b->icol;
3512   PetscErrorCode ierr;
3513   const PetscInt *r,*ic,*ics;
3514   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3515   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3516   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3517   PetscReal      dt=info->dt,shift=info->shiftamount;
3518   PetscBool      row_identity, col_identity;
3519 
3520   PetscFunctionBegin;
3521   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3522   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3523   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3524   ics  = ic;
3525 
3526   for (i=0; i<n; i++){
3527     /* initialize rtmp array */
3528     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3529     bjtmp = bj + bi[i];
3530     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3531     rtmp[i] = 0.0;
3532     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3533     bjtmp = bj + bdiag[i+1] + 1;
3534     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3535 
3536     /* load in initial unfactored row of A */
3537     /* printf("row %d\n",i); */
3538     nz    = ai[r[i]+1] - ai[r[i]];
3539     ajtmp = aj + ai[r[i]];
3540     v     = aa + ai[r[i]];
3541     for (j=0; j<nz; j++) {
3542       rtmp[ics[*ajtmp++]] = v[j];
3543       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3544     }
3545     /* printf("\n"); */
3546 
3547     /* numerical factorization */
3548     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3549     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3550     k = 0;
3551     while (k < nzl){
3552       row   = *bjtmp++;
3553       /* printf("  prow %d\n",row); */
3554       pc         = rtmp + row;
3555       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3556       multiplier = (*pc) * (*pv);
3557       *pc        = multiplier;
3558       if (PetscAbsScalar(multiplier) > dt){
3559         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3560         pv         = b->a + bdiag[row+1] + 1;
3561         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3562         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3563         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3564       }
3565       k++;
3566     }
3567 
3568     /* finished row so stick it into b->a */
3569     /* L-part */
3570     pv = b->a + bi[i] ;
3571     pj = bj + bi[i] ;
3572     nzl = bi[i+1] - bi[i];
3573     for (j=0; j<nzl; j++) {
3574       pv[j] = rtmp[pj[j]];
3575       /* printf(" (%d,%g),",pj[j],pv[j]); */
3576     }
3577 
3578     /* diagonal: invert diagonal entries for simplier triangular solves */
3579     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3580     b->a[bdiag[i]] = 1.0/rtmp[i];
3581     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3582 
3583     /* U-part */
3584     pv = b->a + bdiag[i+1] + 1;
3585     pj = bj + bdiag[i+1] + 1;
3586     nzu = bdiag[i] - bdiag[i+1] - 1;
3587     for (j=0; j<nzu; j++) {
3588       pv[j] = rtmp[pj[j]];
3589       /* printf(" (%d,%g),",pj[j],pv[j]); */
3590     }
3591     /* printf("\n"); */
3592   }
3593 
3594   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3595   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3596   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3597 
3598   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3599   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3600   if (row_identity && col_identity) {
3601     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3602   } else {
3603     C->ops->solve   = MatSolve_SeqAIJ;
3604   }
3605   C->ops->solveadd           = 0;
3606   C->ops->solvetranspose     = 0;
3607   C->ops->solvetransposeadd  = 0;
3608   C->ops->matsolve           = 0;
3609   C->assembled    = PETSC_TRUE;
3610   C->preallocated = PETSC_TRUE;
3611   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3612   PetscFunctionReturn(0);
3613 }
3614