xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 009bbdc485cd9ad46be9940d3549e2dde9cdc322)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
10 /*
11       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12 
13       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14 */
15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
16 {
17   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
18   PetscErrorCode    ierr;
19   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20   const PetscInt    *ai = a->i, *aj = a->j;
21   const PetscScalar *aa = a->a;
22   PetscBool         *done;
23   PetscReal         best,past = 0,future;
24 
25   PetscFunctionBegin;
26   /* pick initial row */
27   best = -1;
28   for (i=0; i<n; i++) {
29     future = 0.0;
30     for (j=ai[i]; j<ai[i+1]; j++) {
31       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
32       else              past  = PetscAbsScalar(aa[j]);
33     }
34     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
35     if (past/future > best) {
36       best = past/future;
37       current = i;
38     }
39   }
40 
41   ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr);
42   ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
43   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
44   order[0] = current;
45   for (i=0; i<n-1; i++) {
46     done[current] = PETSC_TRUE;
47     best          = -1;
48     /* loop over all neighbors of current pivot */
49     for (j=ai[current]; j<ai[current+1]; j++) {
50       jj = aj[j];
51       if (done[jj]) continue;
52       /* loop over columns of potential next row computing weights for below and above diagonal */
53       past = future = 0.0;
54       for (k=ai[jj]; k<ai[jj+1]; k++) {
55         kk = aj[k];
56         if (done[kk]) past += PetscAbsScalar(aa[k]);
57         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
58       }
59       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
60       if (past/future > best) {
61         best = past/future;
62         newcurrent = jj;
63       }
64     }
65     if (best == -1) { /* no neighbors to select from so select best of all that remain */
66       best = -1;
67       for (k=0; k<n; k++) {
68         if (done[k]) continue;
69         future = 0.0;
70         past   = 0.0;
71         for (j=ai[k]; j<ai[k+1]; j++) {
72           kk = aj[j];
73           if (done[kk])       past += PetscAbsScalar(aa[j]);
74           else if (kk != k) future += PetscAbsScalar(aa[j]);
75         }
76         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
77         if (past/future > best) {
78           best = past/future;
79           newcurrent = k;
80         }
81       }
82     }
83     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
84     current = newcurrent;
85     order[i+1] = current;
86   }
87   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
88   *icol = *irow;
89   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
90   ierr = PetscFree(done);CHKERRQ(ierr);
91   ierr = PetscFree(order);CHKERRQ(ierr);
92   PetscFunctionReturn(0);
93 }
94 EXTERN_C_END
95 
96 EXTERN_C_BEGIN
97 #undef __FUNCT__
98 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
99 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
100 {
101   PetscFunctionBegin;
102   *flg = PETSC_TRUE;
103   PetscFunctionReturn(0);
104 }
105 EXTERN_C_END
106 
107 EXTERN_C_BEGIN
108 #undef __FUNCT__
109 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
110 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
111 {
112   PetscInt           n = A->rmap->n;
113   PetscErrorCode     ierr;
114 
115   PetscFunctionBegin;
116 #if defined(PETSC_USE_COMPLEX)
117   if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
118 #endif
119   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
120   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
121   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
122     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
123     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
124     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
125     ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
126   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
127     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
128     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
129     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
130     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
131   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
132   (*B)->factortype = ftype;
133   PetscFunctionReturn(0);
134 }
135 EXTERN_C_END
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
139 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
140 {
141   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
142   IS                 isicol;
143   PetscErrorCode     ierr;
144   const PetscInt     *r,*ic;
145   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
146   PetscInt           *bi,*bj,*ajtmp;
147   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
148   PetscReal          f;
149   PetscInt           nlnk,*lnk,k,**bi_ptr;
150   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
151   PetscBT            lnkbt;
152 
153   PetscFunctionBegin;
154   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
155   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
156   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
157   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
158 
159   /* get new row pointers */
160   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
161   bi[0] = 0;
162 
163   /* bdiag is location of diagonal in factor */
164   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
165   bdiag[0] = 0;
166 
167   /* linked list for storing column indices of the active row */
168   nlnk = n + 1;
169   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
170 
171   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
172 
173   /* initial FreeSpace size is f*(ai[n]+1) */
174   f = info->fill;
175   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
176   current_space = free_space;
177 
178   for (i=0; i<n; i++) {
179     /* copy previous fill into linked list */
180     nzi = 0;
181     nnz = ai[r[i]+1] - ai[r[i]];
182     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
183     ajtmp = aj + ai[r[i]];
184     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
185     nzi += nlnk;
186 
187     /* add pivot rows into linked list */
188     row = lnk[n];
189     while (row < i) {
190       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
191       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
192       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
193       nzi += nlnk;
194       row  = lnk[row];
195     }
196     bi[i+1] = bi[i] + nzi;
197     im[i]   = nzi;
198 
199     /* mark bdiag */
200     nzbd = 0;
201     nnz  = nzi;
202     k    = lnk[n];
203     while (nnz-- && k < i) {
204       nzbd++;
205       k = lnk[k];
206     }
207     bdiag[i] = bi[i] + nzbd;
208 
209     /* if free space is not available, make more free space */
210     if (current_space->local_remaining<nzi) {
211       nnz = (n - i)*nzi; /* estimated and max additional space needed */
212       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
213       reallocs++;
214     }
215 
216     /* copy data into free space, then initialize lnk */
217     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
218     bi_ptr[i] = current_space->array;
219     current_space->array           += nzi;
220     current_space->local_used      += nzi;
221     current_space->local_remaining -= nzi;
222   }
223 #if defined(PETSC_USE_INFO)
224   if (ai[n] != 0) {
225     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
226     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
227     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
228     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
229     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
230   } else {
231     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
232   }
233 #endif
234 
235   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
236   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
237 
238   /* destroy list of free space and other temporary array(s) */
239   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
240   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
241   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
242   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
243 
244   /* put together the new matrix */
245   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
246   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
247   b    = (Mat_SeqAIJ*)(B)->data;
248   b->free_a       = PETSC_TRUE;
249   b->free_ij      = PETSC_TRUE;
250   b->singlemalloc = PETSC_FALSE;
251   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
252   b->j          = bj;
253   b->i          = bi;
254   b->diag       = bdiag;
255   b->ilen       = 0;
256   b->imax       = 0;
257   b->row        = isrow;
258   b->col        = iscol;
259   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
260   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
261   b->icol       = isicol;
262   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
263 
264   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
265   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
266   b->maxnz = b->nz = bi[n] ;
267 
268   (B)->factortype            = MAT_FACTOR_LU;
269   (B)->info.factor_mallocs   = reallocs;
270   (B)->info.fill_ratio_given = f;
271 
272   if (ai[n]) {
273     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
274   } else {
275     (B)->info.fill_ratio_needed = 0.0;
276   }
277   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
278   if (a->inode.size) {
279     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
286 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
287 {
288   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
289   IS                 isicol;
290   PetscErrorCode     ierr;
291   const PetscInt     *r,*ic;
292   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
293   PetscInt           *bi,*bj,*ajtmp;
294   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
295   PetscReal          f;
296   PetscInt           nlnk,*lnk,k,**bi_ptr;
297   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
298   PetscBT            lnkbt;
299 
300   PetscFunctionBegin;
301   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
302   /*
303   PetscBool          olddatastruct=PETSC_FALSE;
304   ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
305   if (olddatastruct) {
306     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
307     PetscFunctionReturn(0);
308   }
309   */
310   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
311   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
312   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
313   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
314 
315   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
317   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
318   bi[0] = bdiag[0] = 0;
319 
320   /* linked list for storing column indices of the active row */
321   nlnk = n + 1;
322   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
323 
324   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
325 
326   /* initial FreeSpace size is f*(ai[n]+1) */
327   f = info->fill;
328   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
329   current_space = free_space;
330 
331   for (i=0; i<n; i++) {
332     /* copy previous fill into linked list */
333     nzi = 0;
334     nnz = ai[r[i]+1] - ai[r[i]];
335     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
336     ajtmp = aj + ai[r[i]];
337     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
338     nzi += nlnk;
339 
340     /* add pivot rows into linked list */
341     row = lnk[n];
342     while (row < i) {
343       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
344       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
346       nzi  += nlnk;
347       row   = lnk[row];
348     }
349     bi[i+1] = bi[i] + nzi;
350     im[i]   = nzi;
351 
352     /* mark bdiag */
353     nzbd = 0;
354     nnz  = nzi;
355     k    = lnk[n];
356     while (nnz-- && k < i) {
357       nzbd++;
358       k = lnk[k];
359     }
360     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
361 
362     /* if free space is not available, make more free space */
363     if (current_space->local_remaining<nzi) {
364       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
365       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
366       reallocs++;
367     }
368 
369     /* copy data into free space, then initialize lnk */
370     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
371     bi_ptr[i] = current_space->array;
372     current_space->array           += nzi;
373     current_space->local_used      += nzi;
374     current_space->local_remaining -= nzi;
375   }
376 
377   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
378   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
379 
380   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
381   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
382   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
383   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
384   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
385 
386   /* put together the new matrix */
387   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
388   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
389   b    = (Mat_SeqAIJ*)(B)->data;
390   b->free_a       = PETSC_TRUE;
391   b->free_ij      = PETSC_TRUE;
392   b->singlemalloc = PETSC_FALSE;
393   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
394   b->j          = bj;
395   b->i          = bi;
396   b->diag       = bdiag;
397   b->ilen       = 0;
398   b->imax       = 0;
399   b->row        = isrow;
400   b->col        = iscol;
401   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
402   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
403   b->icol       = isicol;
404   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
405 
406   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
407   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
408   b->maxnz = b->nz = bdiag[0]+1;
409   B->factortype            = MAT_FACTOR_LU;
410   B->info.factor_mallocs   = reallocs;
411   B->info.fill_ratio_given = f;
412 
413   if (ai[n]) {
414     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
415   } else {
416     B->info.fill_ratio_needed = 0.0;
417   }
418 #if defined(PETSC_USE_INFO)
419   if (ai[n] != 0) {
420     PetscReal af = B->info.fill_ratio_needed;
421     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
422     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
423     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
424     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
425   } else {
426     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
427   }
428 #endif
429   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
430   if (a->inode.size) {
431     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
432   }
433   PetscFunctionReturn(0);
434 }
435 
436 /*
437     Trouble in factorization, should we dump the original matrix?
438 */
439 #undef __FUNCT__
440 #define __FUNCT__ "MatFactorDumpMatrix"
441 PetscErrorCode MatFactorDumpMatrix(Mat A)
442 {
443   PetscErrorCode ierr;
444   PetscBool      flg = PETSC_FALSE;
445 
446   PetscFunctionBegin;
447   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
448   if (flg) {
449     PetscViewer viewer;
450     char        filename[PETSC_MAX_PATH_LEN];
451 
452     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
453     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
454     ierr = MatView(A,viewer);CHKERRQ(ierr);
455     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
456   }
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
462 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
463 {
464   Mat              C=B;
465   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
466   IS               isrow = b->row,isicol = b->icol;
467   PetscErrorCode   ierr;
468   const PetscInt   *r,*ic,*ics;
469   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
470   PetscInt         i,j,k,nz,nzL,row,*pj;
471   const PetscInt   *ajtmp,*bjtmp;
472   MatScalar        *rtmp,*pc,multiplier,*pv;
473   const  MatScalar *aa=a->a,*v;
474   PetscBool        row_identity,col_identity;
475   FactorShiftCtx   sctx;
476   const PetscInt   *ddiag;
477   PetscReal        rs;
478   MatScalar        d;
479 
480   PetscFunctionBegin;
481   /* MatPivotSetUp(): initialize shift context sctx */
482   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
483 
484   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
485     ddiag          = a->diag;
486     sctx.shift_top = info->zeropivot;
487     for (i=0; i<n; i++) {
488       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
489       d  = (aa)[ddiag[i]];
490       rs = -PetscAbsScalar(d) - PetscRealPart(d);
491       v  = aa+ai[i];
492       nz = ai[i+1] - ai[i];
493       for (j=0; j<nz; j++)
494         rs += PetscAbsScalar(v[j]);
495       if (rs>sctx.shift_top) sctx.shift_top = rs;
496     }
497     sctx.shift_top   *= 1.1;
498     sctx.nshift_max   = 5;
499     sctx.shift_lo     = 0.;
500     sctx.shift_hi     = 1.;
501   }
502 
503   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
504   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
505   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
506   ics  = ic;
507 
508   do {
509     sctx.newshift = PETSC_FALSE;
510     for (i=0; i<n; i++) {
511       /* zero rtmp */
512       /* L part */
513       nz    = bi[i+1] - bi[i];
514       bjtmp = bj + bi[i];
515       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
516 
517       /* U part */
518       nz = bdiag[i]-bdiag[i+1];
519       bjtmp = bj + bdiag[i+1]+1;
520       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
521 
522       /* load in initial (unfactored row) */
523       nz    = ai[r[i]+1] - ai[r[i]];
524       ajtmp = aj + ai[r[i]];
525       v     = aa + ai[r[i]];
526       for (j=0; j<nz; j++) {
527         rtmp[ics[ajtmp[j]]] = v[j];
528       }
529       /* ZeropivotApply() */
530       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
531 
532       /* elimination */
533       bjtmp = bj + bi[i];
534       row   = *bjtmp++;
535       nzL   = bi[i+1] - bi[i];
536       for (k=0; k < nzL;k++) {
537         pc = rtmp + row;
538         if (*pc != 0.0) {
539           pv         = b->a + bdiag[row];
540           multiplier = *pc * (*pv);
541           *pc        = multiplier;
542           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
543           pv = b->a + bdiag[row+1]+1;
544           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
545           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
546           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
547         }
548         row = *bjtmp++;
549       }
550 
551       /* finished row so stick it into b->a */
552       rs = 0.0;
553       /* L part */
554       pv   = b->a + bi[i] ;
555       pj   = b->j + bi[i] ;
556       nz   = bi[i+1] - bi[i];
557       for (j=0; j<nz; j++) {
558         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
559       }
560 
561       /* U part */
562       pv = b->a + bdiag[i+1]+1;
563       pj = b->j + bdiag[i+1]+1;
564       nz = bdiag[i] - bdiag[i+1]-1;
565       for (j=0; j<nz; j++) {
566         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
567       }
568 
569       sctx.rs  = rs;
570       sctx.pv  = rtmp[i];
571       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
572       if (sctx.newshift) break; /* break for-loop */
573       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
574 
575       /* Mark diagonal and invert diagonal for simplier triangular solves */
576       pv  = b->a + bdiag[i];
577       *pv = 1.0/rtmp[i];
578 
579     } /* endof for (i=0; i<n; i++) { */
580 
581     /* MatPivotRefine() */
582     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
583       /*
584        * if no shift in this attempt & shifting & started shifting & can refine,
585        * then try lower shift
586        */
587       sctx.shift_hi       = sctx.shift_fraction;
588       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
589       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
590       sctx.newshift       = PETSC_TRUE;
591       sctx.nshift++;
592     }
593   } while (sctx.newshift);
594 
595   ierr = PetscFree(rtmp);CHKERRQ(ierr);
596   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
597   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
598 
599   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
600   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
601   if (row_identity && col_identity) {
602     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
603   } else {
604     C->ops->solve = MatSolve_SeqAIJ;
605   }
606   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
607   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
608   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
609   C->ops->matsolve           = MatMatSolve_SeqAIJ;
610   C->assembled    = PETSC_TRUE;
611   C->preallocated = PETSC_TRUE;
612   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
613 
614   /* MatShiftView(A,info,&sctx) */
615   if (sctx.nshift) {
616     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
617       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
618     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
619       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
620     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
621       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
622     }
623   }
624   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
625   PetscFunctionReturn(0);
626 }
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
630 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
631 {
632   Mat             C=B;
633   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
634   IS              isrow = b->row,isicol = b->icol;
635   PetscErrorCode  ierr;
636   const PetscInt   *r,*ic,*ics;
637   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
638   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
639   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
640   MatScalar       *pv,*rtmp,*pc,multiplier,d;
641   const MatScalar *v,*aa=a->a;
642   PetscReal       rs=0.0;
643   FactorShiftCtx  sctx;
644   const PetscInt  *ddiag;
645   PetscBool       row_identity, col_identity;
646 
647   PetscFunctionBegin;
648   /* MatPivotSetUp(): initialize shift context sctx */
649   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
650 
651   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
652     ddiag          = a->diag;
653     sctx.shift_top = info->zeropivot;
654     for (i=0; i<n; i++) {
655       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
656       d  = (aa)[ddiag[i]];
657       rs = -PetscAbsScalar(d) - PetscRealPart(d);
658       v  = aa+ai[i];
659       nz = ai[i+1] - ai[i];
660       for (j=0; j<nz; j++)
661         rs += PetscAbsScalar(v[j]);
662       if (rs>sctx.shift_top) sctx.shift_top = rs;
663     }
664     sctx.shift_top   *= 1.1;
665     sctx.nshift_max   = 5;
666     sctx.shift_lo     = 0.;
667     sctx.shift_hi     = 1.;
668   }
669 
670   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
671   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
672   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
673   ics  = ic;
674 
675   do {
676     sctx.newshift = PETSC_FALSE;
677     for (i=0; i<n; i++) {
678       nz    = bi[i+1] - bi[i];
679       bjtmp = bj + bi[i];
680       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
681 
682       /* load in initial (unfactored row) */
683       nz    = ai[r[i]+1] - ai[r[i]];
684       ajtmp = aj + ai[r[i]];
685       v     = aa + ai[r[i]];
686       for (j=0; j<nz; j++) {
687         rtmp[ics[ajtmp[j]]] = v[j];
688       }
689       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
690 
691       row = *bjtmp++;
692       while  (row < i) {
693         pc = rtmp + row;
694         if (*pc != 0.0) {
695           pv         = b->a + diag_offset[row];
696           pj         = b->j + diag_offset[row] + 1;
697           multiplier = *pc / *pv++;
698           *pc        = multiplier;
699           nz         = bi[row+1] - diag_offset[row] - 1;
700           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
701           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
702         }
703         row = *bjtmp++;
704       }
705       /* finished row so stick it into b->a */
706       pv   = b->a + bi[i] ;
707       pj   = b->j + bi[i] ;
708       nz   = bi[i+1] - bi[i];
709       diag = diag_offset[i] - bi[i];
710       rs   = 0.0;
711       for (j=0; j<nz; j++) {
712         pv[j] = rtmp[pj[j]];
713         rs   += PetscAbsScalar(pv[j]);
714       }
715       rs   -= PetscAbsScalar(pv[diag]);
716 
717       sctx.rs  = rs;
718       sctx.pv  = pv[diag];
719       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
720       if (sctx.newshift) break;
721       pv[diag] = sctx.pv;
722     }
723 
724     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
725       /*
726        * if no shift in this attempt & shifting & started shifting & can refine,
727        * then try lower shift
728        */
729       sctx.shift_hi       = sctx.shift_fraction;
730       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
731       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
732       sctx.newshift       = PETSC_TRUE;
733       sctx.nshift++;
734     }
735   } while (sctx.newshift);
736 
737   /* invert diagonal entries for simplier triangular solves */
738   for (i=0; i<n; i++) {
739     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
740   }
741   ierr = PetscFree(rtmp);CHKERRQ(ierr);
742   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
743   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
744 
745   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
746   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
747   if (row_identity && col_identity) {
748     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
749   } else {
750     C->ops->solve   = MatSolve_SeqAIJ_inplace;
751   }
752   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
753   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
754   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
755   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
756   C->assembled    = PETSC_TRUE;
757   C->preallocated = PETSC_TRUE;
758   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
759   if (sctx.nshift) {
760      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
761       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
762     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
763       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
764     }
765   }
766   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
767   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
768   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 /*
773    This routine implements inplace ILU(0) with row or/and column permutations.
774    Input:
775      A - original matrix
776    Output;
777      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
778          a->j (col index) is permuted by the inverse of colperm, then sorted
779          a->a reordered accordingly with a->j
780          a->diag (ptr to diagonal elements) is updated.
781 */
782 #undef __FUNCT__
783 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
784 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
785 {
786   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
787   IS             isrow = a->row,isicol = a->icol;
788   PetscErrorCode ierr;
789   const PetscInt *r,*ic,*ics;
790   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
791   PetscInt       *ajtmp,nz,row;
792   PetscInt       *diag = a->diag,nbdiag,*pj;
793   PetscScalar    *rtmp,*pc,multiplier,d;
794   MatScalar      *pv,*v;
795   PetscReal      rs;
796   FactorShiftCtx sctx;
797   const  MatScalar *aa=a->a,*vtmp;
798 
799   PetscFunctionBegin;
800   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
801 
802   /* MatPivotSetUp(): initialize shift context sctx */
803   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
804 
805   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
806     const PetscInt   *ddiag = a->diag;
807     sctx.shift_top = info->zeropivot;
808     for (i=0; i<n; i++) {
809       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
810       d  = (aa)[ddiag[i]];
811       rs = -PetscAbsScalar(d) - PetscRealPart(d);
812       vtmp  = aa+ai[i];
813       nz = ai[i+1] - ai[i];
814       for (j=0; j<nz; j++)
815         rs += PetscAbsScalar(vtmp[j]);
816       if (rs>sctx.shift_top) sctx.shift_top = rs;
817     }
818     sctx.shift_top   *= 1.1;
819     sctx.nshift_max   = 5;
820     sctx.shift_lo     = 0.;
821     sctx.shift_hi     = 1.;
822   }
823 
824   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
825   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
826   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
827   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
828   ics = ic;
829 
830 #if defined(MV)
831   sctx.shift_top      = 0.;
832   sctx.nshift_max     = 0;
833   sctx.shift_lo       = 0.;
834   sctx.shift_hi       = 0.;
835   sctx.shift_fraction = 0.;
836 
837   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
838     sctx.shift_top = 0.;
839     for (i=0; i<n; i++) {
840       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
841       d  = (a->a)[diag[i]];
842       rs = -PetscAbsScalar(d) - PetscRealPart(d);
843       v  = a->a+ai[i];
844       nz = ai[i+1] - ai[i];
845       for (j=0; j<nz; j++)
846         rs += PetscAbsScalar(v[j]);
847       if (rs>sctx.shift_top) sctx.shift_top = rs;
848     }
849     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
850     sctx.shift_top    *= 1.1;
851     sctx.nshift_max   = 5;
852     sctx.shift_lo     = 0.;
853     sctx.shift_hi     = 1.;
854   }
855 
856   sctx.shift_amount = 0.;
857   sctx.nshift       = 0;
858 #endif
859 
860   do {
861     sctx.newshift = PETSC_FALSE;
862     for (i=0; i<n; i++) {
863       /* load in initial unfactored row */
864       nz    = ai[r[i]+1] - ai[r[i]];
865       ajtmp = aj + ai[r[i]];
866       v     = a->a + ai[r[i]];
867       /* sort permuted ajtmp and values v accordingly */
868       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
869       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
870 
871       diag[r[i]] = ai[r[i]];
872       for (j=0; j<nz; j++) {
873         rtmp[ajtmp[j]] = v[j];
874         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
875       }
876       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
877 
878       row = *ajtmp++;
879       while  (row < i) {
880         pc = rtmp + row;
881         if (*pc != 0.0) {
882           pv         = a->a + diag[r[row]];
883           pj         = aj + diag[r[row]] + 1;
884 
885           multiplier = *pc / *pv++;
886           *pc        = multiplier;
887           nz         = ai[r[row]+1] - diag[r[row]] - 1;
888           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
889           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
890         }
891         row = *ajtmp++;
892       }
893       /* finished row so overwrite it onto a->a */
894       pv   = a->a + ai[r[i]] ;
895       pj   = aj + ai[r[i]] ;
896       nz   = ai[r[i]+1] - ai[r[i]];
897       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
898 
899       rs   = 0.0;
900       for (j=0; j<nz; j++) {
901         pv[j] = rtmp[pj[j]];
902         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
903       }
904 
905       sctx.rs  = rs;
906       sctx.pv  = pv[nbdiag];
907       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
908       if (sctx.newshift) break;
909       pv[nbdiag] = sctx.pv;
910     }
911 
912     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
913       /*
914        * if no shift in this attempt & shifting & started shifting & can refine,
915        * then try lower shift
916        */
917       sctx.shift_hi        = sctx.shift_fraction;
918       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
919       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
920       sctx.newshift         = PETSC_TRUE;
921       sctx.nshift++;
922     }
923   } while (sctx.newshift);
924 
925   /* invert diagonal entries for simplier triangular solves */
926   for (i=0; i<n; i++) {
927     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
928   }
929 
930   ierr = PetscFree(rtmp);CHKERRQ(ierr);
931   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
932   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
933   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
934   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
935   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
936   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
937   A->assembled = PETSC_TRUE;
938   A->preallocated = PETSC_TRUE;
939   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
940   if (sctx.nshift) {
941     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
942       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
943     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
944       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
945     }
946   }
947   PetscFunctionReturn(0);
948 }
949 
950 /* ----------------------------------------------------------- */
951 #undef __FUNCT__
952 #define __FUNCT__ "MatLUFactor_SeqAIJ"
953 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
954 {
955   PetscErrorCode ierr;
956   Mat            C;
957 
958   PetscFunctionBegin;
959   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
960   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
961   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
962   A->ops->solve            = C->ops->solve;
963   A->ops->solvetranspose   = C->ops->solvetranspose;
964   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
965   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 /* ----------------------------------------------------------- */
969 
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
973 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
974 {
975   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
976   IS                iscol = a->col,isrow = a->row;
977   PetscErrorCode    ierr;
978   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
979   PetscInt          nz;
980   const PetscInt    *rout,*cout,*r,*c;
981   PetscScalar       *x,*tmp,*tmps,sum;
982   const PetscScalar *b;
983   const MatScalar   *aa = a->a,*v;
984 
985   PetscFunctionBegin;
986   if (!n) PetscFunctionReturn(0);
987 
988   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
989   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
990   tmp  = a->solve_work;
991 
992   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
993   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
994 
995   /* forward solve the lower triangular */
996   tmp[0] = b[*r++];
997   tmps   = tmp;
998   for (i=1; i<n; i++) {
999     v   = aa + ai[i] ;
1000     vi  = aj + ai[i] ;
1001     nz  = a->diag[i] - ai[i];
1002     sum = b[*r++];
1003     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1004     tmp[i] = sum;
1005   }
1006 
1007   /* backward solve the upper triangular */
1008   for (i=n-1; i>=0; i--) {
1009     v   = aa + a->diag[i] + 1;
1010     vi  = aj + a->diag[i] + 1;
1011     nz  = ai[i+1] - a->diag[i] - 1;
1012     sum = tmp[i];
1013     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1014     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1015   }
1016 
1017   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1018   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1019   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1020   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1021   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1027 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1028 {
1029   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1030   IS              iscol = a->col,isrow = a->row;
1031   PetscErrorCode  ierr;
1032   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1033   PetscInt        nz,neq;
1034   const PetscInt  *rout,*cout,*r,*c;
1035   PetscScalar     *x,*b,*tmp,*tmps,sum;
1036   const MatScalar *aa = a->a,*v;
1037   PetscBool       bisdense,xisdense;
1038 
1039   PetscFunctionBegin;
1040   if (!n) PetscFunctionReturn(0);
1041 
1042   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1043   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1044   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1045   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1046 
1047   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1048   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1049 
1050   tmp  = a->solve_work;
1051   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1052   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1053 
1054   for (neq=0; neq<B->cmap->n; neq++) {
1055     /* forward solve the lower triangular */
1056     tmp[0] = b[r[0]];
1057     tmps   = tmp;
1058     for (i=1; i<n; i++) {
1059       v   = aa + ai[i] ;
1060       vi  = aj + ai[i] ;
1061       nz  = a->diag[i] - ai[i];
1062       sum = b[r[i]];
1063       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1064       tmp[i] = sum;
1065     }
1066     /* backward solve the upper triangular */
1067     for (i=n-1; i>=0; i--) {
1068       v   = aa + a->diag[i] + 1;
1069       vi  = aj + a->diag[i] + 1;
1070       nz  = ai[i+1] - a->diag[i] - 1;
1071       sum = tmp[i];
1072       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1073       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1074     }
1075 
1076     b += n;
1077     x += n;
1078   }
1079   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1080   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1081   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1082   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1083   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1089 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1090 {
1091   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1092   IS              iscol = a->col,isrow = a->row;
1093   PetscErrorCode  ierr;
1094   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1095   PetscInt        nz,neq;
1096   const PetscInt  *rout,*cout,*r,*c;
1097   PetscScalar     *x,*b,*tmp,sum;
1098   const MatScalar *aa = a->a,*v;
1099   PetscBool       bisdense,xisdense;
1100 
1101   PetscFunctionBegin;
1102   if (!n) PetscFunctionReturn(0);
1103 
1104   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1105   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1106   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1107   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1108 
1109   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1110   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1111 
1112   tmp  = a->solve_work;
1113   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1114   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1115 
1116   for (neq=0; neq<B->cmap->n; neq++) {
1117     /* forward solve the lower triangular */
1118     tmp[0] = b[r[0]];
1119     v      = aa;
1120     vi     = aj;
1121     for (i=1; i<n; i++) {
1122       nz  = ai[i+1] - ai[i];
1123       sum = b[r[i]];
1124       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1125       tmp[i] = sum;
1126       v += nz; vi += nz;
1127     }
1128 
1129     /* backward solve the upper triangular */
1130     for (i=n-1; i>=0; i--) {
1131       v   = aa + adiag[i+1]+1;
1132       vi  = aj + adiag[i+1]+1;
1133       nz  = adiag[i]-adiag[i+1]-1;
1134       sum = tmp[i];
1135       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1136       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1137     }
1138 
1139     b += n;
1140     x += n;
1141   }
1142   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1143   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1144   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1145   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1146   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1152 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1153 {
1154   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1155   IS              iscol = a->col,isrow = a->row;
1156   PetscErrorCode  ierr;
1157   const PetscInt  *r,*c,*rout,*cout;
1158   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1159   PetscInt        nz,row;
1160   PetscScalar     *x,*b,*tmp,*tmps,sum;
1161   const MatScalar *aa = a->a,*v;
1162 
1163   PetscFunctionBegin;
1164   if (!n) PetscFunctionReturn(0);
1165 
1166   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1167   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1168   tmp  = a->solve_work;
1169 
1170   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1171   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1172 
1173   /* forward solve the lower triangular */
1174   tmp[0] = b[*r++];
1175   tmps   = tmp;
1176   for (row=1; row<n; row++) {
1177     i   = rout[row]; /* permuted row */
1178     v   = aa + ai[i] ;
1179     vi  = aj + ai[i] ;
1180     nz  = a->diag[i] - ai[i];
1181     sum = b[*r++];
1182     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1183     tmp[row] = sum;
1184   }
1185 
1186   /* backward solve the upper triangular */
1187   for (row=n-1; row>=0; row--) {
1188     i   = rout[row]; /* permuted row */
1189     v   = aa + a->diag[i] + 1;
1190     vi  = aj + a->diag[i] + 1;
1191     nz  = ai[i+1] - a->diag[i] - 1;
1192     sum = tmp[row];
1193     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1194     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1195   }
1196 
1197   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1198   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1199   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1200   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1201   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 /* ----------------------------------------------------------- */
1206 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1209 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1210 {
1211   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1212   PetscErrorCode    ierr;
1213   PetscInt          n = A->rmap->n;
1214   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1215   PetscScalar       *x;
1216   const PetscScalar *b;
1217   const MatScalar   *aa = a->a;
1218 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1219   PetscInt          adiag_i,i,nz,ai_i;
1220   const PetscInt    *vi;
1221   const MatScalar   *v;
1222   PetscScalar       sum;
1223 #endif
1224 
1225   PetscFunctionBegin;
1226   if (!n) PetscFunctionReturn(0);
1227 
1228   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1229   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1230 
1231 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1232   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1233 #else
1234   /* forward solve the lower triangular */
1235   x[0] = b[0];
1236   for (i=1; i<n; i++) {
1237     ai_i = ai[i];
1238     v    = aa + ai_i;
1239     vi   = aj + ai_i;
1240     nz   = adiag[i] - ai_i;
1241     sum  = b[i];
1242     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1243     x[i] = sum;
1244   }
1245 
1246   /* backward solve the upper triangular */
1247   for (i=n-1; i>=0; i--) {
1248     adiag_i = adiag[i];
1249     v       = aa + adiag_i + 1;
1250     vi      = aj + adiag_i + 1;
1251     nz      = ai[i+1] - adiag_i - 1;
1252     sum     = x[i];
1253     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1254     x[i]    = sum*aa[adiag_i];
1255   }
1256 #endif
1257   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1258   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1259   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1265 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1266 {
1267   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1268   IS                iscol = a->col,isrow = a->row;
1269   PetscErrorCode    ierr;
1270   PetscInt          i, n = A->rmap->n,j;
1271   PetscInt          nz;
1272   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1273   PetscScalar       *x,*tmp,sum;
1274   const PetscScalar *b;
1275   const MatScalar   *aa = a->a,*v;
1276 
1277   PetscFunctionBegin;
1278   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1279 
1280   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1281   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1282   tmp  = a->solve_work;
1283 
1284   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1285   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1286 
1287   /* forward solve the lower triangular */
1288   tmp[0] = b[*r++];
1289   for (i=1; i<n; i++) {
1290     v   = aa + ai[i] ;
1291     vi  = aj + ai[i] ;
1292     nz  = a->diag[i] - ai[i];
1293     sum = b[*r++];
1294     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1295     tmp[i] = sum;
1296   }
1297 
1298   /* backward solve the upper triangular */
1299   for (i=n-1; i>=0; i--) {
1300     v   = aa + a->diag[i] + 1;
1301     vi  = aj + a->diag[i] + 1;
1302     nz  = ai[i+1] - a->diag[i] - 1;
1303     sum = tmp[i];
1304     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1305     tmp[i] = sum*aa[a->diag[i]];
1306     x[*c--] += tmp[i];
1307   }
1308 
1309   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1310   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1311   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1312   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1313   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1319 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1320 {
1321   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1322   IS                iscol = a->col,isrow = a->row;
1323   PetscErrorCode    ierr;
1324   PetscInt          i, n = A->rmap->n,j;
1325   PetscInt          nz;
1326   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1327   PetscScalar       *x,*tmp,sum;
1328   const PetscScalar *b;
1329   const MatScalar   *aa = a->a,*v;
1330 
1331   PetscFunctionBegin;
1332   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1333 
1334   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1335   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1336   tmp  = a->solve_work;
1337 
1338   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1339   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1340 
1341   /* forward solve the lower triangular */
1342   tmp[0] = b[r[0]];
1343   v      = aa;
1344   vi     = aj;
1345   for (i=1; i<n; i++) {
1346     nz  = ai[i+1] - ai[i];
1347     sum = b[r[i]];
1348     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1349     tmp[i] = sum;
1350     v += nz; vi += nz;
1351   }
1352 
1353   /* backward solve the upper triangular */
1354   v  = aa + adiag[n-1];
1355   vi = aj + adiag[n-1];
1356   for (i=n-1; i>=0; i--) {
1357     nz  = adiag[i] - adiag[i+1] - 1;
1358     sum = tmp[i];
1359     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1360     tmp[i] = sum*v[nz];
1361     x[c[i]] += tmp[i];
1362     v += nz+1; vi += nz+1;
1363   }
1364 
1365   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1366   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1367   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1368   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1369   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1375 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1376 {
1377   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1378   IS                iscol = a->col,isrow = a->row;
1379   PetscErrorCode    ierr;
1380   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1381   PetscInt          i,n = A->rmap->n,j;
1382   PetscInt          nz;
1383   PetscScalar       *x,*tmp,s1;
1384   const MatScalar   *aa = a->a,*v;
1385   const PetscScalar *b;
1386 
1387   PetscFunctionBegin;
1388   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1389   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1390   tmp  = a->solve_work;
1391 
1392   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1393   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1394 
1395   /* copy the b into temp work space according to permutation */
1396   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1397 
1398   /* forward solve the U^T */
1399   for (i=0; i<n; i++) {
1400     v   = aa + diag[i] ;
1401     vi  = aj + diag[i] + 1;
1402     nz  = ai[i+1] - diag[i] - 1;
1403     s1  = tmp[i];
1404     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1405     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1406     tmp[i] = s1;
1407   }
1408 
1409   /* backward solve the L^T */
1410   for (i=n-1; i>=0; i--) {
1411     v   = aa + diag[i] - 1 ;
1412     vi  = aj + diag[i] - 1 ;
1413     nz  = diag[i] - ai[i];
1414     s1  = tmp[i];
1415     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1416   }
1417 
1418   /* copy tmp into x according to permutation */
1419   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1420 
1421   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1422   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1423   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1425 
1426   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1432 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1433 {
1434   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1435   IS                iscol = a->col,isrow = a->row;
1436   PetscErrorCode    ierr;
1437   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1438   PetscInt          i,n = A->rmap->n,j;
1439   PetscInt          nz;
1440   PetscScalar       *x,*tmp,s1;
1441   const MatScalar   *aa = a->a,*v;
1442   const PetscScalar *b;
1443 
1444   PetscFunctionBegin;
1445   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1446   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1447   tmp  = a->solve_work;
1448 
1449   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1450   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1451 
1452   /* copy the b into temp work space according to permutation */
1453   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1454 
1455   /* forward solve the U^T */
1456   for (i=0; i<n; i++) {
1457     v   = aa + adiag[i+1] + 1;
1458     vi  = aj + adiag[i+1] + 1;
1459     nz  = adiag[i] - adiag[i+1] - 1;
1460     s1  = tmp[i];
1461     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1462     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1463     tmp[i] = s1;
1464   }
1465 
1466   /* backward solve the L^T */
1467   for (i=n-1; i>=0; i--) {
1468     v   = aa + ai[i];
1469     vi  = aj + ai[i];
1470     nz  = ai[i+1] - ai[i];
1471     s1  = tmp[i];
1472     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1473   }
1474 
1475   /* copy tmp into x according to permutation */
1476   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1477 
1478   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1479   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1480   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1481   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1482 
1483   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1489 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1490 {
1491   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1492   IS                iscol = a->col,isrow = a->row;
1493   PetscErrorCode    ierr;
1494   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1495   PetscInt          i,n = A->rmap->n,j;
1496   PetscInt          nz;
1497   PetscScalar       *x,*tmp,s1;
1498   const MatScalar   *aa = a->a,*v;
1499   const PetscScalar *b;
1500 
1501   PetscFunctionBegin;
1502   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1503   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1504   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1505   tmp  = a->solve_work;
1506 
1507   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1508   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1509 
1510   /* copy the b into temp work space according to permutation */
1511   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1512 
1513   /* forward solve the U^T */
1514   for (i=0; i<n; i++) {
1515     v   = aa + diag[i] ;
1516     vi  = aj + diag[i] + 1;
1517     nz  = ai[i+1] - diag[i] - 1;
1518     s1  = tmp[i];
1519     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1520     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1521     tmp[i] = s1;
1522   }
1523 
1524   /* backward solve the L^T */
1525   for (i=n-1; i>=0; i--) {
1526     v   = aa + diag[i] - 1 ;
1527     vi  = aj + diag[i] - 1 ;
1528     nz  = diag[i] - ai[i];
1529     s1  = tmp[i];
1530     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1531   }
1532 
1533   /* copy tmp into x according to permutation */
1534   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1535 
1536   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1537   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1538   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1539   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1540 
1541   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1547 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1548 {
1549   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1550   IS                iscol = a->col,isrow = a->row;
1551   PetscErrorCode    ierr;
1552   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1553   PetscInt          i,n = A->rmap->n,j;
1554   PetscInt          nz;
1555   PetscScalar       *x,*tmp,s1;
1556   const MatScalar   *aa = a->a,*v;
1557   const PetscScalar *b;
1558 
1559   PetscFunctionBegin;
1560   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1561   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1562   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1563   tmp  = a->solve_work;
1564 
1565   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1566   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1567 
1568   /* copy the b into temp work space according to permutation */
1569   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1570 
1571   /* forward solve the U^T */
1572   for (i=0; i<n; i++) {
1573     v   = aa + adiag[i+1] + 1;
1574     vi  = aj + adiag[i+1] + 1;
1575     nz  = adiag[i] - adiag[i+1] - 1;
1576     s1  = tmp[i];
1577     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1578     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1579     tmp[i] = s1;
1580   }
1581 
1582 
1583   /* backward solve the L^T */
1584   for (i=n-1; i>=0; i--) {
1585     v   = aa + ai[i] ;
1586     vi  = aj + ai[i];
1587     nz  = ai[i+1] - ai[i];
1588     s1  = tmp[i];
1589     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1590   }
1591 
1592   /* copy tmp into x according to permutation */
1593   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1594 
1595   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1596   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1597   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1598   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1599 
1600   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 /* ----------------------------------------------------------------*/
1605 
1606 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1607 
1608 /*
1609    ilu() under revised new data structure.
1610    Factored arrays bj and ba are stored as
1611      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1612 
1613    bi=fact->i is an array of size n+1, in which
1614    bi+
1615      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1616      bi[n]:  points to L(n-1,n-1)+1
1617 
1618   bdiag=fact->diag is an array of size n+1,in which
1619      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1620      bdiag[n]: points to entry of U(n-1,0)-1
1621 
1622    U(i,:) contains bdiag[i] as its last entry, i.e.,
1623     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1624 */
1625 #undef __FUNCT__
1626 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1627 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1628 {
1629 
1630   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1631   PetscErrorCode     ierr;
1632   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1633   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1634   PetscBool          missing;
1635   IS                 isicol;
1636 
1637   PetscFunctionBegin;
1638   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1639   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1640   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1641   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1642   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1643   b    = (Mat_SeqAIJ*)(fact)->data;
1644 
1645   /* allocate matrix arrays for new data structure */
1646   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1647   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1648   b->singlemalloc = PETSC_TRUE;
1649   if (!b->diag) {
1650     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1651     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1652   }
1653   bdiag = b->diag;
1654 
1655   if (n > 0) {
1656     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1657   }
1658 
1659   /* set bi and bj with new data structure */
1660   bi = b->i;
1661   bj = b->j;
1662 
1663   /* L part */
1664   bi[0] = 0;
1665   for (i=0; i<n; i++) {
1666     nz = adiag[i] - ai[i];
1667     bi[i+1] = bi[i] + nz;
1668     aj = a->j + ai[i];
1669     for (j=0; j<nz; j++) {
1670       /*   *bj = aj[j]; bj++; */
1671       bj[k++] = aj[j];
1672     }
1673   }
1674 
1675   /* U part */
1676   bdiag[n] = bi[n]-1;
1677   for (i=n-1; i>=0; i--) {
1678     nz = ai[i+1] - adiag[i] - 1;
1679     aj = a->j + adiag[i] + 1;
1680     for (j=0; j<nz; j++) {
1681       /*      *bj = aj[j]; bj++; */
1682       bj[k++] = aj[j];
1683     }
1684     /* diag[i] */
1685     /*    *bj = i; bj++; */
1686     bj[k++] = i;
1687     bdiag[i] = bdiag[i+1] + nz + 1;
1688   }
1689 
1690   fact->factortype             = MAT_FACTOR_ILU;
1691   fact->info.factor_mallocs    = 0;
1692   fact->info.fill_ratio_given  = info->fill;
1693   fact->info.fill_ratio_needed = 1.0;
1694   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1695 
1696   b       = (Mat_SeqAIJ*)(fact)->data;
1697   b->row  = isrow;
1698   b->col  = iscol;
1699   b->icol = isicol;
1700   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1701   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1702   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1708 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1709 {
1710   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1711   IS                 isicol;
1712   PetscErrorCode     ierr;
1713   const PetscInt     *r,*ic;
1714   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1715   PetscInt           *bi,*cols,nnz,*cols_lvl;
1716   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1717   PetscInt           i,levels,diagonal_fill;
1718   PetscBool          col_identity,row_identity;
1719   PetscReal          f;
1720   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1721   PetscBT            lnkbt;
1722   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1723   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1724   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1725 
1726   PetscFunctionBegin;
1727   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1728   /*
1729   PetscBool          olddatastruct=PETSC_FALSE;
1730   ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1731   if (olddatastruct) {
1732     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1733     PetscFunctionReturn(0);
1734   }
1735   */
1736 
1737   levels = (PetscInt)info->levels;
1738   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1739   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1740   if (!levels && row_identity && col_identity) {
1741     /* special case: ilu(0) with natural ordering */
1742     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1743     if (a->inode.size) {
1744       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1745     }
1746     PetscFunctionReturn(0);
1747   }
1748 
1749   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1750   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1751   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1752   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1753 
1754   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1755   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1756   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1757   bi[0] = bdiag[0] = 0;
1758   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1759 
1760   /* create a linked list for storing column indices of the active row */
1761   nlnk = n + 1;
1762   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1763 
1764   /* initial FreeSpace size is f*(ai[n]+1) */
1765   f             = info->fill;
1766   diagonal_fill = (PetscInt)info->diagonal_fill;
1767   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1768   current_space = free_space;
1769   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1770   current_space_lvl = free_space_lvl;
1771   for (i=0; i<n; i++) {
1772     nzi = 0;
1773     /* copy current row into linked list */
1774     nnz  = ai[r[i]+1] - ai[r[i]];
1775     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1776     cols = aj + ai[r[i]];
1777     lnk[i] = -1; /* marker to indicate if diagonal exists */
1778     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1779     nzi += nlnk;
1780 
1781     /* make sure diagonal entry is included */
1782     if (diagonal_fill && lnk[i] == -1) {
1783       fm = n;
1784       while (lnk[fm] < i) fm = lnk[fm];
1785       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1786       lnk[fm]    = i;
1787       lnk_lvl[i] = 0;
1788       nzi++; dcount++;
1789     }
1790 
1791     /* add pivot rows into the active row */
1792     nzbd = 0;
1793     prow = lnk[n];
1794     while (prow < i) {
1795       nnz      = bdiag[prow];
1796       cols     = bj_ptr[prow] + nnz + 1;
1797       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1798       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1799       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1800       nzi += nlnk;
1801       prow = lnk[prow];
1802       nzbd++;
1803     }
1804     bdiag[i] = nzbd;
1805     bi[i+1]  = bi[i] + nzi;
1806     /* if free space is not available, make more free space */
1807     if (current_space->local_remaining<nzi) {
1808       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1809       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1810       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1811       reallocs++;
1812     }
1813 
1814     /* copy data into free_space and free_space_lvl, then initialize lnk */
1815     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1816     bj_ptr[i]    = current_space->array;
1817     bjlvl_ptr[i] = current_space_lvl->array;
1818 
1819     /* make sure the active row i has diagonal entry */
1820     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1821 
1822     current_space->array           += nzi;
1823     current_space->local_used      += nzi;
1824     current_space->local_remaining -= nzi;
1825     current_space_lvl->array           += nzi;
1826     current_space_lvl->local_used      += nzi;
1827     current_space_lvl->local_remaining -= nzi;
1828   }
1829 
1830   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1831   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1832   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1833   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1834   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1835 
1836   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1837   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1838   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1839 
1840 #if defined(PETSC_USE_INFO)
1841   {
1842     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1843     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1844     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1845     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1846     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1847     if (diagonal_fill) {
1848       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1849     }
1850   }
1851 #endif
1852   /* put together the new matrix */
1853   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1854   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1855   b = (Mat_SeqAIJ*)(fact)->data;
1856   b->free_a       = PETSC_TRUE;
1857   b->free_ij      = PETSC_TRUE;
1858   b->singlemalloc = PETSC_FALSE;
1859   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1860   b->j          = bj;
1861   b->i          = bi;
1862   b->diag       = bdiag;
1863   b->ilen       = 0;
1864   b->imax       = 0;
1865   b->row        = isrow;
1866   b->col        = iscol;
1867   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1868   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1869   b->icol       = isicol;
1870   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1871   /* In b structure:  Free imax, ilen, old a, old j.
1872      Allocate bdiag, solve_work, new a, new j */
1873   ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1874   b->maxnz = b->nz = bdiag[0]+1;
1875   (fact)->info.factor_mallocs    = reallocs;
1876   (fact)->info.fill_ratio_given  = f;
1877   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1878   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1879   if (a->inode.size) {
1880     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1881   }
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1887 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1888 {
1889   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1890   IS                 isicol;
1891   PetscErrorCode     ierr;
1892   const PetscInt     *r,*ic;
1893   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1894   PetscInt           *bi,*cols,nnz,*cols_lvl;
1895   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1896   PetscInt           i,levels,diagonal_fill;
1897   PetscBool          col_identity,row_identity;
1898   PetscReal          f;
1899   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1900   PetscBT            lnkbt;
1901   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1902   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1903   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1904   PetscBool          missing;
1905 
1906   PetscFunctionBegin;
1907   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1908   f             = info->fill;
1909   levels        = (PetscInt)info->levels;
1910   diagonal_fill = (PetscInt)info->diagonal_fill;
1911   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1912 
1913   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1914   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1915   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1916     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1917     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1918     if (a->inode.size) {
1919       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1920     }
1921     fact->factortype = MAT_FACTOR_ILU;
1922     (fact)->info.factor_mallocs    = 0;
1923     (fact)->info.fill_ratio_given  = info->fill;
1924     (fact)->info.fill_ratio_needed = 1.0;
1925     b               = (Mat_SeqAIJ*)(fact)->data;
1926     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1927     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1928     b->row              = isrow;
1929     b->col              = iscol;
1930     b->icol             = isicol;
1931     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1932     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1933     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1934     PetscFunctionReturn(0);
1935   }
1936 
1937   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1938   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1939 
1940   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1941   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1942   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1943   bi[0] = bdiag[0] = 0;
1944 
1945   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1946 
1947   /* create a linked list for storing column indices of the active row */
1948   nlnk = n + 1;
1949   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1950 
1951   /* initial FreeSpace size is f*(ai[n]+1) */
1952   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1953   current_space = free_space;
1954   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1955   current_space_lvl = free_space_lvl;
1956 
1957   for (i=0; i<n; i++) {
1958     nzi = 0;
1959     /* copy current row into linked list */
1960     nnz  = ai[r[i]+1] - ai[r[i]];
1961     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1962     cols = aj + ai[r[i]];
1963     lnk[i] = -1; /* marker to indicate if diagonal exists */
1964     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1965     nzi += nlnk;
1966 
1967     /* make sure diagonal entry is included */
1968     if (diagonal_fill && lnk[i] == -1) {
1969       fm = n;
1970       while (lnk[fm] < i) fm = lnk[fm];
1971       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1972       lnk[fm]    = i;
1973       lnk_lvl[i] = 0;
1974       nzi++; dcount++;
1975     }
1976 
1977     /* add pivot rows into the active row */
1978     nzbd = 0;
1979     prow = lnk[n];
1980     while (prow < i) {
1981       nnz      = bdiag[prow];
1982       cols     = bj_ptr[prow] + nnz + 1;
1983       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1984       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1985       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1986       nzi += nlnk;
1987       prow = lnk[prow];
1988       nzbd++;
1989     }
1990     bdiag[i] = nzbd;
1991     bi[i+1]  = bi[i] + nzi;
1992 
1993     /* if free space is not available, make more free space */
1994     if (current_space->local_remaining<nzi) {
1995       nnz = nzi*(n - i); /* estimated and max additional space needed */
1996       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1997       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1998       reallocs++;
1999     }
2000 
2001     /* copy data into free_space and free_space_lvl, then initialize lnk */
2002     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2003     bj_ptr[i]    = current_space->array;
2004     bjlvl_ptr[i] = current_space_lvl->array;
2005 
2006     /* make sure the active row i has diagonal entry */
2007     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2008 
2009     current_space->array           += nzi;
2010     current_space->local_used      += nzi;
2011     current_space->local_remaining -= nzi;
2012     current_space_lvl->array           += nzi;
2013     current_space_lvl->local_used      += nzi;
2014     current_space_lvl->local_remaining -= nzi;
2015   }
2016 
2017   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2018   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2019 
2020   /* destroy list of free space and other temporary arrays */
2021   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2022   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2023   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2024   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2025   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2026 
2027 #if defined(PETSC_USE_INFO)
2028   {
2029     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2030     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2031     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2032     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2033     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2034     if (diagonal_fill) {
2035       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2036     }
2037   }
2038 #endif
2039 
2040   /* put together the new matrix */
2041   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2042   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2043   b = (Mat_SeqAIJ*)(fact)->data;
2044   b->free_a       = PETSC_TRUE;
2045   b->free_ij      = PETSC_TRUE;
2046   b->singlemalloc = PETSC_FALSE;
2047   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2048   b->j          = bj;
2049   b->i          = bi;
2050   for (i=0; i<n; i++) bdiag[i] += bi[i];
2051   b->diag       = bdiag;
2052   b->ilen       = 0;
2053   b->imax       = 0;
2054   b->row        = isrow;
2055   b->col        = iscol;
2056   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2057   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2058   b->icol       = isicol;
2059   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2060   /* In b structure:  Free imax, ilen, old a, old j.
2061      Allocate bdiag, solve_work, new a, new j */
2062   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2063   b->maxnz             = b->nz = bi[n] ;
2064   (fact)->info.factor_mallocs    = reallocs;
2065   (fact)->info.fill_ratio_given  = f;
2066   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2067   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2068   if (a->inode.size) {
2069     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2070   }
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2076 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2077 {
2078   Mat            C = B;
2079   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2080   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2081   IS             ip=b->row,iip = b->icol;
2082   PetscErrorCode ierr;
2083   const PetscInt *rip,*riip;
2084   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2085   PetscInt       *ai=a->i,*aj=a->j;
2086   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2087   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2088   PetscBool      perm_identity;
2089   FactorShiftCtx sctx;
2090   PetscReal      rs;
2091   MatScalar      d,*v;
2092 
2093   PetscFunctionBegin;
2094   /* MatPivotSetUp(): initialize shift context sctx */
2095   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2096 
2097   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2098     sctx.shift_top = info->zeropivot;
2099     for (i=0; i<mbs; i++) {
2100       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2101       d  = (aa)[a->diag[i]];
2102       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2103       v  = aa+ai[i];
2104       nz = ai[i+1] - ai[i];
2105       for (j=0; j<nz; j++)
2106         rs += PetscAbsScalar(v[j]);
2107       if (rs>sctx.shift_top) sctx.shift_top = rs;
2108     }
2109     sctx.shift_top   *= 1.1;
2110     sctx.nshift_max   = 5;
2111     sctx.shift_lo     = 0.;
2112     sctx.shift_hi     = 1.;
2113   }
2114 
2115   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2116   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2117 
2118   /* allocate working arrays
2119      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2120      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2121   */
2122   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2123 
2124   do {
2125     sctx.newshift = PETSC_FALSE;
2126 
2127     for (i=0; i<mbs; i++) c2r[i] = mbs;
2128     if (mbs) il[0] = 0;
2129 
2130     for (k = 0; k<mbs; k++) {
2131       /* zero rtmp */
2132       nz = bi[k+1] - bi[k];
2133       bjtmp = bj + bi[k];
2134       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2135 
2136       /* load in initial unfactored row */
2137       bval = ba + bi[k];
2138       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2139       for (j = jmin; j < jmax; j++) {
2140         col = riip[aj[j]];
2141         if (col >= k) { /* only take upper triangular entry */
2142           rtmp[col] = aa[j];
2143           *bval++   = 0.0; /* for in-place factorization */
2144         }
2145       }
2146       /* shift the diagonal of the matrix: ZeropivotApply() */
2147       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2148 
2149       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2150       dk = rtmp[k];
2151       i  = c2r[k]; /* first row to be added to k_th row  */
2152 
2153       while (i < k) {
2154         nexti = c2r[i]; /* next row to be added to k_th row */
2155 
2156         /* compute multiplier, update diag(k) and U(i,k) */
2157         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2158         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2159         dk   += uikdi*ba[ili]; /* update diag[k] */
2160         ba[ili] = uikdi; /* -U(i,k) */
2161 
2162         /* add multiple of row i to k-th row */
2163         jmin = ili + 1; jmax = bi[i+1];
2164         if (jmin < jmax) {
2165           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2166           /* update il and c2r for row i */
2167           il[i] = jmin;
2168           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2169         }
2170         i = nexti;
2171       }
2172 
2173       /* copy data into U(k,:) */
2174       rs   = 0.0;
2175       jmin = bi[k]; jmax = bi[k+1]-1;
2176       if (jmin < jmax) {
2177         for (j=jmin; j<jmax; j++) {
2178           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2179         }
2180         /* add the k-th row into il and c2r */
2181         il[k] = jmin;
2182         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2183       }
2184 
2185       /* MatPivotCheck() */
2186       sctx.rs  = rs;
2187       sctx.pv  = dk;
2188       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2189       if (sctx.newshift) break;
2190       dk = sctx.pv;
2191 
2192       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2193     }
2194   } while (sctx.newshift);
2195 
2196   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2197   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2198   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2199 
2200   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2201   if (perm_identity) {
2202     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2203     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2204     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2205     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2206   } else {
2207     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2208     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2209     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2210     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2211   }
2212 
2213   C->assembled    = PETSC_TRUE;
2214   C->preallocated = PETSC_TRUE;
2215   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2216 
2217   /* MatPivotView() */
2218   if (sctx.nshift) {
2219     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2220       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2221     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2222       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2223     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2224       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2225     }
2226   }
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 #undef __FUNCT__
2231 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2232 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2233 {
2234   Mat            C = B;
2235   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2236   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2237   IS             ip=b->row,iip = b->icol;
2238   PetscErrorCode ierr;
2239   const PetscInt *rip,*riip;
2240   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2241   PetscInt       *ai=a->i,*aj=a->j;
2242   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2243   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2244   PetscBool      perm_identity;
2245   FactorShiftCtx sctx;
2246   PetscReal      rs;
2247   MatScalar      d,*v;
2248 
2249   PetscFunctionBegin;
2250   /* MatPivotSetUp(): initialize shift context sctx */
2251   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2252 
2253   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2254     sctx.shift_top = info->zeropivot;
2255     for (i=0; i<mbs; i++) {
2256       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2257       d  = (aa)[a->diag[i]];
2258       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2259       v  = aa+ai[i];
2260       nz = ai[i+1] - ai[i];
2261       for (j=0; j<nz; j++)
2262         rs += PetscAbsScalar(v[j]);
2263       if (rs>sctx.shift_top) sctx.shift_top = rs;
2264     }
2265     sctx.shift_top   *= 1.1;
2266     sctx.nshift_max   = 5;
2267     sctx.shift_lo     = 0.;
2268     sctx.shift_hi     = 1.;
2269   }
2270 
2271   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2272   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2273 
2274   /* initialization */
2275   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2276 
2277   do {
2278     sctx.newshift = PETSC_FALSE;
2279 
2280     for (i=0; i<mbs; i++) jl[i] = mbs;
2281     il[0] = 0;
2282 
2283     for (k = 0; k<mbs; k++) {
2284       /* zero rtmp */
2285       nz = bi[k+1] - bi[k];
2286       bjtmp = bj + bi[k];
2287       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2288 
2289       bval = ba + bi[k];
2290       /* initialize k-th row by the perm[k]-th row of A */
2291       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2292       for (j = jmin; j < jmax; j++) {
2293         col = riip[aj[j]];
2294         if (col >= k) { /* only take upper triangular entry */
2295           rtmp[col] = aa[j];
2296           *bval++  = 0.0; /* for in-place factorization */
2297         }
2298       }
2299       /* shift the diagonal of the matrix */
2300       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2301 
2302       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2303       dk = rtmp[k];
2304       i = jl[k]; /* first row to be added to k_th row  */
2305 
2306       while (i < k) {
2307         nexti = jl[i]; /* next row to be added to k_th row */
2308 
2309         /* compute multiplier, update diag(k) and U(i,k) */
2310         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2311         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2312         dk += uikdi*ba[ili];
2313         ba[ili] = uikdi; /* -U(i,k) */
2314 
2315         /* add multiple of row i to k-th row */
2316         jmin = ili + 1; jmax = bi[i+1];
2317         if (jmin < jmax) {
2318           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2319           /* update il and jl for row i */
2320           il[i] = jmin;
2321           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2322         }
2323         i = nexti;
2324       }
2325 
2326       /* shift the diagonals when zero pivot is detected */
2327       /* compute rs=sum of abs(off-diagonal) */
2328       rs   = 0.0;
2329       jmin = bi[k]+1;
2330       nz   = bi[k+1] - jmin;
2331       bcol = bj + jmin;
2332       for (j=0; j<nz; j++) {
2333         rs += PetscAbsScalar(rtmp[bcol[j]]);
2334       }
2335 
2336       sctx.rs = rs;
2337       sctx.pv = dk;
2338       ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2339       if (sctx.newshift) break;
2340       dk = sctx.pv;
2341 
2342       /* copy data into U(k,:) */
2343       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2344       jmin = bi[k]+1; jmax = bi[k+1];
2345       if (jmin < jmax) {
2346         for (j=jmin; j<jmax; j++) {
2347           col = bj[j]; ba[j] = rtmp[col];
2348         }
2349         /* add the k-th row into il and jl */
2350         il[k] = jmin;
2351         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2352       }
2353     }
2354   } while (sctx.newshift);
2355 
2356   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2357   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2358   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2359 
2360   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2361   if (perm_identity) {
2362     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2363     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2364     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2365     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2366   } else {
2367     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2368     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2369     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2370     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2371   }
2372 
2373   C->assembled    = PETSC_TRUE;
2374   C->preallocated = PETSC_TRUE;
2375   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2376   if (sctx.nshift) {
2377     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2378       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2379     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2380       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2381     }
2382   }
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 /*
2387    icc() under revised new data structure.
2388    Factored arrays bj and ba are stored as
2389      U(0,:),...,U(i,:),U(n-1,:)
2390 
2391    ui=fact->i is an array of size n+1, in which
2392    ui+
2393      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2394      ui[n]:  points to U(n-1,n-1)+1
2395 
2396   udiag=fact->diag is an array of size n,in which
2397      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2398 
2399    U(i,:) contains udiag[i] as its last entry, i.e.,
2400     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2401 */
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2405 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2406 {
2407   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2408   Mat_SeqSBAIJ       *b;
2409   PetscErrorCode     ierr;
2410   PetscBool          perm_identity,missing;
2411   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2412   const PetscInt     *rip,*riip;
2413   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2414   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2415   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2416   PetscReal          fill=info->fill,levels=info->levels;
2417   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2418   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2419   PetscBT            lnkbt;
2420   IS                 iperm;
2421 
2422   PetscFunctionBegin;
2423   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2424   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2425   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2426   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2427   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2428 
2429   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2430   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2431   ui[0] = 0;
2432 
2433   /* ICC(0) without matrix ordering: simply rearrange column indices */
2434   if (!levels && perm_identity) {
2435     for (i=0; i<am; i++) {
2436       ncols    = ai[i+1] - a->diag[i];
2437       ui[i+1]  = ui[i] + ncols;
2438       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2439     }
2440     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2441     cols = uj;
2442     for (i=0; i<am; i++) {
2443       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2444       ncols = ai[i+1] - a->diag[i] -1;
2445       for (j=0; j<ncols; j++) *cols++ = aj[j];
2446       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2447     }
2448   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2449     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2450     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2451 
2452     /* initialization */
2453     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2454 
2455     /* jl: linked list for storing indices of the pivot rows
2456        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2457     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2458     for (i=0; i<am; i++) {
2459       jl[i] = am; il[i] = 0;
2460     }
2461 
2462     /* create and initialize a linked list for storing column indices of the active row k */
2463     nlnk = am + 1;
2464     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2465 
2466     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2467     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2468     current_space = free_space;
2469     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2470     current_space_lvl = free_space_lvl;
2471 
2472     for (k=0; k<am; k++) {  /* for each active row k */
2473       /* initialize lnk by the column indices of row rip[k] of A */
2474       nzk   = 0;
2475       ncols = ai[rip[k]+1] - ai[rip[k]];
2476       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2477       ncols_upper = 0;
2478       for (j=0; j<ncols; j++) {
2479         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2480         if (riip[i] >= k) { /* only take upper triangular entry */
2481           ajtmp[ncols_upper] = i;
2482           ncols_upper++;
2483         }
2484       }
2485       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2486       nzk += nlnk;
2487 
2488       /* update lnk by computing fill-in for each pivot row to be merged in */
2489       prow = jl[k]; /* 1st pivot row */
2490 
2491       while (prow < k) {
2492         nextprow = jl[prow];
2493 
2494         /* merge prow into k-th row */
2495         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2496         jmax = ui[prow+1];
2497         ncols = jmax-jmin;
2498         i     = jmin - ui[prow];
2499         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2500         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2501         j     = *(uj - 1);
2502         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2503         nzk += nlnk;
2504 
2505         /* update il and jl for prow */
2506         if (jmin < jmax) {
2507           il[prow] = jmin;
2508           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2509         }
2510         prow = nextprow;
2511       }
2512 
2513       /* if free space is not available, make more free space */
2514       if (current_space->local_remaining<nzk) {
2515         i  = am - k + 1; /* num of unfactored rows */
2516         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2517         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2518         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2519         reallocs++;
2520       }
2521 
2522       /* copy data into free_space and free_space_lvl, then initialize lnk */
2523       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2524       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2525 
2526       /* add the k-th row into il and jl */
2527       if (nzk > 1) {
2528         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2529         jl[k] = jl[i]; jl[i] = k;
2530         il[k] = ui[k] + 1;
2531       }
2532       uj_ptr[k]     = current_space->array;
2533       uj_lvl_ptr[k] = current_space_lvl->array;
2534 
2535       current_space->array           += nzk;
2536       current_space->local_used      += nzk;
2537       current_space->local_remaining -= nzk;
2538 
2539       current_space_lvl->array           += nzk;
2540       current_space_lvl->local_used      += nzk;
2541       current_space_lvl->local_remaining -= nzk;
2542 
2543       ui[k+1] = ui[k] + nzk;
2544     }
2545 
2546     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2547     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2548     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2549     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2550 
2551     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2552     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2553     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2554     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2555     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2556 
2557   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2558 
2559   /* put together the new matrix in MATSEQSBAIJ format */
2560   b    = (Mat_SeqSBAIJ*)(fact)->data;
2561   b->singlemalloc = PETSC_FALSE;
2562   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2563   b->j    = uj;
2564   b->i    = ui;
2565   b->diag = udiag;
2566   b->free_diag = PETSC_TRUE;
2567   b->ilen = 0;
2568   b->imax = 0;
2569   b->row  = perm;
2570   b->col  = perm;
2571   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2572   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2573   b->icol = iperm;
2574   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2575   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2576   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2577   b->maxnz   = b->nz = ui[am];
2578   b->free_a  = PETSC_TRUE;
2579   b->free_ij = PETSC_TRUE;
2580 
2581   fact->info.factor_mallocs   = reallocs;
2582   fact->info.fill_ratio_given = fill;
2583   if (ai[am] != 0) {
2584     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2585     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2586   } else {
2587     fact->info.fill_ratio_needed = 0.0;
2588   }
2589 #if defined(PETSC_USE_INFO)
2590     if (ai[am] != 0) {
2591       PetscReal af = fact->info.fill_ratio_needed;
2592       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2593       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2594       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2595     } else {
2596       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2597     }
2598 #endif
2599   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 #undef __FUNCT__
2604 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2605 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2606 {
2607   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2608   Mat_SeqSBAIJ       *b;
2609   PetscErrorCode     ierr;
2610   PetscBool          perm_identity,missing;
2611   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2612   const PetscInt     *rip,*riip;
2613   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2614   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2615   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2616   PetscReal          fill=info->fill,levels=info->levels;
2617   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2618   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2619   PetscBT            lnkbt;
2620   IS                 iperm;
2621 
2622   PetscFunctionBegin;
2623   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2624   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2625   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2626   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2627   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2628 
2629   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2630   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2631   ui[0] = 0;
2632 
2633   /* ICC(0) without matrix ordering: simply copies fill pattern */
2634   if (!levels && perm_identity) {
2635 
2636     for (i=0; i<am; i++) {
2637       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2638       udiag[i] = ui[i];
2639     }
2640     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2641     cols = uj;
2642     for (i=0; i<am; i++) {
2643       aj    = a->j + a->diag[i];
2644       ncols = ui[i+1] - ui[i];
2645       for (j=0; j<ncols; j++) *cols++ = *aj++;
2646     }
2647   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2648     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2649     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2650 
2651     /* initialization */
2652     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2653 
2654     /* jl: linked list for storing indices of the pivot rows
2655        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2656     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2657     for (i=0; i<am; i++) {
2658       jl[i] = am; il[i] = 0;
2659     }
2660 
2661     /* create and initialize a linked list for storing column indices of the active row k */
2662     nlnk = am + 1;
2663     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2664 
2665     /* initial FreeSpace size is fill*(ai[am]+1) */
2666     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2667     current_space = free_space;
2668     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2669     current_space_lvl = free_space_lvl;
2670 
2671     for (k=0; k<am; k++) {  /* for each active row k */
2672       /* initialize lnk by the column indices of row rip[k] of A */
2673       nzk   = 0;
2674       ncols = ai[rip[k]+1] - ai[rip[k]];
2675       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2676       ncols_upper = 0;
2677       for (j=0; j<ncols; j++) {
2678         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2679         if (riip[i] >= k) { /* only take upper triangular entry */
2680           ajtmp[ncols_upper] = i;
2681           ncols_upper++;
2682         }
2683       }
2684       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2685       nzk += nlnk;
2686 
2687       /* update lnk by computing fill-in for each pivot row to be merged in */
2688       prow = jl[k]; /* 1st pivot row */
2689 
2690       while (prow < k) {
2691         nextprow = jl[prow];
2692 
2693         /* merge prow into k-th row */
2694         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2695         jmax = ui[prow+1];
2696         ncols = jmax-jmin;
2697         i     = jmin - ui[prow];
2698         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2699         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2700         j     = *(uj - 1);
2701         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2702         nzk += nlnk;
2703 
2704         /* update il and jl for prow */
2705         if (jmin < jmax) {
2706           il[prow] = jmin;
2707           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2708         }
2709         prow = nextprow;
2710       }
2711 
2712       /* if free space is not available, make more free space */
2713       if (current_space->local_remaining<nzk) {
2714         i = am - k + 1; /* num of unfactored rows */
2715         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2716         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2717         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2718         reallocs++;
2719       }
2720 
2721       /* copy data into free_space and free_space_lvl, then initialize lnk */
2722       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2723       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2724 
2725       /* add the k-th row into il and jl */
2726       if (nzk > 1) {
2727         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2728         jl[k] = jl[i]; jl[i] = k;
2729         il[k] = ui[k] + 1;
2730       }
2731       uj_ptr[k]     = current_space->array;
2732       uj_lvl_ptr[k] = current_space_lvl->array;
2733 
2734       current_space->array           += nzk;
2735       current_space->local_used      += nzk;
2736       current_space->local_remaining -= nzk;
2737 
2738       current_space_lvl->array           += nzk;
2739       current_space_lvl->local_used      += nzk;
2740       current_space_lvl->local_remaining -= nzk;
2741 
2742       ui[k+1] = ui[k] + nzk;
2743     }
2744 
2745 #if defined(PETSC_USE_INFO)
2746     if (ai[am] != 0) {
2747       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2748       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2749       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2750       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2751     } else {
2752       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2753     }
2754 #endif
2755 
2756     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2757     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2758     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2759     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2760 
2761     /* destroy list of free space and other temporary array(s) */
2762     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2763     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2764     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2765     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2766 
2767   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2768 
2769   /* put together the new matrix in MATSEQSBAIJ format */
2770 
2771   b    = (Mat_SeqSBAIJ*)fact->data;
2772   b->singlemalloc = PETSC_FALSE;
2773   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2774   b->j    = uj;
2775   b->i    = ui;
2776   b->diag = udiag;
2777   b->free_diag = PETSC_TRUE;
2778   b->ilen = 0;
2779   b->imax = 0;
2780   b->row  = perm;
2781   b->col  = perm;
2782   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2783   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2784   b->icol = iperm;
2785   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2786   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2787   ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2788   b->maxnz   = b->nz = ui[am];
2789   b->free_a  = PETSC_TRUE;
2790   b->free_ij = PETSC_TRUE;
2791 
2792   fact->info.factor_mallocs    = reallocs;
2793   fact->info.fill_ratio_given  = fill;
2794   if (ai[am] != 0) {
2795     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2796   } else {
2797     fact->info.fill_ratio_needed = 0.0;
2798   }
2799   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2805 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2806 {
2807   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2808   Mat_SeqSBAIJ       *b;
2809   PetscErrorCode     ierr;
2810   PetscBool          perm_identity;
2811   PetscReal          fill = info->fill;
2812   const PetscInt     *rip,*riip;
2813   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2814   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2815   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2816   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2817   PetscBT            lnkbt;
2818   IS                 iperm;
2819 
2820   PetscFunctionBegin;
2821   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2822   /* check whether perm is the identity mapping */
2823   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2824   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2825   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2826   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2827 
2828   /* initialization */
2829   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2830   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2831   ui[0] = 0;
2832 
2833   /* jl: linked list for storing indices of the pivot rows
2834      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2835   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2836   for (i=0; i<am; i++) {
2837     jl[i] = am; il[i] = 0;
2838   }
2839 
2840   /* create and initialize a linked list for storing column indices of the active row k */
2841   nlnk = am + 1;
2842   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2843 
2844   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2845   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2846   current_space = free_space;
2847 
2848   for (k=0; k<am; k++) {  /* for each active row k */
2849     /* initialize lnk by the column indices of row rip[k] of A */
2850     nzk   = 0;
2851     ncols = ai[rip[k]+1] - ai[rip[k]];
2852     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2853     ncols_upper = 0;
2854     for (j=0; j<ncols; j++) {
2855       i = riip[*(aj + ai[rip[k]] + j)];
2856       if (i >= k) { /* only take upper triangular entry */
2857         cols[ncols_upper] = i;
2858         ncols_upper++;
2859       }
2860     }
2861     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2862     nzk += nlnk;
2863 
2864     /* update lnk by computing fill-in for each pivot row to be merged in */
2865     prow = jl[k]; /* 1st pivot row */
2866 
2867     while (prow < k) {
2868       nextprow = jl[prow];
2869       /* merge prow into k-th row */
2870       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2871       jmax = ui[prow+1];
2872       ncols = jmax-jmin;
2873       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2874       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2875       nzk += nlnk;
2876 
2877       /* update il and jl for prow */
2878       if (jmin < jmax) {
2879         il[prow] = jmin;
2880         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2881       }
2882       prow = nextprow;
2883     }
2884 
2885     /* if free space is not available, make more free space */
2886     if (current_space->local_remaining<nzk) {
2887       i  = am - k + 1; /* num of unfactored rows */
2888       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2889       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2890       reallocs++;
2891     }
2892 
2893     /* copy data into free space, then initialize lnk */
2894     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2895 
2896     /* add the k-th row into il and jl */
2897     if (nzk > 1) {
2898       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2899       jl[k] = jl[i]; jl[i] = k;
2900       il[k] = ui[k] + 1;
2901     }
2902     ui_ptr[k] = current_space->array;
2903     current_space->array           += nzk;
2904     current_space->local_used      += nzk;
2905     current_space->local_remaining -= nzk;
2906 
2907     ui[k+1] = ui[k] + nzk;
2908   }
2909 
2910   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2911   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2912   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2913 
2914   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2915   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2916   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2917   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2918 
2919   /* put together the new matrix in MATSEQSBAIJ format */
2920 
2921   b = (Mat_SeqSBAIJ*)fact->data;
2922   b->singlemalloc = PETSC_FALSE;
2923   b->free_a       = PETSC_TRUE;
2924   b->free_ij      = PETSC_TRUE;
2925   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2926   b->j    = uj;
2927   b->i    = ui;
2928   b->diag = udiag;
2929   b->free_diag = PETSC_TRUE;
2930   b->ilen = 0;
2931   b->imax = 0;
2932   b->row  = perm;
2933   b->col  = perm;
2934   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2935   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2936   b->icol = iperm;
2937   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2938   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2939   ierr    = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2940   b->maxnz = b->nz = ui[am];
2941 
2942   fact->info.factor_mallocs    = reallocs;
2943   fact->info.fill_ratio_given  = fill;
2944   if (ai[am] != 0) {
2945     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2946     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2947   } else {
2948     fact->info.fill_ratio_needed = 0.0;
2949   }
2950 #if defined(PETSC_USE_INFO)
2951   if (ai[am] != 0) {
2952     PetscReal af = fact->info.fill_ratio_needed;
2953     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2954     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2955     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2956   } else {
2957      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2958   }
2959 #endif
2960   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 #undef __FUNCT__
2965 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2966 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2967 {
2968   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2969   Mat_SeqSBAIJ       *b;
2970   PetscErrorCode     ierr;
2971   PetscBool          perm_identity;
2972   PetscReal          fill = info->fill;
2973   const PetscInt     *rip,*riip;
2974   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2975   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2976   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2977   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2978   PetscBT            lnkbt;
2979   IS                 iperm;
2980 
2981   PetscFunctionBegin;
2982   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2983   /* check whether perm is the identity mapping */
2984   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2985   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2986   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2987   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2988 
2989   /* initialization */
2990   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2991   ui[0] = 0;
2992 
2993   /* jl: linked list for storing indices of the pivot rows
2994      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2995   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2996   for (i=0; i<am; i++) {
2997     jl[i] = am; il[i] = 0;
2998   }
2999 
3000   /* create and initialize a linked list for storing column indices of the active row k */
3001   nlnk = am + 1;
3002   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3003 
3004   /* initial FreeSpace size is fill*(ai[am]+1) */
3005   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3006   current_space = free_space;
3007 
3008   for (k=0; k<am; k++) {  /* for each active row k */
3009     /* initialize lnk by the column indices of row rip[k] of A */
3010     nzk   = 0;
3011     ncols = ai[rip[k]+1] - ai[rip[k]];
3012     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3013     ncols_upper = 0;
3014     for (j=0; j<ncols; j++) {
3015       i = riip[*(aj + ai[rip[k]] + j)];
3016       if (i >= k) { /* only take upper triangular entry */
3017         cols[ncols_upper] = i;
3018         ncols_upper++;
3019       }
3020     }
3021     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3022     nzk += nlnk;
3023 
3024     /* update lnk by computing fill-in for each pivot row to be merged in */
3025     prow = jl[k]; /* 1st pivot row */
3026 
3027     while (prow < k) {
3028       nextprow = jl[prow];
3029       /* merge prow into k-th row */
3030       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3031       jmax = ui[prow+1];
3032       ncols = jmax-jmin;
3033       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3034       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3035       nzk += nlnk;
3036 
3037       /* update il and jl for prow */
3038       if (jmin < jmax) {
3039         il[prow] = jmin;
3040         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3041       }
3042       prow = nextprow;
3043     }
3044 
3045     /* if free space is not available, make more free space */
3046     if (current_space->local_remaining<nzk) {
3047       i = am - k + 1; /* num of unfactored rows */
3048       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3049       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3050       reallocs++;
3051     }
3052 
3053     /* copy data into free space, then initialize lnk */
3054     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3055 
3056     /* add the k-th row into il and jl */
3057     if (nzk-1 > 0) {
3058       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3059       jl[k] = jl[i]; jl[i] = k;
3060       il[k] = ui[k] + 1;
3061     }
3062     ui_ptr[k] = current_space->array;
3063     current_space->array           += nzk;
3064     current_space->local_used      += nzk;
3065     current_space->local_remaining -= nzk;
3066 
3067     ui[k+1] = ui[k] + nzk;
3068   }
3069 
3070 #if defined(PETSC_USE_INFO)
3071   if (ai[am] != 0) {
3072     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3073     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3074     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3075     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3076   } else {
3077      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3078   }
3079 #endif
3080 
3081   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3082   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3083   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3084 
3085   /* destroy list of free space and other temporary array(s) */
3086   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3087   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3088   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3089 
3090   /* put together the new matrix in MATSEQSBAIJ format */
3091 
3092   b = (Mat_SeqSBAIJ*)fact->data;
3093   b->singlemalloc = PETSC_FALSE;
3094   b->free_a       = PETSC_TRUE;
3095   b->free_ij      = PETSC_TRUE;
3096   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3097   b->j    = uj;
3098   b->i    = ui;
3099   b->diag = 0;
3100   b->ilen = 0;
3101   b->imax = 0;
3102   b->row  = perm;
3103   b->col  = perm;
3104   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3105   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3106   b->icol = iperm;
3107   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3108   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3109   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3110   b->maxnz = b->nz = ui[am];
3111 
3112   fact->info.factor_mallocs    = reallocs;
3113   fact->info.fill_ratio_given  = fill;
3114   if (ai[am] != 0) {
3115     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3116   } else {
3117     fact->info.fill_ratio_needed = 0.0;
3118   }
3119   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3120   PetscFunctionReturn(0);
3121 }
3122 
3123 #undef __FUNCT__
3124 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3125 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3126 {
3127   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3128   PetscErrorCode    ierr;
3129   PetscInt          n = A->rmap->n;
3130   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3131   PetscScalar       *x,sum;
3132   const PetscScalar *b;
3133   const MatScalar   *aa = a->a,*v;
3134   PetscInt          i,nz;
3135 
3136   PetscFunctionBegin;
3137   if (!n) PetscFunctionReturn(0);
3138 
3139   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3141 
3142   /* forward solve the lower triangular */
3143   x[0] = b[0];
3144   v    = aa;
3145   vi   = aj;
3146   for (i=1; i<n; i++) {
3147     nz  = ai[i+1] - ai[i];
3148     sum = b[i];
3149     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3150     v  += nz;
3151     vi += nz;
3152     x[i] = sum;
3153   }
3154 
3155   /* backward solve the upper triangular */
3156   for (i=n-1; i>=0; i--) {
3157     v   = aa + adiag[i+1] + 1;
3158     vi  = aj + adiag[i+1] + 1;
3159     nz = adiag[i] - adiag[i+1]-1;
3160     sum = x[i];
3161     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3162     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3163   }
3164 
3165   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3166   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3167   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3168   PetscFunctionReturn(0);
3169 }
3170 
3171 #undef __FUNCT__
3172 #define __FUNCT__ "MatSolve_SeqAIJ"
3173 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3174 {
3175   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3176   IS                iscol = a->col,isrow = a->row;
3177   PetscErrorCode    ierr;
3178   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3179   const PetscInt    *rout,*cout,*r,*c;
3180   PetscScalar       *x,*tmp,sum;
3181   const PetscScalar *b;
3182   const MatScalar   *aa = a->a,*v;
3183 
3184   PetscFunctionBegin;
3185   if (!n) PetscFunctionReturn(0);
3186 
3187   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3188   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3189   tmp  = a->solve_work;
3190 
3191   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3192   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3193 
3194   /* forward solve the lower triangular */
3195   tmp[0] = b[r[0]];
3196   v      = aa;
3197   vi     = aj;
3198   for (i=1; i<n; i++) {
3199     nz  = ai[i+1] - ai[i];
3200     sum = b[r[i]];
3201     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3202     tmp[i] = sum;
3203     v += nz; vi += nz;
3204   }
3205 
3206   /* backward solve the upper triangular */
3207   for (i=n-1; i>=0; i--) {
3208     v   = aa + adiag[i+1]+1;
3209     vi  = aj + adiag[i+1]+1;
3210     nz  = adiag[i]-adiag[i+1]-1;
3211     sum = tmp[i];
3212     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3213     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3214   }
3215 
3216   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3217   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3218   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3219   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3220   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3221   PetscFunctionReturn(0);
3222 }
3223 
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3226 /*
3227     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3228 */
3229 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3230 {
3231   Mat                B = *fact;
3232   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3233   IS                 isicol;
3234   PetscErrorCode     ierr;
3235   const PetscInt     *r,*ic;
3236   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3237   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3238   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3239   PetscInt           nlnk,*lnk;
3240   PetscBT            lnkbt;
3241   PetscBool          row_identity,icol_identity;
3242   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3243   const PetscInt     *ics;
3244   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3245   PetscReal          dt=info->dt,shift=info->shiftamount;
3246   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3247   PetscBool          missing;
3248 
3249   PetscFunctionBegin;
3250   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3251   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3252 
3253   /* ------- symbolic factorization, can be reused ---------*/
3254   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3255   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3256   adiag=a->diag;
3257 
3258   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3259 
3260   /* bdiag is location of diagonal in factor */
3261   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3262   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3263 
3264   /* allocate row pointers bi */
3265   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3266 
3267   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3268   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3269   nnz_max  = ai[n]+2*n*dtcount+2;
3270 
3271   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3272   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3273 
3274   /* put together the new matrix */
3275   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3276   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3277   b    = (Mat_SeqAIJ*)B->data;
3278   b->free_a       = PETSC_TRUE;
3279   b->free_ij      = PETSC_TRUE;
3280   b->singlemalloc = PETSC_FALSE;
3281   b->a          = ba;
3282   b->j          = bj;
3283   b->i          = bi;
3284   b->diag       = bdiag;
3285   b->ilen       = 0;
3286   b->imax       = 0;
3287   b->row        = isrow;
3288   b->col        = iscol;
3289   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3290   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3291   b->icol       = isicol;
3292   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3293 
3294   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3295   b->maxnz = nnz_max;
3296 
3297   B->factortype            = MAT_FACTOR_ILUDT;
3298   B->info.factor_mallocs   = 0;
3299   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3300   CHKMEMQ;
3301   /* ------- end of symbolic factorization ---------*/
3302 
3303   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3304   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3305   ics  = ic;
3306 
3307   /* linked list for storing column indices of the active row */
3308   nlnk = n + 1;
3309   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3310 
3311   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3312   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3313   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3314   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3315   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3316 
3317   bi[0]    = 0;
3318   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3319   bdiag_rev[n] = bdiag[0];
3320   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3321   for (i=0; i<n; i++) {
3322     /* copy initial fill into linked list */
3323     nzi = ai[r[i]+1] - ai[r[i]];
3324     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3325     nzi_al = adiag[r[i]] - ai[r[i]];
3326     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3327     ajtmp = aj + ai[r[i]];
3328     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3329 
3330     /* load in initial (unfactored row) */
3331     aatmp = a->a + ai[r[i]];
3332     for (j=0; j<nzi; j++) {
3333       rtmp[ics[*ajtmp++]] = *aatmp++;
3334     }
3335 
3336     /* add pivot rows into linked list */
3337     row = lnk[n];
3338     while (row < i) {
3339       nzi_bl = bi[row+1] - bi[row] + 1;
3340       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3341       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3342       nzi  += nlnk;
3343       row   = lnk[row];
3344     }
3345 
3346     /* copy data from lnk into jtmp, then initialize lnk */
3347     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3348 
3349     /* numerical factorization */
3350     bjtmp = jtmp;
3351     row   = *bjtmp++; /* 1st pivot row */
3352     while (row < i) {
3353       pc         = rtmp + row;
3354       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3355       multiplier = (*pc) * (*pv);
3356       *pc        = multiplier;
3357       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3358         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3359         pv         = ba + bdiag[row+1] + 1;
3360         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3361         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3362         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3363         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3364       }
3365       row = *bjtmp++;
3366     }
3367 
3368     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3369     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3370     nzi_bl = 0; j = 0;
3371     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3372       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3373       nzi_bl++; j++;
3374     }
3375     nzi_bu = nzi - nzi_bl -1;
3376     while (j < nzi) {
3377       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3378       j++;
3379     }
3380 
3381     bjtmp = bj + bi[i];
3382     batmp = ba + bi[i];
3383     /* apply level dropping rule to L part */
3384     ncut = nzi_al + dtcount;
3385     if (ncut < nzi_bl) {
3386       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3387       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3388     } else {
3389       ncut = nzi_bl;
3390     }
3391     for (j=0; j<ncut; j++) {
3392       bjtmp[j] = jtmp[j];
3393       batmp[j] = vtmp[j];
3394       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3395     }
3396     bi[i+1] = bi[i] + ncut;
3397     nzi = ncut + 1;
3398 
3399     /* apply level dropping rule to U part */
3400     ncut = nzi_au + dtcount;
3401     if (ncut < nzi_bu) {
3402       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3403       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3404     } else {
3405       ncut = nzi_bu;
3406     }
3407     nzi += ncut;
3408 
3409     /* mark bdiagonal */
3410     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3411     bdiag_rev[n-i-1] = bdiag[i+1];
3412     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3413     bjtmp = bj + bdiag[i];
3414     batmp = ba + bdiag[i];
3415     *bjtmp = i;
3416     *batmp = diag_tmp; /* rtmp[i]; */
3417     if (*batmp == 0.0) {
3418       *batmp = dt+shift;
3419       /* printf(" row %d add shift %g\n",i,shift); */
3420     }
3421     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3422     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3423 
3424     bjtmp = bj + bdiag[i+1]+1;
3425     batmp = ba + bdiag[i+1]+1;
3426     for (k=0; k<ncut; k++) {
3427       bjtmp[k] = jtmp[nzi_bl+1+k];
3428       batmp[k] = vtmp[nzi_bl+1+k];
3429       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3430     }
3431     /* printf("\n"); */
3432 
3433     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3434     /*
3435     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3436     printf(" ----------------------------\n");
3437     */
3438   } /* for (i=0; i<n; i++) */
3439   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3440   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3441 
3442   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3443   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3444 
3445   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3446   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3447   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3448   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3449 
3450   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3451   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3452 
3453   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3454   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3455   if (row_identity && icol_identity) {
3456     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3457   } else {
3458     B->ops->solve = MatSolve_SeqAIJ;
3459   }
3460 
3461   B->ops->solveadd          = 0;
3462   B->ops->solvetranspose    = 0;
3463   B->ops->solvetransposeadd = 0;
3464   B->ops->matsolve          = 0;
3465   B->assembled              = PETSC_TRUE;
3466   B->preallocated           = PETSC_TRUE;
3467   PetscFunctionReturn(0);
3468 }
3469 
3470 /* a wraper of MatILUDTFactor_SeqAIJ() */
3471 #undef __FUNCT__
3472 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3473 /*
3474     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3475 */
3476 
3477 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3478 {
3479   PetscErrorCode     ierr;
3480 
3481   PetscFunctionBegin;
3482   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3483   PetscFunctionReturn(0);
3484 }
3485 
3486 /*
3487    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3488    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3489 */
3490 #undef __FUNCT__
3491 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3492 /*
3493     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3494 */
3495 
3496 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3497 {
3498   Mat            C=fact;
3499   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3500   IS             isrow = b->row,isicol = b->icol;
3501   PetscErrorCode ierr;
3502   const PetscInt *r,*ic,*ics;
3503   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3504   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3505   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3506   PetscReal      dt=info->dt,shift=info->shiftamount;
3507   PetscBool      row_identity, col_identity;
3508 
3509   PetscFunctionBegin;
3510   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3511   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3512   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3513   ics  = ic;
3514 
3515   for (i=0; i<n; i++) {
3516     /* initialize rtmp array */
3517     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3518     bjtmp = bj + bi[i];
3519     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3520     rtmp[i] = 0.0;
3521     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3522     bjtmp = bj + bdiag[i+1] + 1;
3523     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3524 
3525     /* load in initial unfactored row of A */
3526     /* printf("row %d\n",i); */
3527     nz    = ai[r[i]+1] - ai[r[i]];
3528     ajtmp = aj + ai[r[i]];
3529     v     = aa + ai[r[i]];
3530     for (j=0; j<nz; j++) {
3531       rtmp[ics[*ajtmp++]] = v[j];
3532       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3533     }
3534     /* printf("\n"); */
3535 
3536     /* numerical factorization */
3537     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3538     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3539     k = 0;
3540     while (k < nzl) {
3541       row   = *bjtmp++;
3542       /* printf("  prow %d\n",row); */
3543       pc         = rtmp + row;
3544       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3545       multiplier = (*pc) * (*pv);
3546       *pc        = multiplier;
3547       if (PetscAbsScalar(multiplier) > dt) {
3548         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3549         pv         = b->a + bdiag[row+1] + 1;
3550         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3551         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3552         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3553       }
3554       k++;
3555     }
3556 
3557     /* finished row so stick it into b->a */
3558     /* L-part */
3559     pv = b->a + bi[i] ;
3560     pj = bj + bi[i] ;
3561     nzl = bi[i+1] - bi[i];
3562     for (j=0; j<nzl; j++) {
3563       pv[j] = rtmp[pj[j]];
3564       /* printf(" (%d,%g),",pj[j],pv[j]); */
3565     }
3566 
3567     /* diagonal: invert diagonal entries for simplier triangular solves */
3568     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3569     b->a[bdiag[i]] = 1.0/rtmp[i];
3570     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3571 
3572     /* U-part */
3573     pv = b->a + bdiag[i+1] + 1;
3574     pj = bj + bdiag[i+1] + 1;
3575     nzu = bdiag[i] - bdiag[i+1] - 1;
3576     for (j=0; j<nzu; j++) {
3577       pv[j] = rtmp[pj[j]];
3578       /* printf(" (%d,%g),",pj[j],pv[j]); */
3579     }
3580     /* printf("\n"); */
3581   }
3582 
3583   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3584   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3585   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3586 
3587   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3588   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3589   if (row_identity && col_identity) {
3590     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3591   } else {
3592     C->ops->solve   = MatSolve_SeqAIJ;
3593   }
3594   C->ops->solveadd           = 0;
3595   C->ops->solvetranspose     = 0;
3596   C->ops->solvetransposeadd  = 0;
3597   C->ops->matsolve           = 0;
3598   C->assembled    = PETSC_TRUE;
3599   C->preallocated = PETSC_TRUE;
3600   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3601   PetscFunctionReturn(0);
3602 }
3603