xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision a6dfd86ebdf75fa024070af26d51b62fd16650a3)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
10 /*
11       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12 
13       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14 */
15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
16 {
17   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
18   PetscErrorCode    ierr;
19   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20   const PetscInt    *ai = a->i, *aj = a->j;
21   const PetscScalar *aa = a->a;
22   PetscBool         *done;
23   PetscReal         best,past = 0,future;
24 
25   PetscFunctionBegin;
26   /* pick initial row */
27   best = -1;
28   for (i=0; i<n; i++) {
29     future = 0.0;
30     for (j=ai[i]; j<ai[i+1]; j++) {
31       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
32       else              past  = PetscAbsScalar(aa[j]);
33     }
34     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
35     if (past/future > best) {
36       best = past/future;
37       current = i;
38     }
39   }
40 
41   ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr);
42   ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
43   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
44   order[0] = current;
45   for (i=0; i<n-1; i++) {
46     done[current] = PETSC_TRUE;
47     best          = -1;
48     /* loop over all neighbors of current pivot */
49     for (j=ai[current]; j<ai[current+1]; j++) {
50       jj = aj[j];
51       if (done[jj]) continue;
52       /* loop over columns of potential next row computing weights for below and above diagonal */
53       past = future = 0.0;
54       for (k=ai[jj]; k<ai[jj+1]; k++) {
55         kk = aj[k];
56         if (done[kk]) past += PetscAbsScalar(aa[k]);
57         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
58       }
59       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
60       if (past/future > best) {
61         best = past/future;
62         newcurrent = jj;
63       }
64     }
65     if (best == -1) { /* no neighbors to select from so select best of all that remain */
66       best = -1;
67       for (k=0; k<n; k++) {
68         if (done[k]) continue;
69         future = 0.0;
70         past   = 0.0;
71         for (j=ai[k]; j<ai[k+1]; j++) {
72           kk = aj[j];
73           if (done[kk])       past += PetscAbsScalar(aa[j]);
74           else if (kk != k) future += PetscAbsScalar(aa[j]);
75         }
76         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
77         if (past/future > best) {
78           best = past/future;
79           newcurrent = k;
80         }
81       }
82     }
83     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
84     current = newcurrent;
85     order[i+1] = current;
86   }
87   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
88   *icol = *irow;
89   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
90   ierr = PetscFree(done);CHKERRQ(ierr);
91   ierr = PetscFree(order);CHKERRQ(ierr);
92   PetscFunctionReturn(0);
93 }
94 EXTERN_C_END
95 
96 EXTERN_C_BEGIN
97 #undef __FUNCT__
98 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
99 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
100 {
101   PetscFunctionBegin;
102   *flg = PETSC_TRUE;
103   PetscFunctionReturn(0);
104 }
105 EXTERN_C_END
106 
107 EXTERN_C_BEGIN
108 #undef __FUNCT__
109 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
110 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
111 {
112   PetscInt           n = A->rmap->n;
113   PetscErrorCode     ierr;
114 
115   PetscFunctionBegin;
116 #if defined(PETSC_USE_COMPLEX)
117   if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
118 #endif
119   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
120   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
121   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
122     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
123     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
124     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
125     ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
126   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
127     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
128     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
129     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
130     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
131   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
132   (*B)->factortype = ftype;
133   PetscFunctionReturn(0);
134 }
135 EXTERN_C_END
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
139 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
140 {
141   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
142   IS                 isicol;
143   PetscErrorCode     ierr;
144   const PetscInt     *r,*ic;
145   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
146   PetscInt           *bi,*bj,*ajtmp;
147   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
148   PetscReal          f;
149   PetscInt           nlnk,*lnk,k,**bi_ptr;
150   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
151   PetscBT            lnkbt;
152 
153   PetscFunctionBegin;
154   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
155   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
156   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
157   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
158 
159   /* get new row pointers */
160   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
161   bi[0] = 0;
162 
163   /* bdiag is location of diagonal in factor */
164   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
165   bdiag[0] = 0;
166 
167   /* linked list for storing column indices of the active row */
168   nlnk = n + 1;
169   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
170 
171   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
172 
173   /* initial FreeSpace size is f*(ai[n]+1) */
174   f = info->fill;
175   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
176   current_space = free_space;
177 
178   for (i=0; i<n; i++) {
179     /* copy previous fill into linked list */
180     nzi = 0;
181     nnz = ai[r[i]+1] - ai[r[i]];
182     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
183     ajtmp = aj + ai[r[i]];
184     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
185     nzi += nlnk;
186 
187     /* add pivot rows into linked list */
188     row = lnk[n];
189     while (row < i) {
190       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
191       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
192       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
193       nzi += nlnk;
194       row  = lnk[row];
195     }
196     bi[i+1] = bi[i] + nzi;
197     im[i]   = nzi;
198 
199     /* mark bdiag */
200     nzbd = 0;
201     nnz  = nzi;
202     k    = lnk[n];
203     while (nnz-- && k < i) {
204       nzbd++;
205       k = lnk[k];
206     }
207     bdiag[i] = bi[i] + nzbd;
208 
209     /* if free space is not available, make more free space */
210     if (current_space->local_remaining<nzi) {
211       nnz = (n - i)*nzi; /* estimated and max additional space needed */
212       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
213       reallocs++;
214     }
215 
216     /* copy data into free space, then initialize lnk */
217     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
218     bi_ptr[i] = current_space->array;
219     current_space->array           += nzi;
220     current_space->local_used      += nzi;
221     current_space->local_remaining -= nzi;
222   }
223 #if defined(PETSC_USE_INFO)
224   if (ai[n] != 0) {
225     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
226     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
227     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
228     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
229     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
230   } else {
231     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
232   }
233 #endif
234 
235   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
236   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
237 
238   /* destroy list of free space and other temporary array(s) */
239   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
240   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
241   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
242   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
243 
244   /* put together the new matrix */
245   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
246   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
247   b    = (Mat_SeqAIJ*)(B)->data;
248   b->free_a       = PETSC_TRUE;
249   b->free_ij      = PETSC_TRUE;
250   b->singlemalloc = PETSC_FALSE;
251   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
252   b->j          = bj;
253   b->i          = bi;
254   b->diag       = bdiag;
255   b->ilen       = 0;
256   b->imax       = 0;
257   b->row        = isrow;
258   b->col        = iscol;
259   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
260   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
261   b->icol       = isicol;
262   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
263 
264   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
265   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
266   b->maxnz = b->nz = bi[n] ;
267 
268   (B)->factortype            = MAT_FACTOR_LU;
269   (B)->info.factor_mallocs   = reallocs;
270   (B)->info.fill_ratio_given = f;
271 
272   if (ai[n]) {
273     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
274   } else {
275     (B)->info.fill_ratio_needed = 0.0;
276   }
277   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
278   if (a->inode.size) {
279     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
286 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
287 {
288   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
289   IS                 isicol;
290   PetscErrorCode     ierr;
291   const PetscInt     *r,*ic;
292   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
293   PetscInt           *bi,*bj,*ajtmp;
294   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
295   PetscReal          f;
296   PetscInt           nlnk,*lnk,k,**bi_ptr;
297   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
298   PetscBT            lnkbt;
299 
300   PetscFunctionBegin;
301   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
302   /*
303   PetscBool          olddatastruct=PETSC_FALSE;
304   ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
305   if (olddatastruct) {
306     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
307     PetscFunctionReturn(0);
308   }
309   */
310   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
311   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
312   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
313   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
314 
315   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
317   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
318   bi[0] = bdiag[0] = 0;
319 
320   /* linked list for storing column indices of the active row */
321   nlnk = n + 1;
322   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
323 
324   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
325 
326   /* initial FreeSpace size is f*(ai[n]+1) */
327   f = info->fill;
328   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
329   current_space = free_space;
330 
331   for (i=0; i<n; i++) {
332     /* copy previous fill into linked list */
333     nzi = 0;
334     nnz = ai[r[i]+1] - ai[r[i]];
335     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
336     ajtmp = aj + ai[r[i]];
337     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
338     nzi += nlnk;
339 
340     /* add pivot rows into linked list */
341     row = lnk[n];
342     while (row < i) {
343       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
344       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
346       nzi  += nlnk;
347       row   = lnk[row];
348     }
349     bi[i+1] = bi[i] + nzi;
350     im[i]   = nzi;
351 
352     /* mark bdiag */
353     nzbd = 0;
354     nnz  = nzi;
355     k    = lnk[n];
356     while (nnz-- && k < i) {
357       nzbd++;
358       k = lnk[k];
359     }
360     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
361 
362     /* if free space is not available, make more free space */
363     if (current_space->local_remaining<nzi) {
364       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
365       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
366       reallocs++;
367     }
368 
369     /* copy data into free space, then initialize lnk */
370     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
371     bi_ptr[i] = current_space->array;
372     current_space->array           += nzi;
373     current_space->local_used      += nzi;
374     current_space->local_remaining -= nzi;
375   }
376 
377   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
378   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
379 
380   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
381   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
382   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
383   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
384   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
385 
386   /* put together the new matrix */
387   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
388   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
389   b    = (Mat_SeqAIJ*)(B)->data;
390   b->free_a       = PETSC_TRUE;
391   b->free_ij      = PETSC_TRUE;
392   b->singlemalloc = PETSC_FALSE;
393   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
394   b->j          = bj;
395   b->i          = bi;
396   b->diag       = bdiag;
397   b->ilen       = 0;
398   b->imax       = 0;
399   b->row        = isrow;
400   b->col        = iscol;
401   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
402   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
403   b->icol       = isicol;
404   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
405 
406   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
407   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
408   b->maxnz = b->nz = bdiag[0]+1;
409   B->factortype            = MAT_FACTOR_LU;
410   B->info.factor_mallocs   = reallocs;
411   B->info.fill_ratio_given = f;
412 
413   if (ai[n]) {
414     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
415   } else {
416     B->info.fill_ratio_needed = 0.0;
417   }
418 #if defined(PETSC_USE_INFO)
419   if (ai[n] != 0) {
420     PetscReal af = B->info.fill_ratio_needed;
421     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
422     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
423     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
424     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
425   } else {
426     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
427   }
428 #endif
429   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
430   if (a->inode.size) {
431     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
432   }
433   PetscFunctionReturn(0);
434 }
435 
436 /*
437     Trouble in factorization, should we dump the original matrix?
438 */
439 #undef __FUNCT__
440 #define __FUNCT__ "MatFactorDumpMatrix"
441 PetscErrorCode MatFactorDumpMatrix(Mat A)
442 {
443   PetscErrorCode ierr;
444   PetscBool      flg = PETSC_FALSE;
445 
446   PetscFunctionBegin;
447   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
448   if (flg) {
449     PetscViewer viewer;
450     char        filename[PETSC_MAX_PATH_LEN];
451 
452     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
453     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
454     ierr = MatView(A,viewer);CHKERRQ(ierr);
455     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
456   }
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
462 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
463 {
464   Mat              C=B;
465   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
466   IS               isrow = b->row,isicol = b->icol;
467   PetscErrorCode   ierr;
468   const PetscInt   *r,*ic,*ics;
469   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
470   PetscInt         i,j,k,nz,nzL,row,*pj;
471   const PetscInt   *ajtmp,*bjtmp;
472   MatScalar        *rtmp,*pc,multiplier,*pv;
473   const  MatScalar *aa=a->a,*v;
474   PetscBool        row_identity,col_identity;
475   FactorShiftCtx   sctx;
476   const PetscInt   *ddiag;
477   PetscReal        rs;
478   MatScalar        d;
479 
480   PetscFunctionBegin;
481   /* MatPivotSetUp(): initialize shift context sctx */
482   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
483 
484   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
485     ddiag          = a->diag;
486     sctx.shift_top = info->zeropivot;
487     for (i=0; i<n; i++) {
488       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
489       d  = (aa)[ddiag[i]];
490       rs = -PetscAbsScalar(d) - PetscRealPart(d);
491       v  = aa+ai[i];
492       nz = ai[i+1] - ai[i];
493       for (j=0; j<nz; j++)
494         rs += PetscAbsScalar(v[j]);
495       if (rs>sctx.shift_top) sctx.shift_top = rs;
496     }
497     sctx.shift_top   *= 1.1;
498     sctx.nshift_max   = 5;
499     sctx.shift_lo     = 0.;
500     sctx.shift_hi     = 1.;
501   }
502 
503   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
504   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
505   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
506   ics  = ic;
507 
508   do {
509     sctx.newshift = PETSC_FALSE;
510     for (i=0; i<n; i++) {
511       /* zero rtmp */
512       /* L part */
513       nz    = bi[i+1] - bi[i];
514       bjtmp = bj + bi[i];
515       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
516 
517       /* U part */
518       nz = bdiag[i]-bdiag[i+1];
519       bjtmp = bj + bdiag[i+1]+1;
520       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
521 
522       /* load in initial (unfactored row) */
523       nz    = ai[r[i]+1] - ai[r[i]];
524       ajtmp = aj + ai[r[i]];
525       v     = aa + ai[r[i]];
526       for (j=0; j<nz; j++) {
527         rtmp[ics[ajtmp[j]]] = v[j];
528       }
529       /* ZeropivotApply() */
530       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
531 
532       /* elimination */
533       bjtmp = bj + bi[i];
534       row   = *bjtmp++;
535       nzL   = bi[i+1] - bi[i];
536       for (k=0; k < nzL;k++) {
537         pc = rtmp + row;
538         if (*pc != 0.0) {
539           pv         = b->a + bdiag[row];
540           multiplier = *pc * (*pv);
541           *pc        = multiplier;
542           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
543           pv = b->a + bdiag[row+1]+1;
544           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
545           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
546           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
547         }
548         row = *bjtmp++;
549       }
550 
551       /* finished row so stick it into b->a */
552       rs = 0.0;
553       /* L part */
554       pv   = b->a + bi[i] ;
555       pj   = b->j + bi[i] ;
556       nz   = bi[i+1] - bi[i];
557       for (j=0; j<nz; j++) {
558         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
559       }
560 
561       /* U part */
562       pv = b->a + bdiag[i+1]+1;
563       pj = b->j + bdiag[i+1]+1;
564       nz = bdiag[i] - bdiag[i+1]-1;
565       for (j=0; j<nz; j++) {
566         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
567       }
568 
569       sctx.rs  = rs;
570       sctx.pv  = rtmp[i];
571       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
572       if (sctx.newshift) break; /* break for-loop */
573       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
574 
575       /* Mark diagonal and invert diagonal for simplier triangular solves */
576       pv  = b->a + bdiag[i];
577       *pv = 1.0/rtmp[i];
578 
579     } /* endof for (i=0; i<n; i++) { */
580 
581     /* MatPivotRefine() */
582     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
583       /*
584        * if no shift in this attempt & shifting & started shifting & can refine,
585        * then try lower shift
586        */
587       sctx.shift_hi       = sctx.shift_fraction;
588       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
589       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
590       sctx.newshift       = PETSC_TRUE;
591       sctx.nshift++;
592     }
593   } while (sctx.newshift);
594 
595   ierr = PetscFree(rtmp);CHKERRQ(ierr);
596   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
597   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
598 
599   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
600   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
601   if (row_identity && col_identity) {
602     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
603   } else {
604     C->ops->solve = MatSolve_SeqAIJ;
605   }
606   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
607   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
608   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
609   C->ops->matsolve           = MatMatSolve_SeqAIJ;
610   C->assembled    = PETSC_TRUE;
611   C->preallocated = PETSC_TRUE;
612   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
613 
614   /* MatShiftView(A,info,&sctx) */
615   if (sctx.nshift) {
616     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
617       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
618     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
619       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
620     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
621       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
622     }
623   }
624   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
625   PetscFunctionReturn(0);
626 }
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
630 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
631 {
632   Mat             C=B;
633   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
634   IS              isrow = b->row,isicol = b->icol;
635   PetscErrorCode  ierr;
636   const PetscInt   *r,*ic,*ics;
637   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
638   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
639   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
640   MatScalar       *pv,*rtmp,*pc,multiplier,d;
641   const MatScalar *v,*aa=a->a;
642   PetscReal       rs=0.0;
643   FactorShiftCtx  sctx;
644   const PetscInt  *ddiag;
645   PetscBool       row_identity, col_identity;
646 
647   PetscFunctionBegin;
648   /* MatPivotSetUp(): initialize shift context sctx */
649   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
650 
651   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
652     ddiag          = a->diag;
653     sctx.shift_top = info->zeropivot;
654     for (i=0; i<n; i++) {
655       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
656       d  = (aa)[ddiag[i]];
657       rs = -PetscAbsScalar(d) - PetscRealPart(d);
658       v  = aa+ai[i];
659       nz = ai[i+1] - ai[i];
660       for (j=0; j<nz; j++)
661         rs += PetscAbsScalar(v[j]);
662       if (rs>sctx.shift_top) sctx.shift_top = rs;
663     }
664     sctx.shift_top   *= 1.1;
665     sctx.nshift_max   = 5;
666     sctx.shift_lo     = 0.;
667     sctx.shift_hi     = 1.;
668   }
669 
670   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
671   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
672   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
673   ics  = ic;
674 
675   do {
676     sctx.newshift = PETSC_FALSE;
677     for (i=0; i<n; i++) {
678       nz    = bi[i+1] - bi[i];
679       bjtmp = bj + bi[i];
680       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
681 
682       /* load in initial (unfactored row) */
683       nz    = ai[r[i]+1] - ai[r[i]];
684       ajtmp = aj + ai[r[i]];
685       v     = aa + ai[r[i]];
686       for (j=0; j<nz; j++) {
687         rtmp[ics[ajtmp[j]]] = v[j];
688       }
689       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
690 
691       row = *bjtmp++;
692       while  (row < i) {
693         pc = rtmp + row;
694         if (*pc != 0.0) {
695           pv         = b->a + diag_offset[row];
696           pj         = b->j + diag_offset[row] + 1;
697           multiplier = *pc / *pv++;
698           *pc        = multiplier;
699           nz         = bi[row+1] - diag_offset[row] - 1;
700           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
701           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
702         }
703         row = *bjtmp++;
704       }
705       /* finished row so stick it into b->a */
706       pv   = b->a + bi[i] ;
707       pj   = b->j + bi[i] ;
708       nz   = bi[i+1] - bi[i];
709       diag = diag_offset[i] - bi[i];
710       rs   = 0.0;
711       for (j=0; j<nz; j++) {
712         pv[j] = rtmp[pj[j]];
713         rs   += PetscAbsScalar(pv[j]);
714       }
715       rs   -= PetscAbsScalar(pv[diag]);
716 
717       sctx.rs  = rs;
718       sctx.pv  = pv[diag];
719       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
720       if (sctx.newshift) break;
721       pv[diag] = sctx.pv;
722     }
723 
724     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
725       /*
726        * if no shift in this attempt & shifting & started shifting & can refine,
727        * then try lower shift
728        */
729       sctx.shift_hi       = sctx.shift_fraction;
730       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
731       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
732       sctx.newshift       = PETSC_TRUE;
733       sctx.nshift++;
734     }
735   } while (sctx.newshift);
736 
737   /* invert diagonal entries for simplier triangular solves */
738   for (i=0; i<n; i++) {
739     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
740   }
741   ierr = PetscFree(rtmp);CHKERRQ(ierr);
742   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
743   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
744 
745   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
746   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
747   if (row_identity && col_identity) {
748     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
749   } else {
750     C->ops->solve   = MatSolve_SeqAIJ_inplace;
751   }
752   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
753   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
754   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
755   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
756   C->assembled    = PETSC_TRUE;
757   C->preallocated = PETSC_TRUE;
758   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
759   if (sctx.nshift) {
760      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
761       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
762     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
763       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
764     }
765   }
766   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
767   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
768   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 /*
773    This routine implements inplace ILU(0) with row or/and column permutations.
774    Input:
775      A - original matrix
776    Output;
777      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
778          a->j (col index) is permuted by the inverse of colperm, then sorted
779          a->a reordered accordingly with a->j
780          a->diag (ptr to diagonal elements) is updated.
781 */
782 #undef __FUNCT__
783 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
784 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
785 {
786   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
787   IS             isrow = a->row,isicol = a->icol;
788   PetscErrorCode ierr;
789   const PetscInt *r,*ic,*ics;
790   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
791   PetscInt       *ajtmp,nz,row;
792   PetscInt       *diag = a->diag,nbdiag,*pj;
793   PetscScalar    *rtmp,*pc,multiplier,d;
794   MatScalar      *pv,*v;
795   PetscReal      rs;
796   FactorShiftCtx sctx;
797   const  MatScalar *aa=a->a,*vtmp;
798 
799   PetscFunctionBegin;
800   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
801 
802   /* MatPivotSetUp(): initialize shift context sctx */
803   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
804 
805   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
806     const PetscInt   *ddiag = a->diag;
807     sctx.shift_top = info->zeropivot;
808     for (i=0; i<n; i++) {
809       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
810       d  = (aa)[ddiag[i]];
811       rs = -PetscAbsScalar(d) - PetscRealPart(d);
812       vtmp  = aa+ai[i];
813       nz = ai[i+1] - ai[i];
814       for (j=0; j<nz; j++)
815         rs += PetscAbsScalar(vtmp[j]);
816       if (rs>sctx.shift_top) sctx.shift_top = rs;
817     }
818     sctx.shift_top   *= 1.1;
819     sctx.nshift_max   = 5;
820     sctx.shift_lo     = 0.;
821     sctx.shift_hi     = 1.;
822   }
823 
824   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
825   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
826   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
827   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
828   ics = ic;
829 
830 #if defined(MV)
831   sctx.shift_top      = 0.;
832   sctx.nshift_max     = 0;
833   sctx.shift_lo       = 0.;
834   sctx.shift_hi       = 0.;
835   sctx.shift_fraction = 0.;
836 
837   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
838     sctx.shift_top = 0.;
839     for (i=0; i<n; i++) {
840       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
841       d  = (a->a)[diag[i]];
842       rs = -PetscAbsScalar(d) - PetscRealPart(d);
843       v  = a->a+ai[i];
844       nz = ai[i+1] - ai[i];
845       for (j=0; j<nz; j++)
846         rs += PetscAbsScalar(v[j]);
847       if (rs>sctx.shift_top) sctx.shift_top = rs;
848     }
849     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
850     sctx.shift_top    *= 1.1;
851     sctx.nshift_max   = 5;
852     sctx.shift_lo     = 0.;
853     sctx.shift_hi     = 1.;
854   }
855 
856   sctx.shift_amount = 0.;
857   sctx.nshift       = 0;
858 #endif
859 
860   do {
861     sctx.newshift = PETSC_FALSE;
862     for (i=0; i<n; i++) {
863       /* load in initial unfactored row */
864       nz    = ai[r[i]+1] - ai[r[i]];
865       ajtmp = aj + ai[r[i]];
866       v     = a->a + ai[r[i]];
867       /* sort permuted ajtmp and values v accordingly */
868       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
869       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
870 
871       diag[r[i]] = ai[r[i]];
872       for (j=0; j<nz; j++) {
873         rtmp[ajtmp[j]] = v[j];
874         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
875       }
876       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
877 
878       row = *ajtmp++;
879       while  (row < i) {
880         pc = rtmp + row;
881         if (*pc != 0.0) {
882           pv         = a->a + diag[r[row]];
883           pj         = aj + diag[r[row]] + 1;
884 
885           multiplier = *pc / *pv++;
886           *pc        = multiplier;
887           nz         = ai[r[row]+1] - diag[r[row]] - 1;
888           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
889           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
890         }
891         row = *ajtmp++;
892       }
893       /* finished row so overwrite it onto a->a */
894       pv   = a->a + ai[r[i]] ;
895       pj   = aj + ai[r[i]] ;
896       nz   = ai[r[i]+1] - ai[r[i]];
897       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
898 
899       rs   = 0.0;
900       for (j=0; j<nz; j++) {
901         pv[j] = rtmp[pj[j]];
902         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
903       }
904 
905       sctx.rs  = rs;
906       sctx.pv  = pv[nbdiag];
907       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
908       if (sctx.newshift) break;
909       pv[nbdiag] = sctx.pv;
910     }
911 
912     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
913       /*
914        * if no shift in this attempt & shifting & started shifting & can refine,
915        * then try lower shift
916        */
917       sctx.shift_hi        = sctx.shift_fraction;
918       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
919       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
920       sctx.newshift         = PETSC_TRUE;
921       sctx.nshift++;
922     }
923   } while (sctx.newshift);
924 
925   /* invert diagonal entries for simplier triangular solves */
926   for (i=0; i<n; i++) {
927     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
928   }
929 
930   ierr = PetscFree(rtmp);CHKERRQ(ierr);
931   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
932   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
933   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
934   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
935   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
936   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
937   A->assembled = PETSC_TRUE;
938   A->preallocated = PETSC_TRUE;
939   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
940   if (sctx.nshift) {
941     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
942       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
943     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
944       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
945     }
946   }
947   PetscFunctionReturn(0);
948 }
949 
950 /* ----------------------------------------------------------- */
951 #undef __FUNCT__
952 #define __FUNCT__ "MatLUFactor_SeqAIJ"
953 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
954 {
955   PetscErrorCode ierr;
956   Mat            C;
957 
958   PetscFunctionBegin;
959   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
960   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
961   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
962   A->ops->solve            = C->ops->solve;
963   A->ops->solvetranspose   = C->ops->solvetranspose;
964   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
965   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 /* ----------------------------------------------------------- */
969 
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
973 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
974 {
975   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
976   IS                iscol = a->col,isrow = a->row;
977   PetscErrorCode    ierr;
978   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
979   PetscInt          nz;
980   const PetscInt    *rout,*cout,*r,*c;
981   PetscScalar       *x,*tmp,*tmps,sum;
982   const PetscScalar *b;
983   const MatScalar   *aa = a->a,*v;
984 
985   PetscFunctionBegin;
986   if (!n) PetscFunctionReturn(0);
987 
988   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
989   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
990   tmp  = a->solve_work;
991 
992   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
993   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
994 
995   /* forward solve the lower triangular */
996   tmp[0] = b[*r++];
997   tmps   = tmp;
998   for (i=1; i<n; i++) {
999     v   = aa + ai[i] ;
1000     vi  = aj + ai[i] ;
1001     nz  = a->diag[i] - ai[i];
1002     sum = b[*r++];
1003     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1004     tmp[i] = sum;
1005   }
1006 
1007   /* backward solve the upper triangular */
1008   for (i=n-1; i>=0; i--) {
1009     v   = aa + a->diag[i] + 1;
1010     vi  = aj + a->diag[i] + 1;
1011     nz  = ai[i+1] - a->diag[i] - 1;
1012     sum = tmp[i];
1013     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1014     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1015   }
1016 
1017   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1018   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1019   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1020   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1021   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1027 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1028 {
1029   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1030   IS              iscol = a->col,isrow = a->row;
1031   PetscErrorCode  ierr;
1032   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1033   PetscInt        nz,neq;
1034   const PetscInt  *rout,*cout,*r,*c;
1035   PetscScalar     *x,*b,*tmp,*tmps,sum;
1036   const MatScalar *aa = a->a,*v;
1037   PetscBool       bisdense,xisdense;
1038 
1039   PetscFunctionBegin;
1040   if (!n) PetscFunctionReturn(0);
1041 
1042   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1043   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1044   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1045   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1046 
1047   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1048   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1049 
1050   tmp  = a->solve_work;
1051   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1052   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1053 
1054   for (neq=0; neq<B->cmap->n; neq++) {
1055     /* forward solve the lower triangular */
1056     tmp[0] = b[r[0]];
1057     tmps   = tmp;
1058     for (i=1; i<n; i++) {
1059       v   = aa + ai[i] ;
1060       vi  = aj + ai[i] ;
1061       nz  = a->diag[i] - ai[i];
1062       sum = b[r[i]];
1063       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1064       tmp[i] = sum;
1065     }
1066     /* backward solve the upper triangular */
1067     for (i=n-1; i>=0; i--) {
1068       v   = aa + a->diag[i] + 1;
1069       vi  = aj + a->diag[i] + 1;
1070       nz  = ai[i+1] - a->diag[i] - 1;
1071       sum = tmp[i];
1072       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1073       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1074     }
1075 
1076     b += n;
1077     x += n;
1078   }
1079   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1080   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1081   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1082   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1083   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1089 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1090 {
1091   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1092   IS              iscol = a->col,isrow = a->row;
1093   PetscErrorCode  ierr;
1094   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1095   PetscInt        nz,neq;
1096   const PetscInt  *rout,*cout,*r,*c;
1097   PetscScalar     *x,*b,*tmp,sum;
1098   const MatScalar *aa = a->a,*v;
1099   PetscBool       bisdense,xisdense;
1100 
1101   PetscFunctionBegin;
1102   if (!n) PetscFunctionReturn(0);
1103 
1104   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1105   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1106   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1107   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1108 
1109   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1110   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1111 
1112   tmp  = a->solve_work;
1113   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1114   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1115 
1116   for (neq=0; neq<B->cmap->n; neq++) {
1117     /* forward solve the lower triangular */
1118     tmp[0] = b[r[0]];
1119     v      = aa;
1120     vi     = aj;
1121     for (i=1; i<n; i++) {
1122       nz  = ai[i+1] - ai[i];
1123       sum = b[r[i]];
1124       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1125       tmp[i] = sum;
1126       v += nz; vi += nz;
1127     }
1128 
1129     /* backward solve the upper triangular */
1130     for (i=n-1; i>=0; i--) {
1131       v   = aa + adiag[i+1]+1;
1132       vi  = aj + adiag[i+1]+1;
1133       nz  = adiag[i]-adiag[i+1]-1;
1134       sum = tmp[i];
1135       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1136       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1137     }
1138 
1139     b += n;
1140     x += n;
1141   }
1142   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1143   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1144   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1145   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1146   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1152 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1153 {
1154   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1155   IS              iscol = a->col,isrow = a->row;
1156   PetscErrorCode  ierr;
1157   const PetscInt  *r,*c,*rout,*cout;
1158   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1159   PetscInt        nz,row;
1160   PetscScalar     *x,*b,*tmp,*tmps,sum;
1161   const MatScalar *aa = a->a,*v;
1162 
1163   PetscFunctionBegin;
1164   if (!n) PetscFunctionReturn(0);
1165 
1166   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1167   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1168   tmp  = a->solve_work;
1169 
1170   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1171   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1172 
1173   /* forward solve the lower triangular */
1174   tmp[0] = b[*r++];
1175   tmps   = tmp;
1176   for (row=1; row<n; row++) {
1177     i   = rout[row]; /* permuted row */
1178     v   = aa + ai[i] ;
1179     vi  = aj + ai[i] ;
1180     nz  = a->diag[i] - ai[i];
1181     sum = b[*r++];
1182     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1183     tmp[row] = sum;
1184   }
1185 
1186   /* backward solve the upper triangular */
1187   for (row=n-1; row>=0; row--) {
1188     i   = rout[row]; /* permuted row */
1189     v   = aa + a->diag[i] + 1;
1190     vi  = aj + a->diag[i] + 1;
1191     nz  = ai[i+1] - a->diag[i] - 1;
1192     sum = tmp[row];
1193     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1194     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1195   }
1196 
1197   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1198   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1199   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1200   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1201   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 /* ----------------------------------------------------------- */
1206 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1209 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1210 {
1211   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1212   PetscErrorCode    ierr;
1213   PetscInt          n = A->rmap->n;
1214   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1215   PetscScalar       *x;
1216   const PetscScalar *b;
1217   const MatScalar   *aa = a->a;
1218 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1219   PetscInt          adiag_i,i,nz,ai_i;
1220   const PetscInt    *vi;
1221   const MatScalar   *v;
1222   PetscScalar       sum;
1223 #endif
1224 
1225   PetscFunctionBegin;
1226   if (!n) PetscFunctionReturn(0);
1227 
1228   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1229   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1230 
1231 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1232   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1233 #else
1234   /* forward solve the lower triangular */
1235   x[0] = b[0];
1236   for (i=1; i<n; i++) {
1237     ai_i = ai[i];
1238     v    = aa + ai_i;
1239     vi   = aj + ai_i;
1240     nz   = adiag[i] - ai_i;
1241     sum  = b[i];
1242     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1243     x[i] = sum;
1244   }
1245 
1246   /* backward solve the upper triangular */
1247   for (i=n-1; i>=0; i--) {
1248     adiag_i = adiag[i];
1249     v       = aa + adiag_i + 1;
1250     vi      = aj + adiag_i + 1;
1251     nz      = ai[i+1] - adiag_i - 1;
1252     sum     = x[i];
1253     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1254     x[i]    = sum*aa[adiag_i];
1255   }
1256 #endif
1257   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1258   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1259   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1265 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1266 {
1267   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1268   IS                iscol = a->col,isrow = a->row;
1269   PetscErrorCode    ierr;
1270   PetscInt          i, n = A->rmap->n,j;
1271   PetscInt          nz;
1272   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1273   PetscScalar       *x,*tmp,sum;
1274   const PetscScalar *b;
1275   const MatScalar   *aa = a->a,*v;
1276 
1277   PetscFunctionBegin;
1278   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1279 
1280   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1281   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1282   tmp  = a->solve_work;
1283 
1284   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1285   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1286 
1287   /* forward solve the lower triangular */
1288   tmp[0] = b[*r++];
1289   for (i=1; i<n; i++) {
1290     v   = aa + ai[i] ;
1291     vi  = aj + ai[i] ;
1292     nz  = a->diag[i] - ai[i];
1293     sum = b[*r++];
1294     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1295     tmp[i] = sum;
1296   }
1297 
1298   /* backward solve the upper triangular */
1299   for (i=n-1; i>=0; i--) {
1300     v   = aa + a->diag[i] + 1;
1301     vi  = aj + a->diag[i] + 1;
1302     nz  = ai[i+1] - a->diag[i] - 1;
1303     sum = tmp[i];
1304     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1305     tmp[i] = sum*aa[a->diag[i]];
1306     x[*c--] += tmp[i];
1307   }
1308 
1309   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1310   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1311   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1312   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1313   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1314 
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1320 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1321 {
1322   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1323   IS                iscol = a->col,isrow = a->row;
1324   PetscErrorCode    ierr;
1325   PetscInt          i, n = A->rmap->n,j;
1326   PetscInt          nz;
1327   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1328   PetscScalar       *x,*tmp,sum;
1329   const PetscScalar *b;
1330   const MatScalar   *aa = a->a,*v;
1331 
1332   PetscFunctionBegin;
1333   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1334 
1335   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1336   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1337   tmp  = a->solve_work;
1338 
1339   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1340   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1341 
1342   /* forward solve the lower triangular */
1343   tmp[0] = b[r[0]];
1344   v      = aa;
1345   vi     = aj;
1346   for (i=1; i<n; i++) {
1347     nz  = ai[i+1] - ai[i];
1348     sum = b[r[i]];
1349     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1350     tmp[i] = sum;
1351     v += nz; vi += nz;
1352   }
1353 
1354   /* backward solve the upper triangular */
1355   v  = aa + adiag[n-1];
1356   vi = aj + adiag[n-1];
1357   for (i=n-1; i>=0; i--) {
1358     nz  = adiag[i] - adiag[i+1] - 1;
1359     sum = tmp[i];
1360     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1361     tmp[i] = sum*v[nz];
1362     x[c[i]] += tmp[i];
1363     v += nz+1; vi += nz+1;
1364   }
1365 
1366   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1367   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1368   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1369   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1370   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1371 
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1377 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1378 {
1379   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1380   IS                iscol = a->col,isrow = a->row;
1381   PetscErrorCode    ierr;
1382   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1383   PetscInt          i,n = A->rmap->n,j;
1384   PetscInt          nz;
1385   PetscScalar       *x,*tmp,s1;
1386   const MatScalar   *aa = a->a,*v;
1387   const PetscScalar *b;
1388 
1389   PetscFunctionBegin;
1390   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1391   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1392   tmp  = a->solve_work;
1393 
1394   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1395   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1396 
1397   /* copy the b into temp work space according to permutation */
1398   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1399 
1400   /* forward solve the U^T */
1401   for (i=0; i<n; i++) {
1402     v   = aa + diag[i] ;
1403     vi  = aj + diag[i] + 1;
1404     nz  = ai[i+1] - diag[i] - 1;
1405     s1  = tmp[i];
1406     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1407     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1408     tmp[i] = s1;
1409   }
1410 
1411   /* backward solve the L^T */
1412   for (i=n-1; i>=0; i--) {
1413     v   = aa + diag[i] - 1 ;
1414     vi  = aj + diag[i] - 1 ;
1415     nz  = diag[i] - ai[i];
1416     s1  = tmp[i];
1417     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1418   }
1419 
1420   /* copy tmp into x according to permutation */
1421   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1422 
1423   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1424   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1425   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1426   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1427 
1428   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1434 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1435 {
1436   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1437   IS                iscol = a->col,isrow = a->row;
1438   PetscErrorCode    ierr;
1439   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1440   PetscInt          i,n = A->rmap->n,j;
1441   PetscInt          nz;
1442   PetscScalar       *x,*tmp,s1;
1443   const MatScalar   *aa = a->a,*v;
1444   const PetscScalar *b;
1445 
1446   PetscFunctionBegin;
1447   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1448   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1449   tmp  = a->solve_work;
1450 
1451   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1452   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1453 
1454   /* copy the b into temp work space according to permutation */
1455   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1456 
1457   /* forward solve the U^T */
1458   for (i=0; i<n; i++) {
1459     v   = aa + adiag[i+1] + 1;
1460     vi  = aj + adiag[i+1] + 1;
1461     nz  = adiag[i] - adiag[i+1] - 1;
1462     s1  = tmp[i];
1463     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1464     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1465     tmp[i] = s1;
1466   }
1467 
1468   /* backward solve the L^T */
1469   for (i=n-1; i>=0; i--) {
1470     v   = aa + ai[i];
1471     vi  = aj + ai[i];
1472     nz  = ai[i+1] - ai[i];
1473     s1  = tmp[i];
1474     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1475   }
1476 
1477   /* copy tmp into x according to permutation */
1478   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1479 
1480   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1481   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1482   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1483   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1484 
1485   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1491 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1492 {
1493   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1494   IS                iscol = a->col,isrow = a->row;
1495   PetscErrorCode    ierr;
1496   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1497   PetscInt          i,n = A->rmap->n,j;
1498   PetscInt          nz;
1499   PetscScalar       *x,*tmp,s1;
1500   const MatScalar   *aa = a->a,*v;
1501   const PetscScalar *b;
1502 
1503   PetscFunctionBegin;
1504   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1505   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1506   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1507   tmp  = a->solve_work;
1508 
1509   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1510   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1511 
1512   /* copy the b into temp work space according to permutation */
1513   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1514 
1515   /* forward solve the U^T */
1516   for (i=0; i<n; i++) {
1517     v   = aa + diag[i] ;
1518     vi  = aj + diag[i] + 1;
1519     nz  = ai[i+1] - diag[i] - 1;
1520     s1  = tmp[i];
1521     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1522     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1523     tmp[i] = s1;
1524   }
1525 
1526   /* backward solve the L^T */
1527   for (i=n-1; i>=0; i--) {
1528     v   = aa + diag[i] - 1 ;
1529     vi  = aj + diag[i] - 1 ;
1530     nz  = diag[i] - ai[i];
1531     s1  = tmp[i];
1532     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1533   }
1534 
1535   /* copy tmp into x according to permutation */
1536   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1537 
1538   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1539   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1540   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1541   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1542 
1543   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1549 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1550 {
1551   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1552   IS                iscol = a->col,isrow = a->row;
1553   PetscErrorCode    ierr;
1554   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1555   PetscInt          i,n = A->rmap->n,j;
1556   PetscInt          nz;
1557   PetscScalar       *x,*tmp,s1;
1558   const MatScalar   *aa = a->a,*v;
1559   const PetscScalar *b;
1560 
1561   PetscFunctionBegin;
1562   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1563   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1564   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1565   tmp  = a->solve_work;
1566 
1567   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1568   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1569 
1570   /* copy the b into temp work space according to permutation */
1571   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1572 
1573   /* forward solve the U^T */
1574   for (i=0; i<n; i++) {
1575     v   = aa + adiag[i+1] + 1;
1576     vi  = aj + adiag[i+1] + 1;
1577     nz  = adiag[i] - adiag[i+1] - 1;
1578     s1  = tmp[i];
1579     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1580     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1581     tmp[i] = s1;
1582   }
1583 
1584 
1585   /* backward solve the L^T */
1586   for (i=n-1; i>=0; i--) {
1587     v   = aa + ai[i] ;
1588     vi  = aj + ai[i];
1589     nz  = ai[i+1] - ai[i];
1590     s1  = tmp[i];
1591     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1592   }
1593 
1594   /* copy tmp into x according to permutation */
1595   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1596 
1597   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1598   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1599   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1600   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1601 
1602   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 /* ----------------------------------------------------------------*/
1607 
1608 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool );
1609 
1610 /*
1611    ilu() under revised new data structure.
1612    Factored arrays bj and ba are stored as
1613      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1614 
1615    bi=fact->i is an array of size n+1, in which
1616    bi+
1617      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1618      bi[n]:  points to L(n-1,n-1)+1
1619 
1620   bdiag=fact->diag is an array of size n+1,in which
1621      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1622      bdiag[n]: points to entry of U(n-1,0)-1
1623 
1624    U(i,:) contains bdiag[i] as its last entry, i.e.,
1625     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1626 */
1627 #undef __FUNCT__
1628 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1629 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1630 {
1631 
1632   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1633   PetscErrorCode     ierr;
1634   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1635   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1636   PetscBool          missing;
1637   IS                 isicol;
1638 
1639   PetscFunctionBegin;
1640   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1641   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1642   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1643   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1644   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1645   b    = (Mat_SeqAIJ*)(fact)->data;
1646 
1647   /* allocate matrix arrays for new data structure */
1648   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1649   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1650   b->singlemalloc = PETSC_TRUE;
1651   if (!b->diag) {
1652     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1653     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1654   }
1655   bdiag = b->diag;
1656 
1657   if (n > 0) {
1658     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1659   }
1660 
1661   /* set bi and bj with new data structure */
1662   bi = b->i;
1663   bj = b->j;
1664 
1665   /* L part */
1666   bi[0] = 0;
1667   for (i=0; i<n; i++) {
1668     nz = adiag[i] - ai[i];
1669     bi[i+1] = bi[i] + nz;
1670     aj = a->j + ai[i];
1671     for (j=0; j<nz; j++) {
1672       /*   *bj = aj[j]; bj++; */
1673       bj[k++] = aj[j];
1674     }
1675   }
1676 
1677   /* U part */
1678   bdiag[n] = bi[n]-1;
1679   for (i=n-1; i>=0; i--) {
1680     nz = ai[i+1] - adiag[i] - 1;
1681     aj = a->j + adiag[i] + 1;
1682     for (j=0; j<nz; j++) {
1683       /*      *bj = aj[j]; bj++; */
1684       bj[k++] = aj[j];
1685     }
1686     /* diag[i] */
1687     /*    *bj = i; bj++; */
1688     bj[k++] = i;
1689     bdiag[i] = bdiag[i+1] + nz + 1;
1690   }
1691 
1692   fact->factortype             = MAT_FACTOR_ILU;
1693   fact->info.factor_mallocs    = 0;
1694   fact->info.fill_ratio_given  = info->fill;
1695   fact->info.fill_ratio_needed = 1.0;
1696   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1697 
1698   b       = (Mat_SeqAIJ*)(fact)->data;
1699   b->row  = isrow;
1700   b->col  = iscol;
1701   b->icol = isicol;
1702   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1703   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1704   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1710 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1711 {
1712   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1713   IS                 isicol;
1714   PetscErrorCode     ierr;
1715   const PetscInt     *r,*ic;
1716   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1717   PetscInt           *bi,*cols,nnz,*cols_lvl;
1718   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1719   PetscInt           i,levels,diagonal_fill;
1720   PetscBool          col_identity,row_identity;
1721   PetscReal          f;
1722   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1723   PetscBT            lnkbt;
1724   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1725   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1726   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1727 
1728   PetscFunctionBegin;
1729   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1730   /*
1731   PetscBool          olddatastruct=PETSC_FALSE;
1732   ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1733   if (olddatastruct) {
1734     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1735     PetscFunctionReturn(0);
1736   }
1737   */
1738 
1739   levels = (PetscInt)info->levels;
1740   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1741   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1742   if (!levels && row_identity && col_identity) {
1743     /* special case: ilu(0) with natural ordering */
1744     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1745     if (a->inode.size) {
1746       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1747     }
1748     PetscFunctionReturn(0);
1749   }
1750 
1751   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1752   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1753   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1754   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1755 
1756   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1757   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1758   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1759   bi[0] = bdiag[0] = 0;
1760   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1761 
1762   /* create a linked list for storing column indices of the active row */
1763   nlnk = n + 1;
1764   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1765 
1766   /* initial FreeSpace size is f*(ai[n]+1) */
1767   f             = info->fill;
1768   diagonal_fill = (PetscInt)info->diagonal_fill;
1769   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1770   current_space = free_space;
1771   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1772   current_space_lvl = free_space_lvl;
1773   for (i=0; i<n; i++) {
1774     nzi = 0;
1775     /* copy current row into linked list */
1776     nnz  = ai[r[i]+1] - ai[r[i]];
1777     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1778     cols = aj + ai[r[i]];
1779     lnk[i] = -1; /* marker to indicate if diagonal exists */
1780     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1781     nzi += nlnk;
1782 
1783     /* make sure diagonal entry is included */
1784     if (diagonal_fill && lnk[i] == -1) {
1785       fm = n;
1786       while (lnk[fm] < i) fm = lnk[fm];
1787       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1788       lnk[fm]    = i;
1789       lnk_lvl[i] = 0;
1790       nzi++; dcount++;
1791     }
1792 
1793     /* add pivot rows into the active row */
1794     nzbd = 0;
1795     prow = lnk[n];
1796     while (prow < i) {
1797       nnz      = bdiag[prow];
1798       cols     = bj_ptr[prow] + nnz + 1;
1799       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1800       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1801       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1802       nzi += nlnk;
1803       prow = lnk[prow];
1804       nzbd++;
1805     }
1806     bdiag[i] = nzbd;
1807     bi[i+1]  = bi[i] + nzi;
1808     /* if free space is not available, make more free space */
1809     if (current_space->local_remaining<nzi) {
1810       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1811       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1812       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1813       reallocs++;
1814     }
1815 
1816     /* copy data into free_space and free_space_lvl, then initialize lnk */
1817     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1818     bj_ptr[i]    = current_space->array;
1819     bjlvl_ptr[i] = current_space_lvl->array;
1820 
1821     /* make sure the active row i has diagonal entry */
1822     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1823 
1824     current_space->array           += nzi;
1825     current_space->local_used      += nzi;
1826     current_space->local_remaining -= nzi;
1827     current_space_lvl->array           += nzi;
1828     current_space_lvl->local_used      += nzi;
1829     current_space_lvl->local_remaining -= nzi;
1830   }
1831 
1832   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1833   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1834   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1835   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1836   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1837 
1838   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1839   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1840   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1841 
1842 #if defined(PETSC_USE_INFO)
1843   {
1844     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1845     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1846     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1847     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1848     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1849     if (diagonal_fill) {
1850       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1851     }
1852   }
1853 #endif
1854   /* put together the new matrix */
1855   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1856   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1857   b = (Mat_SeqAIJ*)(fact)->data;
1858   b->free_a       = PETSC_TRUE;
1859   b->free_ij      = PETSC_TRUE;
1860   b->singlemalloc = PETSC_FALSE;
1861   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1862   b->j          = bj;
1863   b->i          = bi;
1864   b->diag       = bdiag;
1865   b->ilen       = 0;
1866   b->imax       = 0;
1867   b->row        = isrow;
1868   b->col        = iscol;
1869   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1870   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1871   b->icol       = isicol;
1872   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1873   /* In b structure:  Free imax, ilen, old a, old j.
1874      Allocate bdiag, solve_work, new a, new j */
1875   ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1876   b->maxnz = b->nz = bdiag[0]+1;
1877   (fact)->info.factor_mallocs    = reallocs;
1878   (fact)->info.fill_ratio_given  = f;
1879   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1880   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1881   if (a->inode.size) {
1882     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1883   }
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 #undef __FUNCT__
1888 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1889 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1890 {
1891   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1892   IS                 isicol;
1893   PetscErrorCode     ierr;
1894   const PetscInt     *r,*ic;
1895   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1896   PetscInt           *bi,*cols,nnz,*cols_lvl;
1897   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1898   PetscInt           i,levels,diagonal_fill;
1899   PetscBool          col_identity,row_identity;
1900   PetscReal          f;
1901   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1902   PetscBT            lnkbt;
1903   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1904   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1905   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1906   PetscBool          missing;
1907 
1908   PetscFunctionBegin;
1909   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1910   f             = info->fill;
1911   levels        = (PetscInt)info->levels;
1912   diagonal_fill = (PetscInt)info->diagonal_fill;
1913   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1914 
1915   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1916   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1917   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1918     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1919     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1920     if (a->inode.size) {
1921       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1922     }
1923     fact->factortype = MAT_FACTOR_ILU;
1924     (fact)->info.factor_mallocs    = 0;
1925     (fact)->info.fill_ratio_given  = info->fill;
1926     (fact)->info.fill_ratio_needed = 1.0;
1927     b               = (Mat_SeqAIJ*)(fact)->data;
1928     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1929     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1930     b->row              = isrow;
1931     b->col              = iscol;
1932     b->icol             = isicol;
1933     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1934     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1935     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1936     PetscFunctionReturn(0);
1937   }
1938 
1939   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1940   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1941 
1942   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1943   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1944   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1945   bi[0] = bdiag[0] = 0;
1946 
1947   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1948 
1949   /* create a linked list for storing column indices of the active row */
1950   nlnk = n + 1;
1951   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1952 
1953   /* initial FreeSpace size is f*(ai[n]+1) */
1954   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1955   current_space = free_space;
1956   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1957   current_space_lvl = free_space_lvl;
1958 
1959   for (i=0; i<n; i++) {
1960     nzi = 0;
1961     /* copy current row into linked list */
1962     nnz  = ai[r[i]+1] - ai[r[i]];
1963     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1964     cols = aj + ai[r[i]];
1965     lnk[i] = -1; /* marker to indicate if diagonal exists */
1966     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1967     nzi += nlnk;
1968 
1969     /* make sure diagonal entry is included */
1970     if (diagonal_fill && lnk[i] == -1) {
1971       fm = n;
1972       while (lnk[fm] < i) fm = lnk[fm];
1973       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1974       lnk[fm]    = i;
1975       lnk_lvl[i] = 0;
1976       nzi++; dcount++;
1977     }
1978 
1979     /* add pivot rows into the active row */
1980     nzbd = 0;
1981     prow = lnk[n];
1982     while (prow < i) {
1983       nnz      = bdiag[prow];
1984       cols     = bj_ptr[prow] + nnz + 1;
1985       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1986       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1987       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1988       nzi += nlnk;
1989       prow = lnk[prow];
1990       nzbd++;
1991     }
1992     bdiag[i] = nzbd;
1993     bi[i+1]  = bi[i] + nzi;
1994 
1995     /* if free space is not available, make more free space */
1996     if (current_space->local_remaining<nzi) {
1997       nnz = nzi*(n - i); /* estimated and max additional space needed */
1998       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1999       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2000       reallocs++;
2001     }
2002 
2003     /* copy data into free_space and free_space_lvl, then initialize lnk */
2004     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2005     bj_ptr[i]    = current_space->array;
2006     bjlvl_ptr[i] = current_space_lvl->array;
2007 
2008     /* make sure the active row i has diagonal entry */
2009     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2010 
2011     current_space->array           += nzi;
2012     current_space->local_used      += nzi;
2013     current_space->local_remaining -= nzi;
2014     current_space_lvl->array           += nzi;
2015     current_space_lvl->local_used      += nzi;
2016     current_space_lvl->local_remaining -= nzi;
2017   }
2018 
2019   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2020   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2021 
2022   /* destroy list of free space and other temporary arrays */
2023   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2024   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2025   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2026   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2027   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2028 
2029 #if defined(PETSC_USE_INFO)
2030   {
2031     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2032     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2033     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2034     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2035     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2036     if (diagonal_fill) {
2037       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2038     }
2039   }
2040 #endif
2041 
2042   /* put together the new matrix */
2043   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2044   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2045   b = (Mat_SeqAIJ*)(fact)->data;
2046   b->free_a       = PETSC_TRUE;
2047   b->free_ij      = PETSC_TRUE;
2048   b->singlemalloc = PETSC_FALSE;
2049   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2050   b->j          = bj;
2051   b->i          = bi;
2052   for (i=0; i<n; i++) bdiag[i] += bi[i];
2053   b->diag       = bdiag;
2054   b->ilen       = 0;
2055   b->imax       = 0;
2056   b->row        = isrow;
2057   b->col        = iscol;
2058   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2059   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2060   b->icol       = isicol;
2061   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2062   /* In b structure:  Free imax, ilen, old a, old j.
2063      Allocate bdiag, solve_work, new a, new j */
2064   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2065   b->maxnz             = b->nz = bi[n] ;
2066   (fact)->info.factor_mallocs    = reallocs;
2067   (fact)->info.fill_ratio_given  = f;
2068   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2069   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2070   if (a->inode.size) {
2071     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2078 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2079 {
2080   Mat            C = B;
2081   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2082   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2083   IS             ip=b->row,iip = b->icol;
2084   PetscErrorCode ierr;
2085   const PetscInt *rip,*riip;
2086   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2087   PetscInt       *ai=a->i,*aj=a->j;
2088   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2089   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2090   PetscBool      perm_identity;
2091   FactorShiftCtx sctx;
2092   PetscReal      rs;
2093   MatScalar      d,*v;
2094 
2095   PetscFunctionBegin;
2096   /* MatPivotSetUp(): initialize shift context sctx */
2097   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2098 
2099   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2100     sctx.shift_top = info->zeropivot;
2101     for (i=0; i<mbs; i++) {
2102       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2103       d  = (aa)[a->diag[i]];
2104       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2105       v  = aa+ai[i];
2106       nz = ai[i+1] - ai[i];
2107       for (j=0; j<nz; j++)
2108         rs += PetscAbsScalar(v[j]);
2109       if (rs>sctx.shift_top) sctx.shift_top = rs;
2110     }
2111     sctx.shift_top   *= 1.1;
2112     sctx.nshift_max   = 5;
2113     sctx.shift_lo     = 0.;
2114     sctx.shift_hi     = 1.;
2115   }
2116 
2117   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2118   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2119 
2120   /* allocate working arrays
2121      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2122      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2123   */
2124   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2125 
2126   do {
2127     sctx.newshift = PETSC_FALSE;
2128 
2129     for (i=0; i<mbs; i++) c2r[i] = mbs;
2130     if (mbs) il[0] = 0;
2131 
2132     for (k = 0; k<mbs; k++) {
2133       /* zero rtmp */
2134       nz = bi[k+1] - bi[k];
2135       bjtmp = bj + bi[k];
2136       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2137 
2138       /* load in initial unfactored row */
2139       bval = ba + bi[k];
2140       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2141       for (j = jmin; j < jmax; j++) {
2142         col = riip[aj[j]];
2143         if (col >= k) { /* only take upper triangular entry */
2144           rtmp[col] = aa[j];
2145           *bval++   = 0.0; /* for in-place factorization */
2146         }
2147       }
2148       /* shift the diagonal of the matrix: ZeropivotApply() */
2149       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2150 
2151       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2152       dk = rtmp[k];
2153       i  = c2r[k]; /* first row to be added to k_th row  */
2154 
2155       while (i < k) {
2156         nexti = c2r[i]; /* next row to be added to k_th row */
2157 
2158         /* compute multiplier, update diag(k) and U(i,k) */
2159         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2160         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2161         dk   += uikdi*ba[ili]; /* update diag[k] */
2162         ba[ili] = uikdi; /* -U(i,k) */
2163 
2164         /* add multiple of row i to k-th row */
2165         jmin = ili + 1; jmax = bi[i+1];
2166         if (jmin < jmax) {
2167           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2168           /* update il and c2r for row i */
2169           il[i] = jmin;
2170           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2171         }
2172         i = nexti;
2173       }
2174 
2175       /* copy data into U(k,:) */
2176       rs   = 0.0;
2177       jmin = bi[k]; jmax = bi[k+1]-1;
2178       if (jmin < jmax) {
2179         for (j=jmin; j<jmax; j++) {
2180           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2181         }
2182         /* add the k-th row into il and c2r */
2183         il[k] = jmin;
2184         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2185       }
2186 
2187       /* MatPivotCheck() */
2188       sctx.rs  = rs;
2189       sctx.pv  = dk;
2190       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2191       if (sctx.newshift) break;
2192       dk = sctx.pv;
2193 
2194       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2195     }
2196   } while (sctx.newshift);
2197 
2198   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2199   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2200   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2201 
2202   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2203   if (perm_identity) {
2204     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2205     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2206     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2207     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2208   } else {
2209     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2210     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2211     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2212     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2213   }
2214 
2215   C->assembled    = PETSC_TRUE;
2216   C->preallocated = PETSC_TRUE;
2217   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2218 
2219   /* MatPivotView() */
2220   if (sctx.nshift) {
2221     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2222       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2223     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2224       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2225     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2226       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2227     }
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2234 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2235 {
2236   Mat            C = B;
2237   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2238   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2239   IS             ip=b->row,iip = b->icol;
2240   PetscErrorCode ierr;
2241   const PetscInt *rip,*riip;
2242   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2243   PetscInt       *ai=a->i,*aj=a->j;
2244   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2245   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2246   PetscBool      perm_identity;
2247   FactorShiftCtx sctx;
2248   PetscReal      rs;
2249   MatScalar      d,*v;
2250 
2251   PetscFunctionBegin;
2252   /* MatPivotSetUp(): initialize shift context sctx */
2253   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2254 
2255   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2256     sctx.shift_top = info->zeropivot;
2257     for (i=0; i<mbs; i++) {
2258       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2259       d  = (aa)[a->diag[i]];
2260       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2261       v  = aa+ai[i];
2262       nz = ai[i+1] - ai[i];
2263       for (j=0; j<nz; j++)
2264         rs += PetscAbsScalar(v[j]);
2265       if (rs>sctx.shift_top) sctx.shift_top = rs;
2266     }
2267     sctx.shift_top   *= 1.1;
2268     sctx.nshift_max   = 5;
2269     sctx.shift_lo     = 0.;
2270     sctx.shift_hi     = 1.;
2271   }
2272 
2273   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2274   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2275 
2276   /* initialization */
2277   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2278 
2279   do {
2280     sctx.newshift = PETSC_FALSE;
2281 
2282     for (i=0; i<mbs; i++) jl[i] = mbs;
2283     il[0] = 0;
2284 
2285     for (k = 0; k<mbs; k++) {
2286       /* zero rtmp */
2287       nz = bi[k+1] - bi[k];
2288       bjtmp = bj + bi[k];
2289       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2290 
2291       bval = ba + bi[k];
2292       /* initialize k-th row by the perm[k]-th row of A */
2293       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2294       for (j = jmin; j < jmax; j++) {
2295         col = riip[aj[j]];
2296         if (col >= k) { /* only take upper triangular entry */
2297           rtmp[col] = aa[j];
2298           *bval++  = 0.0; /* for in-place factorization */
2299         }
2300       }
2301       /* shift the diagonal of the matrix */
2302       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2303 
2304       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2305       dk = rtmp[k];
2306       i = jl[k]; /* first row to be added to k_th row  */
2307 
2308       while (i < k) {
2309         nexti = jl[i]; /* next row to be added to k_th row */
2310 
2311         /* compute multiplier, update diag(k) and U(i,k) */
2312         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2313         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2314         dk += uikdi*ba[ili];
2315         ba[ili] = uikdi; /* -U(i,k) */
2316 
2317         /* add multiple of row i to k-th row */
2318         jmin = ili + 1; jmax = bi[i+1];
2319         if (jmin < jmax) {
2320           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2321           /* update il and jl for row i */
2322           il[i] = jmin;
2323           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2324         }
2325         i = nexti;
2326       }
2327 
2328       /* shift the diagonals when zero pivot is detected */
2329       /* compute rs=sum of abs(off-diagonal) */
2330       rs   = 0.0;
2331       jmin = bi[k]+1;
2332       nz   = bi[k+1] - jmin;
2333       bcol = bj + jmin;
2334       for (j=0; j<nz; j++) {
2335         rs += PetscAbsScalar(rtmp[bcol[j]]);
2336       }
2337 
2338       sctx.rs = rs;
2339       sctx.pv = dk;
2340       ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2341       if (sctx.newshift) break;
2342       dk = sctx.pv;
2343 
2344       /* copy data into U(k,:) */
2345       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2346       jmin = bi[k]+1; jmax = bi[k+1];
2347       if (jmin < jmax) {
2348         for (j=jmin; j<jmax; j++) {
2349           col = bj[j]; ba[j] = rtmp[col];
2350         }
2351         /* add the k-th row into il and jl */
2352         il[k] = jmin;
2353         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2354       }
2355     }
2356   } while (sctx.newshift);
2357 
2358   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2359   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2360   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2361 
2362   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2363   if (perm_identity) {
2364     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2365     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2366     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2367     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368   } else {
2369     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2370     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2371     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2372     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2373   }
2374 
2375   C->assembled    = PETSC_TRUE;
2376   C->preallocated = PETSC_TRUE;
2377   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2378   if (sctx.nshift) {
2379     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2380       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2381     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2382       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2383     }
2384   }
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 /*
2389    icc() under revised new data structure.
2390    Factored arrays bj and ba are stored as
2391      U(0,:),...,U(i,:),U(n-1,:)
2392 
2393    ui=fact->i is an array of size n+1, in which
2394    ui+
2395      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2396      ui[n]:  points to U(n-1,n-1)+1
2397 
2398   udiag=fact->diag is an array of size n,in which
2399      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2400 
2401    U(i,:) contains udiag[i] as its last entry, i.e.,
2402     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2403 */
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2407 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2408 {
2409   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2410   Mat_SeqSBAIJ       *b;
2411   PetscErrorCode     ierr;
2412   PetscBool          perm_identity,missing;
2413   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2414   const PetscInt     *rip,*riip;
2415   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2416   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2417   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2418   PetscReal          fill=info->fill,levels=info->levels;
2419   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2420   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2421   PetscBT            lnkbt;
2422   IS                 iperm;
2423 
2424   PetscFunctionBegin;
2425   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2426   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2427   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2428   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2429   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2430 
2431   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2432   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2433   ui[0] = 0;
2434 
2435   /* ICC(0) without matrix ordering: simply rearrange column indices */
2436   if (!levels && perm_identity) {
2437     for (i=0; i<am; i++) {
2438       ncols    = ai[i+1] - a->diag[i];
2439       ui[i+1]  = ui[i] + ncols;
2440       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2441     }
2442     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2443     cols = uj;
2444     for (i=0; i<am; i++) {
2445       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2446       ncols = ai[i+1] - a->diag[i] -1;
2447       for (j=0; j<ncols; j++) *cols++ = aj[j];
2448       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2449     }
2450   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2451     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2452     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2453 
2454     /* initialization */
2455     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2456 
2457     /* jl: linked list for storing indices of the pivot rows
2458        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2459     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2460     for (i=0; i<am; i++) {
2461       jl[i] = am; il[i] = 0;
2462     }
2463 
2464     /* create and initialize a linked list for storing column indices of the active row k */
2465     nlnk = am + 1;
2466     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2467 
2468     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2469     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2470     current_space = free_space;
2471     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2472     current_space_lvl = free_space_lvl;
2473 
2474     for (k=0; k<am; k++) {  /* for each active row k */
2475       /* initialize lnk by the column indices of row rip[k] of A */
2476       nzk   = 0;
2477       ncols = ai[rip[k]+1] - ai[rip[k]];
2478       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2479       ncols_upper = 0;
2480       for (j=0; j<ncols; j++) {
2481         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2482         if (riip[i] >= k) { /* only take upper triangular entry */
2483           ajtmp[ncols_upper] = i;
2484           ncols_upper++;
2485         }
2486       }
2487       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2488       nzk += nlnk;
2489 
2490       /* update lnk by computing fill-in for each pivot row to be merged in */
2491       prow = jl[k]; /* 1st pivot row */
2492 
2493       while (prow < k) {
2494         nextprow = jl[prow];
2495 
2496         /* merge prow into k-th row */
2497         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2498         jmax = ui[prow+1];
2499         ncols = jmax-jmin;
2500         i     = jmin - ui[prow];
2501         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2502         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2503         j     = *(uj - 1);
2504         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2505         nzk += nlnk;
2506 
2507         /* update il and jl for prow */
2508         if (jmin < jmax) {
2509           il[prow] = jmin;
2510           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2511         }
2512         prow = nextprow;
2513       }
2514 
2515       /* if free space is not available, make more free space */
2516       if (current_space->local_remaining<nzk) {
2517         i  = am - k + 1; /* num of unfactored rows */
2518         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2519         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2520         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2521         reallocs++;
2522       }
2523 
2524       /* copy data into free_space and free_space_lvl, then initialize lnk */
2525       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2526       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2527 
2528       /* add the k-th row into il and jl */
2529       if (nzk > 1) {
2530         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2531         jl[k] = jl[i]; jl[i] = k;
2532         il[k] = ui[k] + 1;
2533       }
2534       uj_ptr[k]     = current_space->array;
2535       uj_lvl_ptr[k] = current_space_lvl->array;
2536 
2537       current_space->array           += nzk;
2538       current_space->local_used      += nzk;
2539       current_space->local_remaining -= nzk;
2540 
2541       current_space_lvl->array           += nzk;
2542       current_space_lvl->local_used      += nzk;
2543       current_space_lvl->local_remaining -= nzk;
2544 
2545       ui[k+1] = ui[k] + nzk;
2546     }
2547 
2548     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2549     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2550     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2551     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2552 
2553     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2554     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2555     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2556     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2557     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2558 
2559   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2560 
2561   /* put together the new matrix in MATSEQSBAIJ format */
2562   b    = (Mat_SeqSBAIJ*)(fact)->data;
2563   b->singlemalloc = PETSC_FALSE;
2564   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2565   b->j    = uj;
2566   b->i    = ui;
2567   b->diag = udiag;
2568   b->free_diag = PETSC_TRUE;
2569   b->ilen = 0;
2570   b->imax = 0;
2571   b->row  = perm;
2572   b->col  = perm;
2573   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2574   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2575   b->icol = iperm;
2576   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2577   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2578   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2579   b->maxnz   = b->nz = ui[am];
2580   b->free_a  = PETSC_TRUE;
2581   b->free_ij = PETSC_TRUE;
2582 
2583   fact->info.factor_mallocs   = reallocs;
2584   fact->info.fill_ratio_given = fill;
2585   if (ai[am] != 0) {
2586     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2587     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2588   } else {
2589     fact->info.fill_ratio_needed = 0.0;
2590   }
2591 #if defined(PETSC_USE_INFO)
2592     if (ai[am] != 0) {
2593       PetscReal af = fact->info.fill_ratio_needed;
2594       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2595       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2596       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2597     } else {
2598       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2599     }
2600 #endif
2601   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2607 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2608 {
2609   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2610   Mat_SeqSBAIJ       *b;
2611   PetscErrorCode     ierr;
2612   PetscBool          perm_identity,missing;
2613   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2614   const PetscInt     *rip,*riip;
2615   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2616   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2617   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2618   PetscReal          fill=info->fill,levels=info->levels;
2619   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2620   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2621   PetscBT            lnkbt;
2622   IS                 iperm;
2623 
2624   PetscFunctionBegin;
2625   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2626   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2627   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2628   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2629   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2630 
2631   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2632   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2633   ui[0] = 0;
2634 
2635   /* ICC(0) without matrix ordering: simply copies fill pattern */
2636   if (!levels && perm_identity) {
2637 
2638     for (i=0; i<am; i++) {
2639       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2640       udiag[i] = ui[i];
2641     }
2642     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2643     cols = uj;
2644     for (i=0; i<am; i++) {
2645       aj    = a->j + a->diag[i];
2646       ncols = ui[i+1] - ui[i];
2647       for (j=0; j<ncols; j++) *cols++ = *aj++;
2648     }
2649   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2650     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2651     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2652 
2653     /* initialization */
2654     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2655 
2656     /* jl: linked list for storing indices of the pivot rows
2657        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2658     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2659     for (i=0; i<am; i++) {
2660       jl[i] = am; il[i] = 0;
2661     }
2662 
2663     /* create and initialize a linked list for storing column indices of the active row k */
2664     nlnk = am + 1;
2665     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2666 
2667     /* initial FreeSpace size is fill*(ai[am]+1) */
2668     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2669     current_space = free_space;
2670     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2671     current_space_lvl = free_space_lvl;
2672 
2673     for (k=0; k<am; k++) {  /* for each active row k */
2674       /* initialize lnk by the column indices of row rip[k] of A */
2675       nzk   = 0;
2676       ncols = ai[rip[k]+1] - ai[rip[k]];
2677       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2678       ncols_upper = 0;
2679       for (j=0; j<ncols; j++) {
2680         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2681         if (riip[i] >= k) { /* only take upper triangular entry */
2682           ajtmp[ncols_upper] = i;
2683           ncols_upper++;
2684         }
2685       }
2686       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2687       nzk += nlnk;
2688 
2689       /* update lnk by computing fill-in for each pivot row to be merged in */
2690       prow = jl[k]; /* 1st pivot row */
2691 
2692       while (prow < k) {
2693         nextprow = jl[prow];
2694 
2695         /* merge prow into k-th row */
2696         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2697         jmax = ui[prow+1];
2698         ncols = jmax-jmin;
2699         i     = jmin - ui[prow];
2700         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2701         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2702         j     = *(uj - 1);
2703         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2704         nzk += nlnk;
2705 
2706         /* update il and jl for prow */
2707         if (jmin < jmax) {
2708           il[prow] = jmin;
2709           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2710         }
2711         prow = nextprow;
2712       }
2713 
2714       /* if free space is not available, make more free space */
2715       if (current_space->local_remaining<nzk) {
2716         i = am - k + 1; /* num of unfactored rows */
2717         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2718         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2719         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2720         reallocs++;
2721       }
2722 
2723       /* copy data into free_space and free_space_lvl, then initialize lnk */
2724       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2725       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2726 
2727       /* add the k-th row into il and jl */
2728       if (nzk > 1) {
2729         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2730         jl[k] = jl[i]; jl[i] = k;
2731         il[k] = ui[k] + 1;
2732       }
2733       uj_ptr[k]     = current_space->array;
2734       uj_lvl_ptr[k] = current_space_lvl->array;
2735 
2736       current_space->array           += nzk;
2737       current_space->local_used      += nzk;
2738       current_space->local_remaining -= nzk;
2739 
2740       current_space_lvl->array           += nzk;
2741       current_space_lvl->local_used      += nzk;
2742       current_space_lvl->local_remaining -= nzk;
2743 
2744       ui[k+1] = ui[k] + nzk;
2745     }
2746 
2747 #if defined(PETSC_USE_INFO)
2748     if (ai[am] != 0) {
2749       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2750       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2751       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2752       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2753     } else {
2754       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2755     }
2756 #endif
2757 
2758     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2759     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2760     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2761     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2762 
2763     /* destroy list of free space and other temporary array(s) */
2764     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2765     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2766     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2767     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2768 
2769   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2770 
2771   /* put together the new matrix in MATSEQSBAIJ format */
2772 
2773   b    = (Mat_SeqSBAIJ*)fact->data;
2774   b->singlemalloc = PETSC_FALSE;
2775   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2776   b->j    = uj;
2777   b->i    = ui;
2778   b->diag = udiag;
2779   b->free_diag = PETSC_TRUE;
2780   b->ilen = 0;
2781   b->imax = 0;
2782   b->row  = perm;
2783   b->col  = perm;
2784   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2785   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2786   b->icol = iperm;
2787   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2788   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2789   ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2790   b->maxnz   = b->nz = ui[am];
2791   b->free_a  = PETSC_TRUE;
2792   b->free_ij = PETSC_TRUE;
2793 
2794   fact->info.factor_mallocs    = reallocs;
2795   fact->info.fill_ratio_given  = fill;
2796   if (ai[am] != 0) {
2797     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2798   } else {
2799     fact->info.fill_ratio_needed = 0.0;
2800   }
2801   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2802   PetscFunctionReturn(0);
2803 }
2804 
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2807 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2808 {
2809   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2810   Mat_SeqSBAIJ       *b;
2811   PetscErrorCode     ierr;
2812   PetscBool          perm_identity;
2813   PetscReal          fill = info->fill;
2814   const PetscInt     *rip,*riip;
2815   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2816   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2817   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2818   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2819   PetscBT            lnkbt;
2820   IS                 iperm;
2821 
2822   PetscFunctionBegin;
2823   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2824   /* check whether perm is the identity mapping */
2825   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2826   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2827   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2828   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2829 
2830   /* initialization */
2831   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2832   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2833   ui[0] = 0;
2834 
2835   /* jl: linked list for storing indices of the pivot rows
2836      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2837   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2838   for (i=0; i<am; i++) {
2839     jl[i] = am; il[i] = 0;
2840   }
2841 
2842   /* create and initialize a linked list for storing column indices of the active row k */
2843   nlnk = am + 1;
2844   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2845 
2846   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2847   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2848   current_space = free_space;
2849 
2850   for (k=0; k<am; k++) {  /* for each active row k */
2851     /* initialize lnk by the column indices of row rip[k] of A */
2852     nzk   = 0;
2853     ncols = ai[rip[k]+1] - ai[rip[k]];
2854     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2855     ncols_upper = 0;
2856     for (j=0; j<ncols; j++) {
2857       i = riip[*(aj + ai[rip[k]] + j)];
2858       if (i >= k) { /* only take upper triangular entry */
2859         cols[ncols_upper] = i;
2860         ncols_upper++;
2861       }
2862     }
2863     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2864     nzk += nlnk;
2865 
2866     /* update lnk by computing fill-in for each pivot row to be merged in */
2867     prow = jl[k]; /* 1st pivot row */
2868 
2869     while (prow < k) {
2870       nextprow = jl[prow];
2871       /* merge prow into k-th row */
2872       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2873       jmax = ui[prow+1];
2874       ncols = jmax-jmin;
2875       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2876       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2877       nzk += nlnk;
2878 
2879       /* update il and jl for prow */
2880       if (jmin < jmax) {
2881         il[prow] = jmin;
2882         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2883       }
2884       prow = nextprow;
2885     }
2886 
2887     /* if free space is not available, make more free space */
2888     if (current_space->local_remaining<nzk) {
2889       i  = am - k + 1; /* num of unfactored rows */
2890       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2891       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2892       reallocs++;
2893     }
2894 
2895     /* copy data into free space, then initialize lnk */
2896     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2897 
2898     /* add the k-th row into il and jl */
2899     if (nzk > 1) {
2900       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2901       jl[k] = jl[i]; jl[i] = k;
2902       il[k] = ui[k] + 1;
2903     }
2904     ui_ptr[k] = current_space->array;
2905     current_space->array           += nzk;
2906     current_space->local_used      += nzk;
2907     current_space->local_remaining -= nzk;
2908 
2909     ui[k+1] = ui[k] + nzk;
2910   }
2911 
2912   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2913   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2914   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2915 
2916   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2917   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2918   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2919   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2920 
2921   /* put together the new matrix in MATSEQSBAIJ format */
2922 
2923   b = (Mat_SeqSBAIJ*)fact->data;
2924   b->singlemalloc = PETSC_FALSE;
2925   b->free_a       = PETSC_TRUE;
2926   b->free_ij      = PETSC_TRUE;
2927   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2928   b->j    = uj;
2929   b->i    = ui;
2930   b->diag = udiag;
2931   b->free_diag = PETSC_TRUE;
2932   b->ilen = 0;
2933   b->imax = 0;
2934   b->row  = perm;
2935   b->col  = perm;
2936   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2937   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2938   b->icol = iperm;
2939   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2940   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2941   ierr    = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2942   b->maxnz = b->nz = ui[am];
2943 
2944   fact->info.factor_mallocs    = reallocs;
2945   fact->info.fill_ratio_given  = fill;
2946   if (ai[am] != 0) {
2947     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2948     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2949   } else {
2950     fact->info.fill_ratio_needed = 0.0;
2951   }
2952 #if defined(PETSC_USE_INFO)
2953   if (ai[am] != 0) {
2954     PetscReal af = fact->info.fill_ratio_needed;
2955     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2956     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2957     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2958   } else {
2959      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2960   }
2961 #endif
2962   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 #undef __FUNCT__
2967 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2968 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2969 {
2970   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2971   Mat_SeqSBAIJ       *b;
2972   PetscErrorCode     ierr;
2973   PetscBool          perm_identity;
2974   PetscReal          fill = info->fill;
2975   const PetscInt     *rip,*riip;
2976   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2977   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2978   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2979   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2980   PetscBT            lnkbt;
2981   IS                 iperm;
2982 
2983   PetscFunctionBegin;
2984   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2985   /* check whether perm is the identity mapping */
2986   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2987   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2988   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2989   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2990 
2991   /* initialization */
2992   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2993   ui[0] = 0;
2994 
2995   /* jl: linked list for storing indices of the pivot rows
2996      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2997   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2998   for (i=0; i<am; i++) {
2999     jl[i] = am; il[i] = 0;
3000   }
3001 
3002   /* create and initialize a linked list for storing column indices of the active row k */
3003   nlnk = am + 1;
3004   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3005 
3006   /* initial FreeSpace size is fill*(ai[am]+1) */
3007   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3008   current_space = free_space;
3009 
3010   for (k=0; k<am; k++) {  /* for each active row k */
3011     /* initialize lnk by the column indices of row rip[k] of A */
3012     nzk   = 0;
3013     ncols = ai[rip[k]+1] - ai[rip[k]];
3014     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3015     ncols_upper = 0;
3016     for (j=0; j<ncols; j++) {
3017       i = riip[*(aj + ai[rip[k]] + j)];
3018       if (i >= k) { /* only take upper triangular entry */
3019         cols[ncols_upper] = i;
3020         ncols_upper++;
3021       }
3022     }
3023     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3024     nzk += nlnk;
3025 
3026     /* update lnk by computing fill-in for each pivot row to be merged in */
3027     prow = jl[k]; /* 1st pivot row */
3028 
3029     while (prow < k) {
3030       nextprow = jl[prow];
3031       /* merge prow into k-th row */
3032       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3033       jmax = ui[prow+1];
3034       ncols = jmax-jmin;
3035       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3036       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3037       nzk += nlnk;
3038 
3039       /* update il and jl for prow */
3040       if (jmin < jmax) {
3041         il[prow] = jmin;
3042         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3043       }
3044       prow = nextprow;
3045     }
3046 
3047     /* if free space is not available, make more free space */
3048     if (current_space->local_remaining<nzk) {
3049       i = am - k + 1; /* num of unfactored rows */
3050       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3051       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3052       reallocs++;
3053     }
3054 
3055     /* copy data into free space, then initialize lnk */
3056     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3057 
3058     /* add the k-th row into il and jl */
3059     if (nzk-1 > 0) {
3060       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3061       jl[k] = jl[i]; jl[i] = k;
3062       il[k] = ui[k] + 1;
3063     }
3064     ui_ptr[k] = current_space->array;
3065     current_space->array           += nzk;
3066     current_space->local_used      += nzk;
3067     current_space->local_remaining -= nzk;
3068 
3069     ui[k+1] = ui[k] + nzk;
3070   }
3071 
3072 #if defined(PETSC_USE_INFO)
3073   if (ai[am] != 0) {
3074     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3075     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3076     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3077     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3078   } else {
3079      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3080   }
3081 #endif
3082 
3083   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3084   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3085   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3086 
3087   /* destroy list of free space and other temporary array(s) */
3088   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3089   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3090   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3091 
3092   /* put together the new matrix in MATSEQSBAIJ format */
3093 
3094   b = (Mat_SeqSBAIJ*)fact->data;
3095   b->singlemalloc = PETSC_FALSE;
3096   b->free_a       = PETSC_TRUE;
3097   b->free_ij      = PETSC_TRUE;
3098   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3099   b->j    = uj;
3100   b->i    = ui;
3101   b->diag = 0;
3102   b->ilen = 0;
3103   b->imax = 0;
3104   b->row  = perm;
3105   b->col  = perm;
3106   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3107   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3108   b->icol = iperm;
3109   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3110   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3111   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3112   b->maxnz = b->nz = ui[am];
3113 
3114   fact->info.factor_mallocs    = reallocs;
3115   fact->info.fill_ratio_given  = fill;
3116   if (ai[am] != 0) {
3117     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3118   } else {
3119     fact->info.fill_ratio_needed = 0.0;
3120   }
3121   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3122   PetscFunctionReturn(0);
3123 }
3124 
3125 #undef __FUNCT__
3126 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3127 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3128 {
3129   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3130   PetscErrorCode    ierr;
3131   PetscInt          n = A->rmap->n;
3132   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3133   PetscScalar       *x,sum;
3134   const PetscScalar *b;
3135   const MatScalar   *aa = a->a,*v;
3136   PetscInt          i,nz;
3137 
3138   PetscFunctionBegin;
3139   if (!n) PetscFunctionReturn(0);
3140 
3141   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3142   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3143 
3144   /* forward solve the lower triangular */
3145   x[0] = b[0];
3146   v    = aa;
3147   vi   = aj;
3148   for (i=1; i<n; i++) {
3149     nz  = ai[i+1] - ai[i];
3150     sum = b[i];
3151     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3152     v  += nz;
3153     vi += nz;
3154     x[i] = sum;
3155   }
3156 
3157   /* backward solve the upper triangular */
3158   for (i=n-1; i>=0; i--) {
3159     v   = aa + adiag[i+1] + 1;
3160     vi  = aj + adiag[i+1] + 1;
3161     nz = adiag[i] - adiag[i+1]-1;
3162     sum = x[i];
3163     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3164     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3165   }
3166 
3167   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3168   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3169   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3170   PetscFunctionReturn(0);
3171 }
3172 
3173 #undef __FUNCT__
3174 #define __FUNCT__ "MatSolve_SeqAIJ"
3175 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3176 {
3177   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3178   IS                iscol = a->col,isrow = a->row;
3179   PetscErrorCode    ierr;
3180   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3181   const PetscInt    *rout,*cout,*r,*c;
3182   PetscScalar       *x,*tmp,sum;
3183   const PetscScalar *b;
3184   const MatScalar   *aa = a->a,*v;
3185 
3186   PetscFunctionBegin;
3187   if (!n) PetscFunctionReturn(0);
3188 
3189   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3190   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3191   tmp  = a->solve_work;
3192 
3193   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3194   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3195 
3196   /* forward solve the lower triangular */
3197   tmp[0] = b[r[0]];
3198   v      = aa;
3199   vi     = aj;
3200   for (i=1; i<n; i++) {
3201     nz  = ai[i+1] - ai[i];
3202     sum = b[r[i]];
3203     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3204     tmp[i] = sum;
3205     v += nz; vi += nz;
3206   }
3207 
3208   /* backward solve the upper triangular */
3209   for (i=n-1; i>=0; i--) {
3210     v   = aa + adiag[i+1]+1;
3211     vi  = aj + adiag[i+1]+1;
3212     nz  = adiag[i]-adiag[i+1]-1;
3213     sum = tmp[i];
3214     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3215     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3216   }
3217 
3218   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3219   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3220   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3221   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3222   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 #undef __FUNCT__
3227 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3228 /*
3229     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3230 */
3231 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3232 {
3233   Mat                B = *fact;
3234   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3235   IS                 isicol;
3236   PetscErrorCode     ierr;
3237   const PetscInt     *r,*ic;
3238   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3239   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3240   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3241   PetscInt           nlnk,*lnk;
3242   PetscBT            lnkbt;
3243   PetscBool          row_identity,icol_identity;
3244   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3245   const PetscInt     *ics;
3246   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3247   PetscReal          dt=info->dt,shift=info->shiftamount;
3248   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3249   PetscBool          missing;
3250 
3251   PetscFunctionBegin;
3252 
3253   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3254   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3255 
3256   /* ------- symbolic factorization, can be reused ---------*/
3257   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3258   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3259   adiag=a->diag;
3260 
3261   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3262 
3263   /* bdiag is location of diagonal in factor */
3264   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3265   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3266 
3267   /* allocate row pointers bi */
3268   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3269 
3270   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3271   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3272   nnz_max  = ai[n]+2*n*dtcount+2;
3273 
3274   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3275   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3276 
3277   /* put together the new matrix */
3278   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3279   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3280   b    = (Mat_SeqAIJ*)B->data;
3281   b->free_a       = PETSC_TRUE;
3282   b->free_ij      = PETSC_TRUE;
3283   b->singlemalloc = PETSC_FALSE;
3284   b->a          = ba;
3285   b->j          = bj;
3286   b->i          = bi;
3287   b->diag       = bdiag;
3288   b->ilen       = 0;
3289   b->imax       = 0;
3290   b->row        = isrow;
3291   b->col        = iscol;
3292   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3293   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3294   b->icol       = isicol;
3295   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3296 
3297   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3298   b->maxnz = nnz_max;
3299 
3300   B->factortype            = MAT_FACTOR_ILUDT;
3301   B->info.factor_mallocs   = 0;
3302   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3303   CHKMEMQ;
3304   /* ------- end of symbolic factorization ---------*/
3305 
3306   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3307   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3308   ics  = ic;
3309 
3310   /* linked list for storing column indices of the active row */
3311   nlnk = n + 1;
3312   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3313 
3314   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3315   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3316   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3317   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3318   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3319 
3320   bi[0]    = 0;
3321   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3322   bdiag_rev[n] = bdiag[0];
3323   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3324   for (i=0; i<n; i++) {
3325     /* copy initial fill into linked list */
3326     nzi = ai[r[i]+1] - ai[r[i]];
3327     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3328     nzi_al = adiag[r[i]] - ai[r[i]];
3329     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3330     ajtmp = aj + ai[r[i]];
3331     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3332 
3333     /* load in initial (unfactored row) */
3334     aatmp = a->a + ai[r[i]];
3335     for (j=0; j<nzi; j++) {
3336       rtmp[ics[*ajtmp++]] = *aatmp++;
3337     }
3338 
3339     /* add pivot rows into linked list */
3340     row = lnk[n];
3341     while (row < i ) {
3342       nzi_bl = bi[row+1] - bi[row] + 1;
3343       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3344       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3345       nzi  += nlnk;
3346       row   = lnk[row];
3347     }
3348 
3349     /* copy data from lnk into jtmp, then initialize lnk */
3350     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3351 
3352     /* numerical factorization */
3353     bjtmp = jtmp;
3354     row   = *bjtmp++; /* 1st pivot row */
3355     while  ( row < i ) {
3356       pc         = rtmp + row;
3357       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3358       multiplier = (*pc) * (*pv);
3359       *pc        = multiplier;
3360       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3361         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3362         pv         = ba + bdiag[row+1] + 1;
3363         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3364         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3365         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3366         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3367       }
3368       row = *bjtmp++;
3369     }
3370 
3371     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3372     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3373     nzi_bl = 0; j = 0;
3374     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3375       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3376       nzi_bl++; j++;
3377     }
3378     nzi_bu = nzi - nzi_bl -1;
3379     while (j < nzi) {
3380       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3381       j++;
3382     }
3383 
3384     bjtmp = bj + bi[i];
3385     batmp = ba + bi[i];
3386     /* apply level dropping rule to L part */
3387     ncut = nzi_al + dtcount;
3388     if (ncut < nzi_bl) {
3389       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3390       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3391     } else {
3392       ncut = nzi_bl;
3393     }
3394     for (j=0; j<ncut; j++) {
3395       bjtmp[j] = jtmp[j];
3396       batmp[j] = vtmp[j];
3397       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3398     }
3399     bi[i+1] = bi[i] + ncut;
3400     nzi = ncut + 1;
3401 
3402     /* apply level dropping rule to U part */
3403     ncut = nzi_au + dtcount;
3404     if (ncut < nzi_bu) {
3405       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3406       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3407     } else {
3408       ncut = nzi_bu;
3409     }
3410     nzi += ncut;
3411 
3412     /* mark bdiagonal */
3413     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3414     bdiag_rev[n-i-1] = bdiag[i+1];
3415     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3416     bjtmp = bj + bdiag[i];
3417     batmp = ba + bdiag[i];
3418     *bjtmp = i;
3419     *batmp = diag_tmp; /* rtmp[i]; */
3420     if (*batmp == 0.0) {
3421       *batmp = dt+shift;
3422       /* printf(" row %d add shift %g\n",i,shift); */
3423     }
3424     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3425     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3426 
3427     bjtmp = bj + bdiag[i+1]+1;
3428     batmp = ba + bdiag[i+1]+1;
3429     for (k=0; k<ncut; k++) {
3430       bjtmp[k] = jtmp[nzi_bl+1+k];
3431       batmp[k] = vtmp[nzi_bl+1+k];
3432       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3433     }
3434     /* printf("\n"); */
3435 
3436     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3437     /*
3438     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3439     printf(" ----------------------------\n");
3440     */
3441   } /* for (i=0; i<n; i++) */
3442   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3443   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3444 
3445   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3446   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3447 
3448   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3449   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3450   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3451   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3452 
3453   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3454   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3455 
3456   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3457   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3458   if (row_identity && icol_identity) {
3459     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3460   } else {
3461     B->ops->solve = MatSolve_SeqAIJ;
3462   }
3463 
3464   B->ops->solveadd          = 0;
3465   B->ops->solvetranspose    = 0;
3466   B->ops->solvetransposeadd = 0;
3467   B->ops->matsolve          = 0;
3468   B->assembled              = PETSC_TRUE;
3469   B->preallocated           = PETSC_TRUE;
3470   PetscFunctionReturn(0);
3471 }
3472 
3473 /* a wraper of MatILUDTFactor_SeqAIJ() */
3474 #undef __FUNCT__
3475 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3476 /*
3477     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3478 */
3479 
3480 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3481 {
3482   PetscErrorCode     ierr;
3483 
3484   PetscFunctionBegin;
3485   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3486   PetscFunctionReturn(0);
3487 }
3488 
3489 /*
3490    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3491    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3492 */
3493 #undef __FUNCT__
3494 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3495 /*
3496     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3497 */
3498 
3499 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3500 {
3501   Mat            C=fact;
3502   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3503   IS             isrow = b->row,isicol = b->icol;
3504   PetscErrorCode ierr;
3505   const PetscInt *r,*ic,*ics;
3506   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3507   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3508   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3509   PetscReal      dt=info->dt,shift=info->shiftamount;
3510   PetscBool      row_identity, col_identity;
3511 
3512   PetscFunctionBegin;
3513   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3514   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3515   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3516   ics  = ic;
3517 
3518   for (i=0; i<n; i++) {
3519     /* initialize rtmp array */
3520     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3521     bjtmp = bj + bi[i];
3522     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3523     rtmp[i] = 0.0;
3524     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3525     bjtmp = bj + bdiag[i+1] + 1;
3526     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3527 
3528     /* load in initial unfactored row of A */
3529     /* printf("row %d\n",i); */
3530     nz    = ai[r[i]+1] - ai[r[i]];
3531     ajtmp = aj + ai[r[i]];
3532     v     = aa + ai[r[i]];
3533     for (j=0; j<nz; j++) {
3534       rtmp[ics[*ajtmp++]] = v[j];
3535       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3536     }
3537     /* printf("\n"); */
3538 
3539     /* numerical factorization */
3540     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3541     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3542     k = 0;
3543     while (k < nzl) {
3544       row   = *bjtmp++;
3545       /* printf("  prow %d\n",row); */
3546       pc         = rtmp + row;
3547       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3548       multiplier = (*pc) * (*pv);
3549       *pc        = multiplier;
3550       if (PetscAbsScalar(multiplier) > dt) {
3551         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3552         pv         = b->a + bdiag[row+1] + 1;
3553         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3554         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3555         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3556       }
3557       k++;
3558     }
3559 
3560     /* finished row so stick it into b->a */
3561     /* L-part */
3562     pv = b->a + bi[i] ;
3563     pj = bj + bi[i] ;
3564     nzl = bi[i+1] - bi[i];
3565     for (j=0; j<nzl; j++) {
3566       pv[j] = rtmp[pj[j]];
3567       /* printf(" (%d,%g),",pj[j],pv[j]); */
3568     }
3569 
3570     /* diagonal: invert diagonal entries for simplier triangular solves */
3571     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3572     b->a[bdiag[i]] = 1.0/rtmp[i];
3573     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3574 
3575     /* U-part */
3576     pv = b->a + bdiag[i+1] + 1;
3577     pj = bj + bdiag[i+1] + 1;
3578     nzu = bdiag[i] - bdiag[i+1] - 1;
3579     for (j=0; j<nzu; j++) {
3580       pv[j] = rtmp[pj[j]];
3581       /* printf(" (%d,%g),",pj[j],pv[j]); */
3582     }
3583     /* printf("\n"); */
3584   }
3585 
3586   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3587   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3588   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3589 
3590   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3591   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3592   if (row_identity && col_identity) {
3593     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3594   } else {
3595     C->ops->solve   = MatSolve_SeqAIJ;
3596   }
3597   C->ops->solveadd           = 0;
3598   C->ops->solvetranspose     = 0;
3599   C->ops->solvetransposeadd  = 0;
3600   C->ops->matsolve           = 0;
3601   C->assembled    = PETSC_TRUE;
3602   C->preallocated = PETSC_TRUE;
3603   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3604   PetscFunctionReturn(0);
3605 }
3606