xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision e81e1a36b96a916d72bd8b55b408ecc5f4483efb)
1 #define PETSCMAT_DLL
2 
3 
4 #include "../src/mat/impls/aij/seq/aij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 #include "petscbt.h"
7 #include "../src/mat/utils/freespace.h"
8 
9 EXTERN_C_BEGIN
10 #undef __FUNCT__
11 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
12 /*
13       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
14 
15       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
16 */
17 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
18 {
19   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
20   PetscErrorCode    ierr;
21   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
22   const PetscInt    *ai = a->i, *aj = a->j;
23   const PetscScalar *aa = a->a;
24   PetscTruth        *done;
25   PetscReal         best,past = 0,future;
26 
27   PetscFunctionBegin;
28   /* pick initial row */
29   best = -1;
30   for (i=0; i<n; i++) {
31     future = 0.0;
32     for (j=ai[i]; j<ai[i+1]; j++) {
33       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
34     }
35     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
36     if (past/future > best) {
37       best = past/future;
38       current = i;
39     }
40   }
41 
42   ierr = PetscMalloc(n*sizeof(PetscTruth),&done);CHKERRQ(ierr);
43   ierr = PetscMemzero(done,n*sizeof(PetscTruth));CHKERRQ(ierr);
44   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
45   order[0] = current;
46   for (i=0; i<n-1; i++) {
47     done[current] = PETSC_TRUE;
48     best          = -1;
49     /* loop over all neighbors of current pivot */
50     for (j=ai[current]; j<ai[current+1]; j++) {
51       jj = aj[j];
52       if (done[jj]) continue;
53       /* loop over columns of potential next row computing weights for below and above diagonal */
54       past = future = 0.0;
55       for (k=ai[jj]; k<ai[jj+1]; k++) {
56         kk = aj[k];
57         if (done[kk]) past += PetscAbsScalar(aa[k]);
58         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
59       }
60       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
61       if (past/future > best) {
62         best = past/future;
63         newcurrent = jj;
64       }
65     }
66     if (best == -1) { /* no neighbors to select from so select best of all that remain */
67       best = -1;
68       for (k=0; k<n; k++) {
69         if (done[k]) continue;
70         future = 0.0;
71         past   = 0.0;
72         for (j=ai[k]; j<ai[k+1]; j++) {
73           kk = aj[j];
74           if (done[kk]) past += PetscAbsScalar(aa[j]);
75           else if (kk != k) future  += PetscAbsScalar(aa[j]);
76         }
77         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
78         if (past/future > best) {
79           best = past/future;
80           newcurrent = k;
81         }
82       }
83     }
84     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
85     current = newcurrent;
86     order[i+1] = current;
87   }
88   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,irow);CHKERRQ(ierr);
89   *icol = *irow;
90   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
91   ierr = PetscFree(done);CHKERRQ(ierr);
92   ierr = PetscFree(order);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
97 EXTERN_C_BEGIN
98 #undef __FUNCT__
99 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
100 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
101 {
102   PetscFunctionBegin;
103   *flg = PETSC_TRUE;
104   PetscFunctionReturn(0);
105 }
106 EXTERN_C_END
107 
108 EXTERN_C_BEGIN
109 #undef __FUNCT__
110 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
111 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
112 {
113   PetscInt           n = A->rmap->n;
114   PetscErrorCode     ierr;
115 
116   PetscFunctionBegin;
117   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
118   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
119   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
120     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
121     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
122     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
123   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
124     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
125     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
126     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
127     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
128   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
129   (*B)->factortype = ftype;
130   PetscFunctionReturn(0);
131 }
132 EXTERN_C_END
133 
134 #undef __FUNCT__
135 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
137 {
138   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
139   IS                 isicol;
140   PetscErrorCode     ierr;
141   const PetscInt     *r,*ic;
142   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
143   PetscInt           *bi,*bj,*ajtmp;
144   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
145   PetscReal          f;
146   PetscInt           nlnk,*lnk,k,**bi_ptr;
147   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
148   PetscBT            lnkbt;
149 
150   PetscFunctionBegin;
151   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
152   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
153   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
154   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
155 
156   /* get new row pointers */
157   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
158   bi[0] = 0;
159 
160   /* bdiag is location of diagonal in factor */
161   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
162   bdiag[0] = 0;
163 
164   /* linked list for storing column indices of the active row */
165   nlnk = n + 1;
166   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
167 
168   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
169 
170   /* initial FreeSpace size is f*(ai[n]+1) */
171   f = info->fill;
172   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
173   current_space = free_space;
174 
175   for (i=0; i<n; i++) {
176     /* copy previous fill into linked list */
177     nzi = 0;
178     nnz = ai[r[i]+1] - ai[r[i]];
179     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
180     ajtmp = aj + ai[r[i]];
181     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
182     nzi += nlnk;
183 
184     /* add pivot rows into linked list */
185     row = lnk[n];
186     while (row < i) {
187       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
188       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
189       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
190       nzi += nlnk;
191       row  = lnk[row];
192     }
193     bi[i+1] = bi[i] + nzi;
194     im[i]   = nzi;
195 
196     /* mark bdiag */
197     nzbd = 0;
198     nnz  = nzi;
199     k    = lnk[n];
200     while (nnz-- && k < i){
201       nzbd++;
202       k = lnk[k];
203     }
204     bdiag[i] = bi[i] + nzbd;
205 
206     /* if free space is not available, make more free space */
207     if (current_space->local_remaining<nzi) {
208       nnz = (n - i)*nzi; /* estimated and max additional space needed */
209       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
210       reallocs++;
211     }
212 
213     /* copy data into free space, then initialize lnk */
214     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
215     bi_ptr[i] = current_space->array;
216     current_space->array           += nzi;
217     current_space->local_used      += nzi;
218     current_space->local_remaining -= nzi;
219   }
220 #if defined(PETSC_USE_INFO)
221   if (ai[n] != 0) {
222     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
223     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
224     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
225     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
226     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
227   } else {
228     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
229   }
230 #endif
231 
232   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
233   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
234 
235   /* destroy list of free space and other temporary array(s) */
236   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
237   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
238   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
239   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
240 
241   /* put together the new matrix */
242   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
243   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
244   b    = (Mat_SeqAIJ*)(B)->data;
245   b->free_a       = PETSC_TRUE;
246   b->free_ij      = PETSC_TRUE;
247   b->singlemalloc = PETSC_FALSE;
248   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
249   b->j          = bj;
250   b->i          = bi;
251   b->diag       = bdiag;
252   b->ilen       = 0;
253   b->imax       = 0;
254   b->row        = isrow;
255   b->col        = iscol;
256   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
257   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
258   b->icol       = isicol;
259   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
260 
261   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
262   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
263   b->maxnz = b->nz = bi[n] ;
264 
265   (B)->factortype            = MAT_FACTOR_LU;
266   (B)->info.factor_mallocs   = reallocs;
267   (B)->info.fill_ratio_given = f;
268 
269   if (ai[n]) {
270     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
271   } else {
272     (B)->info.fill_ratio_needed = 0.0;
273   }
274   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
275   if (a->inode.size) {
276     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
277   }
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
283 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
284 {
285   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
286   IS                 isicol;
287   PetscErrorCode     ierr;
288   const PetscInt     *r,*ic;
289   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
290   PetscInt           *bi,*bj,*ajtmp;
291   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
292   PetscReal          f;
293   PetscInt           nlnk,*lnk,k,**bi_ptr;
294   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
295   PetscBT            lnkbt;
296   PetscTruth         olddatastruct=PETSC_FALSE;
297 
298   PetscFunctionBegin;
299   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
300   ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
301   if(olddatastruct){
302     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
303     PetscFunctionReturn(0);
304   }
305 
306   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
307   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
308   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
309   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
310 
311   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
313   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
314   bi[0] = bdiag[0] = 0;
315 
316   /* linked list for storing column indices of the active row */
317   nlnk = n + 1;
318   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
319 
320   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
321 
322   /* initial FreeSpace size is f*(ai[n]+1) */
323   f = info->fill;
324   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
325   current_space = free_space;
326 
327   for (i=0; i<n; i++) {
328     /* copy previous fill into linked list */
329     nzi = 0;
330     nnz = ai[r[i]+1] - ai[r[i]];
331     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
332     ajtmp = aj + ai[r[i]];
333     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
334     nzi += nlnk;
335 
336     /* add pivot rows into linked list */
337     row = lnk[n];
338     while (row < i){
339       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
340       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
342       nzi  += nlnk;
343       row   = lnk[row];
344     }
345     bi[i+1] = bi[i] + nzi;
346     im[i]   = nzi;
347 
348     /* mark bdiag */
349     nzbd = 0;
350     nnz  = nzi;
351     k    = lnk[n];
352     while (nnz-- && k < i){
353       nzbd++;
354       k = lnk[k];
355     }
356     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
357 
358     /* if free space is not available, make more free space */
359     if (current_space->local_remaining<nzi) {
360       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
361       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
362       reallocs++;
363     }
364 
365     /* copy data into free space, then initialize lnk */
366     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
367     bi_ptr[i] = current_space->array;
368     current_space->array           += nzi;
369     current_space->local_used      += nzi;
370     current_space->local_remaining -= nzi;
371   }
372 
373   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
374   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
375 
376   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
377   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
378   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
379   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
380   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
381 
382   /* put together the new matrix */
383   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
384   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
385   b    = (Mat_SeqAIJ*)(B)->data;
386   b->free_a       = PETSC_TRUE;
387   b->free_ij      = PETSC_TRUE;
388   b->singlemalloc = PETSC_FALSE;
389   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
390   b->j          = bj;
391   b->i          = bi;
392   b->diag       = bdiag;
393   b->ilen       = 0;
394   b->imax       = 0;
395   b->row        = isrow;
396   b->col        = iscol;
397   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
398   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
399   b->icol       = isicol;
400   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
401 
402   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
403   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
404   b->maxnz = b->nz = bdiag[0]+1;
405   B->factortype            = MAT_FACTOR_LU;
406   B->info.factor_mallocs   = reallocs;
407   B->info.fill_ratio_given = f;
408 
409   if (ai[n]) {
410     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
411   } else {
412     B->info.fill_ratio_needed = 0.0;
413   }
414 #if defined(PETSC_USE_INFO)
415   if (ai[n] != 0) {
416     PetscReal af = B->info.fill_ratio_needed;
417     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
418     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
419     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
420     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
421   } else {
422     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
423   }
424 #endif
425   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426   if (a->inode.size) {
427     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
428   }
429   PetscFunctionReturn(0);
430 }
431 
432 /*
433     Trouble in factorization, should we dump the original matrix?
434 */
435 #undef __FUNCT__
436 #define __FUNCT__ "MatFactorDumpMatrix"
437 PetscErrorCode MatFactorDumpMatrix(Mat A)
438 {
439   PetscErrorCode ierr;
440   PetscTruth     flg = PETSC_FALSE;
441 
442   PetscFunctionBegin;
443   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
444   if (flg) {
445     PetscViewer viewer;
446     char        filename[PETSC_MAX_PATH_LEN];
447 
448     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
449     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
450     ierr = MatView(A,viewer);CHKERRQ(ierr);
451     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
452   }
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
458 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
459 {
460   Mat              C=B;
461   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
462   IS               isrow = b->row,isicol = b->icol;
463   PetscErrorCode   ierr;
464   const PetscInt   *r,*ic,*ics;
465   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
466   PetscInt         i,j,k,nz,nzL,row,*pj;
467   const PetscInt   *ajtmp,*bjtmp;
468   MatScalar        *rtmp,*pc,multiplier,*pv;
469   const  MatScalar *aa=a->a,*v;
470   PetscTruth       row_identity,col_identity;
471   FactorShiftCtx   sctx;
472   const PetscInt   *ddiag;
473   PetscReal        rs;
474   MatScalar        d;
475 
476   PetscFunctionBegin;
477   /* MatPivotSetUp(): initialize shift context sctx */
478   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
479 
480   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
481     ddiag          = a->diag;
482     sctx.shift_top = info->zeropivot;
483     for (i=0; i<n; i++) {
484       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
485       d  = (aa)[ddiag[i]];
486       rs = -PetscAbsScalar(d) - PetscRealPart(d);
487       v  = aa+ai[i];
488       nz = ai[i+1] - ai[i];
489       for (j=0; j<nz; j++)
490 	rs += PetscAbsScalar(v[j]);
491       if (rs>sctx.shift_top) sctx.shift_top = rs;
492     }
493     sctx.shift_top   *= 1.1;
494     sctx.nshift_max   = 5;
495     sctx.shift_lo     = 0.;
496     sctx.shift_hi     = 1.;
497   }
498 
499   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
500   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
501   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
502   ics  = ic;
503 
504   do {
505     sctx.newshift = PETSC_FALSE;
506     for (i=0; i<n; i++){
507       /* zero rtmp */
508       /* L part */
509       nz    = bi[i+1] - bi[i];
510       bjtmp = bj + bi[i];
511       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
512 
513       /* U part */
514       nz = bdiag[i]-bdiag[i+1];
515       bjtmp = bj + bdiag[i+1]+1;
516       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
517 
518       /* load in initial (unfactored row) */
519       nz    = ai[r[i]+1] - ai[r[i]];
520       ajtmp = aj + ai[r[i]];
521       v     = aa + ai[r[i]];
522       for (j=0; j<nz; j++) {
523         rtmp[ics[ajtmp[j]]] = v[j];
524       }
525       /* ZeropivotApply() */
526       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
527 
528       /* elimination */
529       bjtmp = bj + bi[i];
530       row   = *bjtmp++;
531       nzL   = bi[i+1] - bi[i];
532       for(k=0; k < nzL;k++) {
533         pc = rtmp + row;
534         if (*pc != 0.0) {
535           pv         = b->a + bdiag[row];
536           multiplier = *pc * (*pv);
537           *pc        = multiplier;
538           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
539 	  pv = b->a + bdiag[row+1]+1;
540 	  nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
541           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
543         }
544         row = *bjtmp++;
545       }
546 
547       /* finished row so stick it into b->a */
548       rs = 0.0;
549       /* L part */
550       pv   = b->a + bi[i] ;
551       pj   = b->j + bi[i] ;
552       nz   = bi[i+1] - bi[i];
553       for (j=0; j<nz; j++) {
554         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
555       }
556 
557       /* U part */
558       pv = b->a + bdiag[i+1]+1;
559       pj = b->j + bdiag[i+1]+1;
560       nz = bdiag[i] - bdiag[i+1]-1;
561       for (j=0; j<nz; j++) {
562         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
563       }
564 
565       sctx.rs  = rs;
566       sctx.pv  = rtmp[i];
567       ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr);
568       if(sctx.newshift) break; /* break for-loop */
569       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
570 
571       /* Mark diagonal and invert diagonal for simplier triangular solves */
572       pv  = b->a + bdiag[i];
573       *pv = 1.0/rtmp[i];
574 
575     } /* endof for (i=0; i<n; i++){ */
576 
577     /* MatPivotRefine() */
578     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
579       /*
580        * if no shift in this attempt & shifting & started shifting & can refine,
581        * then try lower shift
582        */
583       sctx.shift_hi       = sctx.shift_fraction;
584       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
585       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
586       sctx.newshift       = PETSC_TRUE;
587       sctx.nshift++;
588     }
589   } while (sctx.newshift);
590 
591   ierr = PetscFree(rtmp);CHKERRQ(ierr);
592   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
593   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
594 
595   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
596   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
597   if (row_identity && col_identity) {
598     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
599   } else {
600     C->ops->solve = MatSolve_SeqAIJ;
601   }
602   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
603   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
604   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
605   C->ops->matsolve           = MatMatSolve_SeqAIJ;
606   C->assembled    = PETSC_TRUE;
607   C->preallocated = PETSC_TRUE;
608   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
609 
610   /* MatShiftView(A,info,&sctx) */
611   if (sctx.nshift){
612     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
613       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
614     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
615       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
616     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
617       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
618     }
619   }
620   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
627 {
628   Mat             C=B;
629   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
630   IS              isrow = b->row,isicol = b->icol;
631   PetscErrorCode  ierr;
632   const PetscInt   *r,*ic,*ics;
633   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
634   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
635   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
636   MatScalar       *pv,*rtmp,*pc,multiplier,d;
637   const MatScalar *v,*aa=a->a;
638   PetscReal       rs=0.0;
639   FactorShiftCtx  sctx;
640   const PetscInt  *ddiag;
641   PetscTruth      row_identity, col_identity;
642 
643   PetscFunctionBegin;
644   /* MatPivotSetUp(): initialize shift context sctx */
645   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
646 
647   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
648     ddiag          = a->diag;
649     sctx.shift_top = info->zeropivot;
650     for (i=0; i<n; i++) {
651       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
652       d  = (aa)[ddiag[i]];
653       rs = -PetscAbsScalar(d) - PetscRealPart(d);
654       v  = aa+ai[i];
655       nz = ai[i+1] - ai[i];
656       for (j=0; j<nz; j++)
657 	rs += PetscAbsScalar(v[j]);
658       if (rs>sctx.shift_top) sctx.shift_top = rs;
659     }
660     sctx.shift_top   *= 1.1;
661     sctx.nshift_max   = 5;
662     sctx.shift_lo     = 0.;
663     sctx.shift_hi     = 1.;
664   }
665 
666   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
667   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
668   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
669   ics  = ic;
670 
671   do {
672     sctx.newshift = PETSC_FALSE;
673     for (i=0; i<n; i++){
674       nz    = bi[i+1] - bi[i];
675       bjtmp = bj + bi[i];
676       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
677 
678       /* load in initial (unfactored row) */
679       nz    = ai[r[i]+1] - ai[r[i]];
680       ajtmp = aj + ai[r[i]];
681       v     = aa + ai[r[i]];
682       for (j=0; j<nz; j++) {
683         rtmp[ics[ajtmp[j]]] = v[j];
684       }
685       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
686 
687       row = *bjtmp++;
688       while  (row < i) {
689         pc = rtmp + row;
690         if (*pc != 0.0) {
691           pv         = b->a + diag_offset[row];
692           pj         = b->j + diag_offset[row] + 1;
693           multiplier = *pc / *pv++;
694           *pc        = multiplier;
695           nz         = bi[row+1] - diag_offset[row] - 1;
696           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
697           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
698         }
699         row = *bjtmp++;
700       }
701       /* finished row so stick it into b->a */
702       pv   = b->a + bi[i] ;
703       pj   = b->j + bi[i] ;
704       nz   = bi[i+1] - bi[i];
705       diag = diag_offset[i] - bi[i];
706       rs   = 0.0;
707       for (j=0; j<nz; j++) {
708         pv[j] = rtmp[pj[j]];
709         rs   += PetscAbsScalar(pv[j]);
710       }
711       rs   -= PetscAbsScalar(pv[diag]);
712 
713       sctx.rs  = rs;
714       sctx.pv  = pv[diag];
715       ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr);
716       if (sctx.newshift) break;
717       pv[diag] = sctx.pv;
718     }
719 
720     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
721       /*
722        * if no shift in this attempt & shifting & started shifting & can refine,
723        * then try lower shift
724        */
725       sctx.shift_hi       = sctx.shift_fraction;
726       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
727       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
728       sctx.newshift       = PETSC_TRUE;
729       sctx.nshift++;
730     }
731   } while (sctx.newshift);
732 
733   /* invert diagonal entries for simplier triangular solves */
734   for (i=0; i<n; i++) {
735     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
736   }
737   ierr = PetscFree(rtmp);CHKERRQ(ierr);
738   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
739   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
740 
741   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
742   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
743   if (row_identity && col_identity) {
744     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
745   } else {
746     C->ops->solve   = MatSolve_SeqAIJ_inplace;
747   }
748   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
749   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
750   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
751   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
752   C->assembled    = PETSC_TRUE;
753   C->preallocated = PETSC_TRUE;
754   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
755   if (sctx.nshift){
756      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
757       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
758     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
759       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
760     }
761   }
762   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
763   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
764   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
768 /*
769    This routine implements inplace ILU(0) with row or/and column permutations.
770    Input:
771      A - original matrix
772    Output;
773      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
774          a->j (col index) is permuted by the inverse of colperm, then sorted
775          a->a reordered accordingly with a->j
776          a->diag (ptr to diagonal elements) is updated.
777 */
778 #undef __FUNCT__
779 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
781 {
782   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
783   IS             isrow = a->row,isicol = a->icol;
784   PetscErrorCode ierr;
785   const PetscInt *r,*ic,*ics;
786   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
787   PetscInt       *ajtmp,nz,row;
788   PetscInt       *diag = a->diag,nbdiag,*pj;
789   PetscScalar    *rtmp,*pc,multiplier,d;
790   MatScalar      *pv,*v;
791   PetscReal      rs;
792   FactorShiftCtx sctx;
793   const  MatScalar *aa=a->a,*vtmp;
794 
795   PetscFunctionBegin;
796   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
797 
798   /* MatPivotSetUp(): initialize shift context sctx */
799   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
800 
801   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
802     const PetscInt   *ddiag = a->diag;
803     sctx.shift_top = info->zeropivot;
804     for (i=0; i<n; i++) {
805       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
806       d  = (aa)[ddiag[i]];
807       rs = -PetscAbsScalar(d) - PetscRealPart(d);
808       vtmp  = aa+ai[i];
809       nz = ai[i+1] - ai[i];
810       for (j=0; j<nz; j++)
811 	rs += PetscAbsScalar(vtmp[j]);
812       if (rs>sctx.shift_top) sctx.shift_top = rs;
813     }
814     sctx.shift_top   *= 1.1;
815     sctx.nshift_max   = 5;
816     sctx.shift_lo     = 0.;
817     sctx.shift_hi     = 1.;
818   }
819 
820   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
821   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
822   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
823   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
824   ics = ic;
825 
826 #if defined(MV)
827   sctx.shift_top      = 0.;
828   sctx.nshift_max     = 0;
829   sctx.shift_lo       = 0.;
830   sctx.shift_hi       = 0.;
831   sctx.shift_fraction = 0.;
832 
833   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
834     sctx.shift_top = 0.;
835     for (i=0; i<n; i++) {
836       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
837       d  = (a->a)[diag[i]];
838       rs = -PetscAbsScalar(d) - PetscRealPart(d);
839       v  = a->a+ai[i];
840       nz = ai[i+1] - ai[i];
841       for (j=0; j<nz; j++)
842 	rs += PetscAbsScalar(v[j]);
843       if (rs>sctx.shift_top) sctx.shift_top = rs;
844     }
845     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
846     sctx.shift_top    *= 1.1;
847     sctx.nshift_max   = 5;
848     sctx.shift_lo     = 0.;
849     sctx.shift_hi     = 1.;
850   }
851 
852   sctx.shift_amount = 0.;
853   sctx.nshift       = 0;
854 #endif
855 
856   do {
857     sctx.newshift = PETSC_FALSE;
858     for (i=0; i<n; i++){
859       /* load in initial unfactored row */
860       nz    = ai[r[i]+1] - ai[r[i]];
861       ajtmp = aj + ai[r[i]];
862       v     = a->a + ai[r[i]];
863       /* sort permuted ajtmp and values v accordingly */
864       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
865       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
866 
867       diag[r[i]] = ai[r[i]];
868       for (j=0; j<nz; j++) {
869         rtmp[ajtmp[j]] = v[j];
870         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
871       }
872       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
873 
874       row = *ajtmp++;
875       while  (row < i) {
876         pc = rtmp + row;
877         if (*pc != 0.0) {
878           pv         = a->a + diag[r[row]];
879           pj         = aj + diag[r[row]] + 1;
880 
881           multiplier = *pc / *pv++;
882           *pc        = multiplier;
883           nz         = ai[r[row]+1] - diag[r[row]] - 1;
884           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
885           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
886         }
887         row = *ajtmp++;
888       }
889       /* finished row so overwrite it onto a->a */
890       pv   = a->a + ai[r[i]] ;
891       pj   = aj + ai[r[i]] ;
892       nz   = ai[r[i]+1] - ai[r[i]];
893       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
894 
895       rs   = 0.0;
896       for (j=0; j<nz; j++) {
897         pv[j] = rtmp[pj[j]];
898         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
899       }
900 
901       sctx.rs  = rs;
902       sctx.pv  = pv[nbdiag];
903       ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr);
904       if (sctx.newshift) break;
905       pv[nbdiag] = sctx.pv;
906     }
907 
908     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
909       /*
910        * if no shift in this attempt & shifting & started shifting & can refine,
911        * then try lower shift
912        */
913       sctx.shift_hi        = sctx.shift_fraction;
914       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
915       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
916       sctx.newshift         = PETSC_TRUE;
917       sctx.nshift++;
918     }
919   } while (sctx.newshift);
920 
921   /* invert diagonal entries for simplier triangular solves */
922   for (i=0; i<n; i++) {
923     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
924   }
925 
926   ierr = PetscFree(rtmp);CHKERRQ(ierr);
927   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
928   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
929   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
930   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
931   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
932   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
933   A->assembled = PETSC_TRUE;
934   A->preallocated = PETSC_TRUE;
935   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
936   if (sctx.nshift){
937     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
938       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
939     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
940       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
941     }
942   }
943   PetscFunctionReturn(0);
944 }
945 
946 /* ----------------------------------------------------------- */
947 #undef __FUNCT__
948 #define __FUNCT__ "MatLUFactor_SeqAIJ"
949 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
950 {
951   PetscErrorCode ierr;
952   Mat            C;
953 
954   PetscFunctionBegin;
955   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
956   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
957   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
958   A->ops->solve            = C->ops->solve;
959   A->ops->solvetranspose   = C->ops->solvetranspose;
960   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
961   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 /* ----------------------------------------------------------- */
965 
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
969 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
970 {
971   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
972   IS                iscol = a->col,isrow = a->row;
973   PetscErrorCode    ierr;
974   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
975   PetscInt          nz;
976   const PetscInt    *rout,*cout,*r,*c;
977   PetscScalar       *x,*tmp,*tmps,sum;
978   const PetscScalar *b;
979   const MatScalar   *aa = a->a,*v;
980 
981   PetscFunctionBegin;
982   if (!n) PetscFunctionReturn(0);
983 
984   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
985   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
986   tmp  = a->solve_work;
987 
988   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
989   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
990 
991   /* forward solve the lower triangular */
992   tmp[0] = b[*r++];
993   tmps   = tmp;
994   for (i=1; i<n; i++) {
995     v   = aa + ai[i] ;
996     vi  = aj + ai[i] ;
997     nz  = a->diag[i] - ai[i];
998     sum = b[*r++];
999     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1000     tmp[i] = sum;
1001   }
1002 
1003   /* backward solve the upper triangular */
1004   for (i=n-1; i>=0; i--){
1005     v   = aa + a->diag[i] + 1;
1006     vi  = aj + a->diag[i] + 1;
1007     nz  = ai[i+1] - a->diag[i] - 1;
1008     sum = tmp[i];
1009     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1010     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1011   }
1012 
1013   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1014   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1015   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1016   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1017   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1023 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1024 {
1025   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1026   IS              iscol = a->col,isrow = a->row;
1027   PetscErrorCode  ierr;
1028   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1029   PetscInt        nz,neq;
1030   const PetscInt  *rout,*cout,*r,*c;
1031   PetscScalar     *x,*b,*tmp,*tmps,sum;
1032   const MatScalar *aa = a->a,*v;
1033   PetscTruth      bisdense,xisdense;
1034 
1035   PetscFunctionBegin;
1036   if (!n) PetscFunctionReturn(0);
1037 
1038   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1039   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1040   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1041   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1042 
1043   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1044   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1045 
1046   tmp  = a->solve_work;
1047   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1048   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1049 
1050   for (neq=0; neq<B->cmap->n; neq++){
1051     /* forward solve the lower triangular */
1052     tmp[0] = b[r[0]];
1053     tmps   = tmp;
1054     for (i=1; i<n; i++) {
1055       v   = aa + ai[i] ;
1056       vi  = aj + ai[i] ;
1057       nz  = a->diag[i] - ai[i];
1058       sum = b[r[i]];
1059       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1060       tmp[i] = sum;
1061     }
1062     /* backward solve the upper triangular */
1063     for (i=n-1; i>=0; i--){
1064       v   = aa + a->diag[i] + 1;
1065       vi  = aj + a->diag[i] + 1;
1066       nz  = ai[i+1] - a->diag[i] - 1;
1067       sum = tmp[i];
1068       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1069       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1070     }
1071 
1072     b += n;
1073     x += n;
1074   }
1075   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1076   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1077   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1078   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1079   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1085 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1086 {
1087   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1088   IS              iscol = a->col,isrow = a->row;
1089   PetscErrorCode  ierr;
1090   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1091   PetscInt        nz,neq;
1092   const PetscInt  *rout,*cout,*r,*c;
1093   PetscScalar     *x,*b,*tmp,sum;
1094   const MatScalar *aa = a->a,*v;
1095   PetscTruth      bisdense,xisdense;
1096 
1097   PetscFunctionBegin;
1098   if (!n) PetscFunctionReturn(0);
1099 
1100   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1101   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1102   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1103   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1104 
1105   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1106   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1107 
1108   tmp  = a->solve_work;
1109   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1110   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1111 
1112   for (neq=0; neq<B->cmap->n; neq++){
1113     /* forward solve the lower triangular */
1114     tmp[0] = b[r[0]];
1115     v      = aa;
1116     vi     = aj;
1117     for (i=1; i<n; i++) {
1118       nz  = ai[i+1] - ai[i];
1119       sum = b[r[i]];
1120       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1121       tmp[i] = sum;
1122       v += nz; vi += nz;
1123     }
1124 
1125     /* backward solve the upper triangular */
1126     for (i=n-1; i>=0; i--){
1127       v   = aa + adiag[i+1]+1;
1128       vi  = aj + adiag[i+1]+1;
1129       nz  = adiag[i]-adiag[i+1]-1;
1130       sum = tmp[i];
1131       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1132       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1133     }
1134 
1135     b += n;
1136     x += n;
1137   }
1138   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1139   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1140   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1141   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1142   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1148 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1149 {
1150   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1151   IS              iscol = a->col,isrow = a->row;
1152   PetscErrorCode  ierr;
1153   const PetscInt  *r,*c,*rout,*cout;
1154   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1155   PetscInt        nz,row;
1156   PetscScalar     *x,*b,*tmp,*tmps,sum;
1157   const MatScalar *aa = a->a,*v;
1158 
1159   PetscFunctionBegin;
1160   if (!n) PetscFunctionReturn(0);
1161 
1162   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1163   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1164   tmp  = a->solve_work;
1165 
1166   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1167   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1168 
1169   /* forward solve the lower triangular */
1170   tmp[0] = b[*r++];
1171   tmps   = tmp;
1172   for (row=1; row<n; row++) {
1173     i   = rout[row]; /* permuted row */
1174     v   = aa + ai[i] ;
1175     vi  = aj + ai[i] ;
1176     nz  = a->diag[i] - ai[i];
1177     sum = b[*r++];
1178     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1179     tmp[row] = sum;
1180   }
1181 
1182   /* backward solve the upper triangular */
1183   for (row=n-1; row>=0; row--){
1184     i   = rout[row]; /* permuted row */
1185     v   = aa + a->diag[i] + 1;
1186     vi  = aj + a->diag[i] + 1;
1187     nz  = ai[i+1] - a->diag[i] - 1;
1188     sum = tmp[row];
1189     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1190     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1191   }
1192 
1193   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1194   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1195   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1196   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1197   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 /* ----------------------------------------------------------- */
1202 #include "../src/mat/impls/aij/seq/ftn-kernels/fsolve.h"
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1205 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1206 {
1207   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1208   PetscErrorCode    ierr;
1209   PetscInt          n = A->rmap->n;
1210   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1211   PetscScalar       *x;
1212   const PetscScalar *b;
1213   const MatScalar   *aa = a->a;
1214 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1215   PetscInt          adiag_i,i,nz,ai_i;
1216   const PetscInt    *vi;
1217   const MatScalar   *v;
1218   PetscScalar       sum;
1219 #endif
1220 
1221   PetscFunctionBegin;
1222   if (!n) PetscFunctionReturn(0);
1223 
1224   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1225   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1226 
1227 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1228   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1229 #else
1230   /* forward solve the lower triangular */
1231   x[0] = b[0];
1232   for (i=1; i<n; i++) {
1233     ai_i = ai[i];
1234     v    = aa + ai_i;
1235     vi   = aj + ai_i;
1236     nz   = adiag[i] - ai_i;
1237     sum  = b[i];
1238     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1239     x[i] = sum;
1240   }
1241 
1242   /* backward solve the upper triangular */
1243   for (i=n-1; i>=0; i--){
1244     adiag_i = adiag[i];
1245     v       = aa + adiag_i + 1;
1246     vi      = aj + adiag_i + 1;
1247     nz      = ai[i+1] - adiag_i - 1;
1248     sum     = x[i];
1249     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1250     x[i]    = sum*aa[adiag_i];
1251   }
1252 #endif
1253   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1254   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1255   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1261 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1262 {
1263   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1264   IS                iscol = a->col,isrow = a->row;
1265   PetscErrorCode    ierr;
1266   PetscInt          i, n = A->rmap->n,j;
1267   PetscInt          nz;
1268   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1269   PetscScalar       *x,*tmp,sum;
1270   const PetscScalar *b;
1271   const MatScalar   *aa = a->a,*v;
1272 
1273   PetscFunctionBegin;
1274   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1275 
1276   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1277   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1278   tmp  = a->solve_work;
1279 
1280   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1281   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1282 
1283   /* forward solve the lower triangular */
1284   tmp[0] = b[*r++];
1285   for (i=1; i<n; i++) {
1286     v   = aa + ai[i] ;
1287     vi  = aj + ai[i] ;
1288     nz  = a->diag[i] - ai[i];
1289     sum = b[*r++];
1290     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1291     tmp[i] = sum;
1292   }
1293 
1294   /* backward solve the upper triangular */
1295   for (i=n-1; i>=0; i--){
1296     v   = aa + a->diag[i] + 1;
1297     vi  = aj + a->diag[i] + 1;
1298     nz  = ai[i+1] - a->diag[i] - 1;
1299     sum = tmp[i];
1300     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1301     tmp[i] = sum*aa[a->diag[i]];
1302     x[*c--] += tmp[i];
1303   }
1304 
1305   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1306   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1307   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1308   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1309   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1310 
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1316 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1317 {
1318   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1319   IS                iscol = a->col,isrow = a->row;
1320   PetscErrorCode    ierr;
1321   PetscInt          i, n = A->rmap->n,j;
1322   PetscInt          nz;
1323   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1324   PetscScalar       *x,*tmp,sum;
1325   const PetscScalar *b;
1326   const MatScalar   *aa = a->a,*v;
1327 
1328   PetscFunctionBegin;
1329   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1330 
1331   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1332   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1333   tmp  = a->solve_work;
1334 
1335   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1336   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1337 
1338   /* forward solve the lower triangular */
1339   tmp[0] = b[r[0]];
1340   v      = aa;
1341   vi     = aj;
1342   for (i=1; i<n; i++) {
1343     nz  = ai[i+1] - ai[i];
1344     sum = b[r[i]];
1345     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1346     tmp[i] = sum;
1347     v += nz; vi += nz;
1348   }
1349 
1350   /* backward solve the upper triangular */
1351   v  = aa + adiag[n-1];
1352   vi = aj + adiag[n-1];
1353   for (i=n-1; i>=0; i--){
1354     nz  = adiag[i] - adiag[i+1] - 1;
1355     sum = tmp[i];
1356     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1357     tmp[i] = sum*v[nz];
1358     x[c[i]] += tmp[i];
1359     v += nz+1; vi += nz+1;
1360   }
1361 
1362   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1363   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1364   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1365   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1366   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1367 
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1373 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1374 {
1375   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1376   IS                iscol = a->col,isrow = a->row;
1377   PetscErrorCode    ierr;
1378   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1379   PetscInt          i,n = A->rmap->n,j;
1380   PetscInt          nz;
1381   PetscScalar       *x,*tmp,s1;
1382   const MatScalar   *aa = a->a,*v;
1383   const PetscScalar *b;
1384 
1385   PetscFunctionBegin;
1386   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1387   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1388   tmp  = a->solve_work;
1389 
1390   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1391   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1392 
1393   /* copy the b into temp work space according to permutation */
1394   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1395 
1396   /* forward solve the U^T */
1397   for (i=0; i<n; i++) {
1398     v   = aa + diag[i] ;
1399     vi  = aj + diag[i] + 1;
1400     nz  = ai[i+1] - diag[i] - 1;
1401     s1  = tmp[i];
1402     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1403     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1404     tmp[i] = s1;
1405   }
1406 
1407   /* backward solve the L^T */
1408   for (i=n-1; i>=0; i--){
1409     v   = aa + diag[i] - 1 ;
1410     vi  = aj + diag[i] - 1 ;
1411     nz  = diag[i] - ai[i];
1412     s1  = tmp[i];
1413     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1414   }
1415 
1416   /* copy tmp into x according to permutation */
1417   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1418 
1419   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1420   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1421   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1422   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1423 
1424   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1430 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1431 {
1432   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1433   IS                iscol = a->col,isrow = a->row;
1434   PetscErrorCode    ierr;
1435   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1436   PetscInt          i,n = A->rmap->n,j;
1437   PetscInt          nz;
1438   PetscScalar       *x,*tmp,s1;
1439   const MatScalar   *aa = a->a,*v;
1440   const PetscScalar *b;
1441 
1442   PetscFunctionBegin;
1443   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1444   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1445   tmp  = a->solve_work;
1446 
1447   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1448   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1449 
1450   /* copy the b into temp work space according to permutation */
1451   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1452 
1453   /* forward solve the U^T */
1454   for (i=0; i<n; i++) {
1455     v   = aa + adiag[i+1] + 1;
1456     vi  = aj + adiag[i+1] + 1;
1457     nz  = adiag[i] - adiag[i+1] - 1;
1458     s1  = tmp[i];
1459     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1460     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1461     tmp[i] = s1;
1462   }
1463 
1464   /* backward solve the L^T */
1465   for (i=n-1; i>=0; i--){
1466     v   = aa + ai[i];
1467     vi  = aj + ai[i];
1468     nz  = ai[i+1] - ai[i];
1469     s1  = tmp[i];
1470     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1471   }
1472 
1473   /* copy tmp into x according to permutation */
1474   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1475 
1476   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1477   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1478   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1479   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1480 
1481   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1487 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1488 {
1489   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1490   IS                iscol = a->col,isrow = a->row;
1491   PetscErrorCode    ierr;
1492   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1493   PetscInt          i,n = A->rmap->n,j;
1494   PetscInt          nz;
1495   PetscScalar       *x,*tmp,s1;
1496   const MatScalar   *aa = a->a,*v;
1497   const PetscScalar *b;
1498 
1499   PetscFunctionBegin;
1500   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1501   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1502   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1503   tmp  = a->solve_work;
1504 
1505   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1506   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1507 
1508   /* copy the b into temp work space according to permutation */
1509   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1510 
1511   /* forward solve the U^T */
1512   for (i=0; i<n; i++) {
1513     v   = aa + diag[i] ;
1514     vi  = aj + diag[i] + 1;
1515     nz  = ai[i+1] - diag[i] - 1;
1516     s1  = tmp[i];
1517     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1518     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1519     tmp[i] = s1;
1520   }
1521 
1522   /* backward solve the L^T */
1523   for (i=n-1; i>=0; i--){
1524     v   = aa + diag[i] - 1 ;
1525     vi  = aj + diag[i] - 1 ;
1526     nz  = diag[i] - ai[i];
1527     s1  = tmp[i];
1528     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1529   }
1530 
1531   /* copy tmp into x according to permutation */
1532   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1533 
1534   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1535   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1536   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1537   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1538 
1539   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1545 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1546 {
1547   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1548   IS                iscol = a->col,isrow = a->row;
1549   PetscErrorCode    ierr;
1550   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1551   PetscInt          i,n = A->rmap->n,j;
1552   PetscInt          nz;
1553   PetscScalar       *x,*tmp,s1;
1554   const MatScalar   *aa = a->a,*v;
1555   const PetscScalar *b;
1556 
1557   PetscFunctionBegin;
1558   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1559   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1560   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1561   tmp  = a->solve_work;
1562 
1563   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1564   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1565 
1566   /* copy the b into temp work space according to permutation */
1567   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1568 
1569   /* forward solve the U^T */
1570   for (i=0; i<n; i++) {
1571     v   = aa + adiag[i+1] + 1;
1572     vi  = aj + adiag[i+1] + 1;
1573     nz  = adiag[i] - adiag[i+1] - 1;
1574     s1  = tmp[i];
1575     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1576     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1577     tmp[i] = s1;
1578   }
1579 
1580 
1581   /* backward solve the L^T */
1582   for (i=n-1; i>=0; i--){
1583     v   = aa + ai[i] ;
1584     vi  = aj + ai[i];
1585     nz  = ai[i+1] - ai[i];
1586     s1  = tmp[i];
1587     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1588   }
1589 
1590   /* copy tmp into x according to permutation */
1591   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1592 
1593   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1594   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1595   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1596   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1597 
1598   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 /* ----------------------------------------------------------------*/
1603 
1604 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscTruth);
1605 
1606 /*
1607    ilu() under revised new data structure.
1608    Factored arrays bj and ba are stored as
1609      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1610 
1611    bi=fact->i is an array of size n+1, in which
1612    bi+
1613      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1614      bi[n]:  points to L(n-1,n-1)+1
1615 
1616   bdiag=fact->diag is an array of size n+1,in which
1617      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1618      bdiag[n]: points to entry of U(n-1,0)-1
1619 
1620    U(i,:) contains bdiag[i] as its last entry, i.e.,
1621     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1622 */
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1625 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1626 {
1627 
1628   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1629   PetscErrorCode     ierr;
1630   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1631   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1632   PetscTruth         missing;
1633   IS                 isicol;
1634 
1635   PetscFunctionBegin;
1636   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1637   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1638   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1639   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1640 
1641   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1642   b    = (Mat_SeqAIJ*)(fact)->data;
1643 
1644   /* allocate matrix arrays for new data structure */
1645   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1646   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1647   b->singlemalloc = PETSC_TRUE;
1648   if (!b->diag){
1649     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1650     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1651   }
1652   bdiag = b->diag;
1653 
1654   if (n > 0) {
1655     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1656   }
1657 
1658   /* set bi and bj with new data structure */
1659   bi = b->i;
1660   bj = b->j;
1661 
1662   /* L part */
1663   bi[0] = 0;
1664   for (i=0; i<n; i++){
1665     nz = adiag[i] - ai[i];
1666     bi[i+1] = bi[i] + nz;
1667     aj = a->j + ai[i];
1668     for (j=0; j<nz; j++){
1669       /*   *bj = aj[j]; bj++; */
1670       bj[k++] = aj[j];
1671     }
1672   }
1673 
1674   /* U part */
1675   bdiag[n] = bi[n]-1;
1676   for (i=n-1; i>=0; i--){
1677     nz = ai[i+1] - adiag[i] - 1;
1678     aj = a->j + adiag[i] + 1;
1679     for (j=0; j<nz; j++){
1680       /*      *bj = aj[j]; bj++; */
1681       bj[k++] = aj[j];
1682     }
1683     /* diag[i] */
1684     /*    *bj = i; bj++; */
1685     bj[k++] = i;
1686     bdiag[i] = bdiag[i+1] + nz + 1;
1687   }
1688 
1689   fact->factortype             = MAT_FACTOR_ILU;
1690   fact->info.factor_mallocs    = 0;
1691   fact->info.fill_ratio_given  = info->fill;
1692   fact->info.fill_ratio_needed = 1.0;
1693   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1694 
1695   b       = (Mat_SeqAIJ*)(fact)->data;
1696   b->row  = isrow;
1697   b->col  = iscol;
1698   b->icol = isicol;
1699   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1700   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1701   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1707 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1708 {
1709   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1710   IS                 isicol;
1711   PetscErrorCode     ierr;
1712   const PetscInt     *r,*ic;
1713   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1714   PetscInt           *bi,*cols,nnz,*cols_lvl;
1715   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1716   PetscInt           i,levels,diagonal_fill;
1717   PetscTruth         col_identity,row_identity;
1718   PetscReal          f;
1719   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1720   PetscBT            lnkbt;
1721   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1722   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1723   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1724 
1725   PetscFunctionBegin;
1726   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1727   /*
1728   PetscTruth         olddatastruct=PETSC_FALSE;
1729   ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1730   if(olddatastruct){
1731     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1732     PetscFunctionReturn(0);
1733   }
1734   */
1735 
1736   levels = (PetscInt)info->levels;
1737   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1738   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1739 
1740   if (!levels && row_identity && col_identity) {
1741     /* special case: ilu(0) with natural ordering */
1742     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1743     if (a->inode.size) {
1744       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1745     }
1746     PetscFunctionReturn(0);
1747   }
1748 
1749   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1750   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1751   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1752   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1753 
1754   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1755   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1756   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1757   bi[0] = bdiag[0] = 0;
1758 
1759   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1760 
1761   /* create a linked list for storing column indices of the active row */
1762   nlnk = n + 1;
1763   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1764 
1765   /* initial FreeSpace size is f*(ai[n]+1) */
1766   f             = info->fill;
1767   diagonal_fill = (PetscInt)info->diagonal_fill;
1768   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1769   current_space = free_space;
1770   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1771   current_space_lvl = free_space_lvl;
1772 
1773   for (i=0; i<n; i++) {
1774     nzi = 0;
1775     /* copy current row into linked list */
1776     nnz  = ai[r[i]+1] - ai[r[i]];
1777     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1778     cols = aj + ai[r[i]];
1779     lnk[i] = -1; /* marker to indicate if diagonal exists */
1780     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1781     nzi += nlnk;
1782 
1783     /* make sure diagonal entry is included */
1784     if (diagonal_fill && lnk[i] == -1) {
1785       fm = n;
1786       while (lnk[fm] < i) fm = lnk[fm];
1787       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1788       lnk[fm]    = i;
1789       lnk_lvl[i] = 0;
1790       nzi++; dcount++;
1791     }
1792 
1793     /* add pivot rows into the active row */
1794     nzbd = 0;
1795     prow = lnk[n];
1796     while (prow < i) {
1797       nnz      = bdiag[prow];
1798       cols     = bj_ptr[prow] + nnz + 1;
1799       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1800       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1801       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1802       nzi += nlnk;
1803       prow = lnk[prow];
1804       nzbd++;
1805     }
1806     bdiag[i] = nzbd;
1807     bi[i+1]  = bi[i] + nzi;
1808 
1809     /* if free space is not available, make more free space */
1810     if (current_space->local_remaining<nzi) {
1811       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1812       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1813       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1814       reallocs++;
1815     }
1816 
1817     /* copy data into free_space and free_space_lvl, then initialize lnk */
1818     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1819     bj_ptr[i]    = current_space->array;
1820     bjlvl_ptr[i] = current_space_lvl->array;
1821 
1822     /* make sure the active row i has diagonal entry */
1823     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1824 
1825     current_space->array           += nzi;
1826     current_space->local_used      += nzi;
1827     current_space->local_remaining -= nzi;
1828     current_space_lvl->array           += nzi;
1829     current_space_lvl->local_used      += nzi;
1830     current_space_lvl->local_remaining -= nzi;
1831   }
1832 
1833   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1834   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1835 
1836   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1837   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1838   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1839 
1840   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1841   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1842   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1843 
1844 #if defined(PETSC_USE_INFO)
1845   {
1846     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1847     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1848     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1849     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1850     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1851     if (diagonal_fill) {
1852       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1853     }
1854   }
1855 #endif
1856 
1857   /* put together the new matrix */
1858   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1859   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1860   b = (Mat_SeqAIJ*)(fact)->data;
1861   b->free_a       = PETSC_TRUE;
1862   b->free_ij      = PETSC_TRUE;
1863   b->singlemalloc = PETSC_FALSE;
1864   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1865   b->j          = bj;
1866   b->i          = bi;
1867   b->diag       = bdiag;
1868   b->ilen       = 0;
1869   b->imax       = 0;
1870   b->row        = isrow;
1871   b->col        = iscol;
1872   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1873   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1874   b->icol       = isicol;
1875   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1876   /* In b structure:  Free imax, ilen, old a, old j.
1877      Allocate bdiag, solve_work, new a, new j */
1878   ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1879   b->maxnz = b->nz = bdiag[0]+1;
1880   (fact)->info.factor_mallocs    = reallocs;
1881   (fact)->info.fill_ratio_given  = f;
1882   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1883   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1884   if (a->inode.size) {
1885     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1886   }
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1892 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1893 {
1894   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1895   IS                 isicol;
1896   PetscErrorCode     ierr;
1897   const PetscInt     *r,*ic;
1898   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1899   PetscInt           *bi,*cols,nnz,*cols_lvl;
1900   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1901   PetscInt           i,levels,diagonal_fill;
1902   PetscTruth         col_identity,row_identity;
1903   PetscReal          f;
1904   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1905   PetscBT            lnkbt;
1906   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1907   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1908   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1909   PetscTruth         missing;
1910 
1911   PetscFunctionBegin;
1912   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1913   f             = info->fill;
1914   levels        = (PetscInt)info->levels;
1915   diagonal_fill = (PetscInt)info->diagonal_fill;
1916   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1917 
1918   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1919   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1920   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1921     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1922     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1923     if (a->inode.size) {
1924       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1925     }
1926     fact->factortype = MAT_FACTOR_ILU;
1927     (fact)->info.factor_mallocs    = 0;
1928     (fact)->info.fill_ratio_given  = info->fill;
1929     (fact)->info.fill_ratio_needed = 1.0;
1930     b               = (Mat_SeqAIJ*)(fact)->data;
1931     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1932     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1933     b->row              = isrow;
1934     b->col              = iscol;
1935     b->icol             = isicol;
1936     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1937     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1938     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1939     PetscFunctionReturn(0);
1940   }
1941 
1942   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1943   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1944 
1945   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1946   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1947   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1948   bi[0] = bdiag[0] = 0;
1949 
1950   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1951 
1952   /* create a linked list for storing column indices of the active row */
1953   nlnk = n + 1;
1954   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1955 
1956   /* initial FreeSpace size is f*(ai[n]+1) */
1957   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1958   current_space = free_space;
1959   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1960   current_space_lvl = free_space_lvl;
1961 
1962   for (i=0; i<n; i++) {
1963     nzi = 0;
1964     /* copy current row into linked list */
1965     nnz  = ai[r[i]+1] - ai[r[i]];
1966     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1967     cols = aj + ai[r[i]];
1968     lnk[i] = -1; /* marker to indicate if diagonal exists */
1969     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1970     nzi += nlnk;
1971 
1972     /* make sure diagonal entry is included */
1973     if (diagonal_fill && lnk[i] == -1) {
1974       fm = n;
1975       while (lnk[fm] < i) fm = lnk[fm];
1976       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1977       lnk[fm]    = i;
1978       lnk_lvl[i] = 0;
1979       nzi++; dcount++;
1980     }
1981 
1982     /* add pivot rows into the active row */
1983     nzbd = 0;
1984     prow = lnk[n];
1985     while (prow < i) {
1986       nnz      = bdiag[prow];
1987       cols     = bj_ptr[prow] + nnz + 1;
1988       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1989       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1990       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1991       nzi += nlnk;
1992       prow = lnk[prow];
1993       nzbd++;
1994     }
1995     bdiag[i] = nzbd;
1996     bi[i+1]  = bi[i] + nzi;
1997 
1998     /* if free space is not available, make more free space */
1999     if (current_space->local_remaining<nzi) {
2000       nnz = nzi*(n - i); /* estimated and max additional space needed */
2001       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2002       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2003       reallocs++;
2004     }
2005 
2006     /* copy data into free_space and free_space_lvl, then initialize lnk */
2007     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2008     bj_ptr[i]    = current_space->array;
2009     bjlvl_ptr[i] = current_space_lvl->array;
2010 
2011     /* make sure the active row i has diagonal entry */
2012     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2013 
2014     current_space->array           += nzi;
2015     current_space->local_used      += nzi;
2016     current_space->local_remaining -= nzi;
2017     current_space_lvl->array           += nzi;
2018     current_space_lvl->local_used      += nzi;
2019     current_space_lvl->local_remaining -= nzi;
2020   }
2021 
2022   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2023   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2024 
2025   /* destroy list of free space and other temporary arrays */
2026   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2027   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2028   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2029   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2030   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2031 
2032 #if defined(PETSC_USE_INFO)
2033   {
2034     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2035     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2036     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2037     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2038     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2039     if (diagonal_fill) {
2040       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2041     }
2042   }
2043 #endif
2044 
2045   /* put together the new matrix */
2046   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2047   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2048   b = (Mat_SeqAIJ*)(fact)->data;
2049   b->free_a       = PETSC_TRUE;
2050   b->free_ij      = PETSC_TRUE;
2051   b->singlemalloc = PETSC_FALSE;
2052   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2053   b->j          = bj;
2054   b->i          = bi;
2055   for (i=0; i<n; i++) bdiag[i] += bi[i];
2056   b->diag       = bdiag;
2057   b->ilen       = 0;
2058   b->imax       = 0;
2059   b->row        = isrow;
2060   b->col        = iscol;
2061   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2062   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2063   b->icol       = isicol;
2064   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2065   /* In b structure:  Free imax, ilen, old a, old j.
2066      Allocate bdiag, solve_work, new a, new j */
2067   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2068   b->maxnz             = b->nz = bi[n] ;
2069   (fact)->info.factor_mallocs    = reallocs;
2070   (fact)->info.fill_ratio_given  = f;
2071   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2072   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2073   if (a->inode.size) {
2074     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2075   }
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2081 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2082 {
2083   Mat            C = B;
2084   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2085   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2086   IS             ip=b->row,iip = b->icol;
2087   PetscErrorCode ierr;
2088   const PetscInt *rip,*riip;
2089   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2090   PetscInt       *ai=a->i,*aj=a->j;
2091   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2092   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2093   PetscTruth     perm_identity;
2094   FactorShiftCtx sctx;
2095   PetscReal      rs;
2096   MatScalar      d,*v;
2097 
2098   PetscFunctionBegin;
2099   /* MatPivotSetUp(): initialize shift context sctx */
2100   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2101 
2102   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2103     sctx.shift_top = info->zeropivot;
2104     for (i=0; i<mbs; i++) {
2105       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2106       d  = (aa)[a->diag[i]];
2107       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2108       v  = aa+ai[i];
2109       nz = ai[i+1] - ai[i];
2110       for (j=0; j<nz; j++)
2111 	rs += PetscAbsScalar(v[j]);
2112       if (rs>sctx.shift_top) sctx.shift_top = rs;
2113     }
2114     sctx.shift_top   *= 1.1;
2115     sctx.nshift_max   = 5;
2116     sctx.shift_lo     = 0.;
2117     sctx.shift_hi     = 1.;
2118   }
2119 
2120   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2121   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2122 
2123   /* allocate working arrays
2124      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2125      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2126   */
2127   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2128 
2129   do {
2130     sctx.newshift = PETSC_FALSE;
2131 
2132     for (i=0; i<mbs; i++) c2r[i] = mbs;
2133     il[0] = 0;
2134 
2135     for (k = 0; k<mbs; k++){
2136       /* zero rtmp */
2137       nz = bi[k+1] - bi[k];
2138       bjtmp = bj + bi[k];
2139       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2140 
2141       /* load in initial unfactored row */
2142       bval = ba + bi[k];
2143       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2144       for (j = jmin; j < jmax; j++){
2145         col = riip[aj[j]];
2146         if (col >= k){ /* only take upper triangular entry */
2147           rtmp[col] = aa[j];
2148           *bval++   = 0.0; /* for in-place factorization */
2149         }
2150       }
2151       /* shift the diagonal of the matrix: ZeropivotApply() */
2152       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2153 
2154       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2155       dk = rtmp[k];
2156       i  = c2r[k]; /* first row to be added to k_th row  */
2157 
2158       while (i < k){
2159         nexti = c2r[i]; /* next row to be added to k_th row */
2160 
2161         /* compute multiplier, update diag(k) and U(i,k) */
2162         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2163         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2164         dk   += uikdi*ba[ili]; /* update diag[k] */
2165         ba[ili] = uikdi; /* -U(i,k) */
2166 
2167         /* add multiple of row i to k-th row */
2168         jmin = ili + 1; jmax = bi[i+1];
2169         if (jmin < jmax){
2170           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2171           /* update il and c2r for row i */
2172           il[i] = jmin;
2173           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2174         }
2175         i = nexti;
2176       }
2177 
2178       /* copy data into U(k,:) */
2179       rs   = 0.0;
2180       jmin = bi[k]; jmax = bi[k+1]-1;
2181       if (jmin < jmax) {
2182         for (j=jmin; j<jmax; j++){
2183           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2184         }
2185         /* add the k-th row into il and c2r */
2186         il[k] = jmin;
2187         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2188       }
2189 
2190       /* MatPivotCheck() */
2191       sctx.rs  = rs;
2192       sctx.pv  = dk;
2193       ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr);
2194       if(sctx.newshift) break;
2195       dk = sctx.pv;
2196 
2197       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2198     }
2199   } while (sctx.newshift);
2200 
2201   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2202   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2203   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2204 
2205   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2206   if (perm_identity){
2207     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2208     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2209     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2210     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2211   } else {
2212     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2213     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2214     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2215     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2216   }
2217 
2218   C->assembled    = PETSC_TRUE;
2219   C->preallocated = PETSC_TRUE;
2220   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2221 
2222   /* MatPivotView() */
2223   if (sctx.nshift){
2224     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2225       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2226     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2227       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2228     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2229       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2230     }
2231   }
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 #undef __FUNCT__
2236 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2237 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2238 {
2239   Mat            C = B;
2240   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2241   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2242   IS             ip=b->row,iip = b->icol;
2243   PetscErrorCode ierr;
2244   const PetscInt *rip,*riip;
2245   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2246   PetscInt       *ai=a->i,*aj=a->j;
2247   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2248   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2249   PetscTruth     perm_identity;
2250   FactorShiftCtx sctx;
2251   PetscReal      rs;
2252   MatScalar      d,*v;
2253 
2254   PetscFunctionBegin;
2255   /* MatPivotSetUp(): initialize shift context sctx */
2256   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2257 
2258   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2259     sctx.shift_top = info->zeropivot;
2260     for (i=0; i<mbs; i++) {
2261       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2262       d  = (aa)[a->diag[i]];
2263       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2264       v  = aa+ai[i];
2265       nz = ai[i+1] - ai[i];
2266       for (j=0; j<nz; j++)
2267 	rs += PetscAbsScalar(v[j]);
2268       if (rs>sctx.shift_top) sctx.shift_top = rs;
2269     }
2270     sctx.shift_top   *= 1.1;
2271     sctx.nshift_max   = 5;
2272     sctx.shift_lo     = 0.;
2273     sctx.shift_hi     = 1.;
2274   }
2275 
2276   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2277   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2278 
2279   /* initialization */
2280   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2281 
2282   do {
2283     sctx.newshift = PETSC_FALSE;
2284 
2285     for (i=0; i<mbs; i++) jl[i] = mbs;
2286     il[0] = 0;
2287 
2288     for (k = 0; k<mbs; k++){
2289       /* zero rtmp */
2290       nz = bi[k+1] - bi[k];
2291       bjtmp = bj + bi[k];
2292       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2293 
2294       bval = ba + bi[k];
2295       /* initialize k-th row by the perm[k]-th row of A */
2296       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2297       for (j = jmin; j < jmax; j++){
2298         col = riip[aj[j]];
2299         if (col >= k){ /* only take upper triangular entry */
2300           rtmp[col] = aa[j];
2301           *bval++  = 0.0; /* for in-place factorization */
2302         }
2303       }
2304       /* shift the diagonal of the matrix */
2305       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2306 
2307       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2308       dk = rtmp[k];
2309       i = jl[k]; /* first row to be added to k_th row  */
2310 
2311       while (i < k){
2312         nexti = jl[i]; /* next row to be added to k_th row */
2313 
2314         /* compute multiplier, update diag(k) and U(i,k) */
2315         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2316         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2317         dk += uikdi*ba[ili];
2318         ba[ili] = uikdi; /* -U(i,k) */
2319 
2320         /* add multiple of row i to k-th row */
2321         jmin = ili + 1; jmax = bi[i+1];
2322         if (jmin < jmax){
2323           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2324           /* update il and jl for row i */
2325           il[i] = jmin;
2326           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2327         }
2328         i = nexti;
2329       }
2330 
2331       /* shift the diagonals when zero pivot is detected */
2332       /* compute rs=sum of abs(off-diagonal) */
2333       rs   = 0.0;
2334       jmin = bi[k]+1;
2335       nz   = bi[k+1] - jmin;
2336       bcol = bj + jmin;
2337       for (j=0; j<nz; j++) {
2338         rs += PetscAbsScalar(rtmp[bcol[j]]);
2339       }
2340 
2341       sctx.rs = rs;
2342       sctx.pv = dk;
2343       ierr = MatPivotCheck(info,&sctx,k);CHKERRQ(ierr);
2344       if (sctx.newshift) break;
2345       dk = sctx.pv;
2346 
2347       /* copy data into U(k,:) */
2348       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2349       jmin = bi[k]+1; jmax = bi[k+1];
2350       if (jmin < jmax) {
2351         for (j=jmin; j<jmax; j++){
2352           col = bj[j]; ba[j] = rtmp[col];
2353         }
2354         /* add the k-th row into il and jl */
2355         il[k] = jmin;
2356         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2357       }
2358     }
2359   } while (sctx.newshift);
2360 
2361   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2362   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2363   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2364 
2365   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2366   if (perm_identity){
2367     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2370     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2371   } else {
2372     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2373     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2374     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2375     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2376   }
2377 
2378   C->assembled    = PETSC_TRUE;
2379   C->preallocated = PETSC_TRUE;
2380   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2381   if (sctx.nshift){
2382     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2383       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2384     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2385       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2386     }
2387   }
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 /*
2392    icc() under revised new data structure.
2393    Factored arrays bj and ba are stored as
2394      U(0,:),...,U(i,:),U(n-1,:)
2395 
2396    ui=fact->i is an array of size n+1, in which
2397    ui+
2398      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2399      ui[n]:  points to U(n-1,n-1)+1
2400 
2401   udiag=fact->diag is an array of size n,in which
2402      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2403 
2404    U(i,:) contains udiag[i] as its last entry, i.e.,
2405     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2406 */
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2410 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2411 {
2412   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2413   Mat_SeqSBAIJ       *b;
2414   PetscErrorCode     ierr;
2415   PetscTruth         perm_identity,missing;
2416   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2417   const PetscInt     *rip,*riip;
2418   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2419   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2420   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2421   PetscReal          fill=info->fill,levels=info->levels;
2422   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2423   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2424   PetscBT            lnkbt;
2425   IS                 iperm;
2426 
2427   PetscFunctionBegin;
2428   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2429   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2430   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2431   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2432   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2433 
2434   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2435   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2436   ui[0] = 0;
2437 
2438   /* ICC(0) without matrix ordering: simply rearrange column indices */
2439   if (!levels && perm_identity) {
2440     for (i=0; i<am; i++) {
2441       ncols    = ai[i+1] - a->diag[i];
2442       ui[i+1]  = ui[i] + ncols;
2443       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2444     }
2445     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2446     cols = uj;
2447     for (i=0; i<am; i++) {
2448       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2449       ncols = ai[i+1] - a->diag[i] -1;
2450       for (j=0; j<ncols; j++) *cols++ = aj[j];
2451       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2452     }
2453   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2454     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2455     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2456 
2457     /* initialization */
2458     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2459 
2460     /* jl: linked list for storing indices of the pivot rows
2461        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2462     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2463     for (i=0; i<am; i++){
2464       jl[i] = am; il[i] = 0;
2465     }
2466 
2467     /* create and initialize a linked list for storing column indices of the active row k */
2468     nlnk = am + 1;
2469     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2470 
2471     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2472     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2473     current_space = free_space;
2474     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2475     current_space_lvl = free_space_lvl;
2476 
2477     for (k=0; k<am; k++){  /* for each active row k */
2478       /* initialize lnk by the column indices of row rip[k] of A */
2479       nzk   = 0;
2480       ncols = ai[rip[k]+1] - ai[rip[k]];
2481       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2482       ncols_upper = 0;
2483       for (j=0; j<ncols; j++){
2484         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2485         if (riip[i] >= k){ /* only take upper triangular entry */
2486           ajtmp[ncols_upper] = i;
2487           ncols_upper++;
2488         }
2489       }
2490       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2491       nzk += nlnk;
2492 
2493       /* update lnk by computing fill-in for each pivot row to be merged in */
2494       prow = jl[k]; /* 1st pivot row */
2495 
2496       while (prow < k){
2497         nextprow = jl[prow];
2498 
2499         /* merge prow into k-th row */
2500         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2501         jmax = ui[prow+1];
2502         ncols = jmax-jmin;
2503         i     = jmin - ui[prow];
2504         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2505         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2506         j     = *(uj - 1);
2507         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2508         nzk += nlnk;
2509 
2510         /* update il and jl for prow */
2511         if (jmin < jmax){
2512           il[prow] = jmin;
2513           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2514         }
2515         prow = nextprow;
2516       }
2517 
2518       /* if free space is not available, make more free space */
2519       if (current_space->local_remaining<nzk) {
2520         i  = am - k + 1; /* num of unfactored rows */
2521         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2522         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2523         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2524         reallocs++;
2525       }
2526 
2527       /* copy data into free_space and free_space_lvl, then initialize lnk */
2528       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2529       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2530 
2531       /* add the k-th row into il and jl */
2532       if (nzk > 1){
2533         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2534         jl[k] = jl[i]; jl[i] = k;
2535         il[k] = ui[k] + 1;
2536       }
2537       uj_ptr[k]     = current_space->array;
2538       uj_lvl_ptr[k] = current_space_lvl->array;
2539 
2540       current_space->array           += nzk;
2541       current_space->local_used      += nzk;
2542       current_space->local_remaining -= nzk;
2543 
2544       current_space_lvl->array           += nzk;
2545       current_space_lvl->local_used      += nzk;
2546       current_space_lvl->local_remaining -= nzk;
2547 
2548       ui[k+1] = ui[k] + nzk;
2549     }
2550 
2551     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2552     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2553     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2554     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2555 
2556     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2557     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2558     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2559     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2560     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2561 
2562   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2563 
2564   /* put together the new matrix in MATSEQSBAIJ format */
2565   b    = (Mat_SeqSBAIJ*)(fact)->data;
2566   b->singlemalloc = PETSC_FALSE;
2567   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2568   b->j    = uj;
2569   b->i    = ui;
2570   b->diag = udiag;
2571   b->free_diag = PETSC_TRUE;
2572   b->ilen = 0;
2573   b->imax = 0;
2574   b->row  = perm;
2575   b->col  = perm;
2576   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2577   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2578   b->icol = iperm;
2579   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2580   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2581   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2582   b->maxnz   = b->nz = ui[am];
2583   b->free_a  = PETSC_TRUE;
2584   b->free_ij = PETSC_TRUE;
2585 
2586   fact->info.factor_mallocs   = reallocs;
2587   fact->info.fill_ratio_given = fill;
2588   if (ai[am] != 0) {
2589     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2590     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2591   } else {
2592     fact->info.fill_ratio_needed = 0.0;
2593   }
2594 #if defined(PETSC_USE_INFO)
2595     if (ai[am] != 0) {
2596       PetscReal af = fact->info.fill_ratio_needed;
2597       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2598       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2599       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2600     } else {
2601       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2602     }
2603 #endif
2604   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 #undef __FUNCT__
2609 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2610 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2611 {
2612   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2613   Mat_SeqSBAIJ       *b;
2614   PetscErrorCode     ierr;
2615   PetscTruth         perm_identity,missing;
2616   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2617   const PetscInt     *rip,*riip;
2618   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2619   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2620   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2621   PetscReal          fill=info->fill,levels=info->levels;
2622   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2623   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2624   PetscBT            lnkbt;
2625   IS                 iperm;
2626 
2627   PetscFunctionBegin;
2628   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2629   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2630   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2631   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2632   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2633 
2634   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2635   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2636   ui[0] = 0;
2637 
2638   /* ICC(0) without matrix ordering: simply copies fill pattern */
2639   if (!levels && perm_identity) {
2640 
2641     for (i=0; i<am; i++) {
2642       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2643       udiag[i] = ui[i];
2644     }
2645     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2646     cols = uj;
2647     for (i=0; i<am; i++) {
2648       aj    = a->j + a->diag[i];
2649       ncols = ui[i+1] - ui[i];
2650       for (j=0; j<ncols; j++) *cols++ = *aj++;
2651     }
2652   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2653     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2654     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2655 
2656     /* initialization */
2657     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2658 
2659     /* jl: linked list for storing indices of the pivot rows
2660        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2661     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2662     for (i=0; i<am; i++){
2663       jl[i] = am; il[i] = 0;
2664     }
2665 
2666     /* create and initialize a linked list for storing column indices of the active row k */
2667     nlnk = am + 1;
2668     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2669 
2670     /* initial FreeSpace size is fill*(ai[am]+1) */
2671     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2672     current_space = free_space;
2673     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2674     current_space_lvl = free_space_lvl;
2675 
2676     for (k=0; k<am; k++){  /* for each active row k */
2677       /* initialize lnk by the column indices of row rip[k] of A */
2678       nzk   = 0;
2679       ncols = ai[rip[k]+1] - ai[rip[k]];
2680       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2681       ncols_upper = 0;
2682       for (j=0; j<ncols; j++){
2683         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2684         if (riip[i] >= k){ /* only take upper triangular entry */
2685           ajtmp[ncols_upper] = i;
2686           ncols_upper++;
2687         }
2688       }
2689       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2690       nzk += nlnk;
2691 
2692       /* update lnk by computing fill-in for each pivot row to be merged in */
2693       prow = jl[k]; /* 1st pivot row */
2694 
2695       while (prow < k){
2696         nextprow = jl[prow];
2697 
2698         /* merge prow into k-th row */
2699         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2700         jmax = ui[prow+1];
2701         ncols = jmax-jmin;
2702         i     = jmin - ui[prow];
2703         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2704         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2705         j     = *(uj - 1);
2706         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2707         nzk += nlnk;
2708 
2709         /* update il and jl for prow */
2710         if (jmin < jmax){
2711           il[prow] = jmin;
2712           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2713         }
2714         prow = nextprow;
2715       }
2716 
2717       /* if free space is not available, make more free space */
2718       if (current_space->local_remaining<nzk) {
2719         i = am - k + 1; /* num of unfactored rows */
2720         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2721         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2722         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2723         reallocs++;
2724       }
2725 
2726       /* copy data into free_space and free_space_lvl, then initialize lnk */
2727       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2728       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2729 
2730       /* add the k-th row into il and jl */
2731       if (nzk > 1){
2732         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2733         jl[k] = jl[i]; jl[i] = k;
2734         il[k] = ui[k] + 1;
2735       }
2736       uj_ptr[k]     = current_space->array;
2737       uj_lvl_ptr[k] = current_space_lvl->array;
2738 
2739       current_space->array           += nzk;
2740       current_space->local_used      += nzk;
2741       current_space->local_remaining -= nzk;
2742 
2743       current_space_lvl->array           += nzk;
2744       current_space_lvl->local_used      += nzk;
2745       current_space_lvl->local_remaining -= nzk;
2746 
2747       ui[k+1] = ui[k] + nzk;
2748     }
2749 
2750 #if defined(PETSC_USE_INFO)
2751     if (ai[am] != 0) {
2752       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2753       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2754       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2755       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2756     } else {
2757       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2758     }
2759 #endif
2760 
2761     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2762     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2763     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2764     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2765 
2766     /* destroy list of free space and other temporary array(s) */
2767     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2768     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2769     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2770     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2771 
2772   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2773 
2774   /* put together the new matrix in MATSEQSBAIJ format */
2775 
2776   b    = (Mat_SeqSBAIJ*)fact->data;
2777   b->singlemalloc = PETSC_FALSE;
2778   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2779   b->j    = uj;
2780   b->i    = ui;
2781   b->diag = udiag;
2782   b->free_diag = PETSC_TRUE;
2783   b->ilen = 0;
2784   b->imax = 0;
2785   b->row  = perm;
2786   b->col  = perm;
2787   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2788   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2789   b->icol = iperm;
2790   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2791   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2792   ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2793   b->maxnz   = b->nz = ui[am];
2794   b->free_a  = PETSC_TRUE;
2795   b->free_ij = PETSC_TRUE;
2796 
2797   fact->info.factor_mallocs    = reallocs;
2798   fact->info.fill_ratio_given  = fill;
2799   if (ai[am] != 0) {
2800     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2801   } else {
2802     fact->info.fill_ratio_needed = 0.0;
2803   }
2804   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2809 {
2810   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2811   Mat_SeqSBAIJ       *b;
2812   PetscErrorCode     ierr;
2813   PetscTruth         perm_identity;
2814   PetscReal          fill = info->fill;
2815   const PetscInt     *rip,*riip;
2816   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2817   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2818   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2819   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2820   PetscBT            lnkbt;
2821   IS                 iperm;
2822 
2823   PetscFunctionBegin;
2824   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2825   /* check whether perm is the identity mapping */
2826   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2827   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2828   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2829   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2830 
2831   /* initialization */
2832   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2833   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2834   ui[0] = 0;
2835 
2836   /* jl: linked list for storing indices of the pivot rows
2837      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2838   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2839   for (i=0; i<am; i++){
2840     jl[i] = am; il[i] = 0;
2841   }
2842 
2843   /* create and initialize a linked list for storing column indices of the active row k */
2844   nlnk = am + 1;
2845   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2846 
2847   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2848   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2849   current_space = free_space;
2850 
2851   for (k=0; k<am; k++){  /* for each active row k */
2852     /* initialize lnk by the column indices of row rip[k] of A */
2853     nzk   = 0;
2854     ncols = ai[rip[k]+1] - ai[rip[k]];
2855     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2856     ncols_upper = 0;
2857     for (j=0; j<ncols; j++){
2858       i = riip[*(aj + ai[rip[k]] + j)];
2859       if (i >= k){ /* only take upper triangular entry */
2860         cols[ncols_upper] = i;
2861         ncols_upper++;
2862       }
2863     }
2864     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2865     nzk += nlnk;
2866 
2867     /* update lnk by computing fill-in for each pivot row to be merged in */
2868     prow = jl[k]; /* 1st pivot row */
2869 
2870     while (prow < k){
2871       nextprow = jl[prow];
2872       /* merge prow into k-th row */
2873       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2874       jmax = ui[prow+1];
2875       ncols = jmax-jmin;
2876       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2877       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2878       nzk += nlnk;
2879 
2880       /* update il and jl for prow */
2881       if (jmin < jmax){
2882         il[prow] = jmin;
2883         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2884       }
2885       prow = nextprow;
2886     }
2887 
2888     /* if free space is not available, make more free space */
2889     if (current_space->local_remaining<nzk) {
2890       i  = am - k + 1; /* num of unfactored rows */
2891       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2892       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2893       reallocs++;
2894     }
2895 
2896     /* copy data into free space, then initialize lnk */
2897     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2898 
2899     /* add the k-th row into il and jl */
2900     if (nzk > 1){
2901       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2902       jl[k] = jl[i]; jl[i] = k;
2903       il[k] = ui[k] + 1;
2904     }
2905     ui_ptr[k] = current_space->array;
2906     current_space->array           += nzk;
2907     current_space->local_used      += nzk;
2908     current_space->local_remaining -= nzk;
2909 
2910     ui[k+1] = ui[k] + nzk;
2911   }
2912 
2913   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2914   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2915   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2916 
2917   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2918   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2919   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2920   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2921 
2922   /* put together the new matrix in MATSEQSBAIJ format */
2923 
2924   b = (Mat_SeqSBAIJ*)fact->data;
2925   b->singlemalloc = PETSC_FALSE;
2926   b->free_a       = PETSC_TRUE;
2927   b->free_ij      = PETSC_TRUE;
2928   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2929   b->j    = uj;
2930   b->i    = ui;
2931   b->diag = udiag;
2932   b->free_diag = PETSC_TRUE;
2933   b->ilen = 0;
2934   b->imax = 0;
2935   b->row  = perm;
2936   b->col  = perm;
2937   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2938   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2939   b->icol = iperm;
2940   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2941   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2942   ierr    = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2943   b->maxnz = b->nz = ui[am];
2944 
2945   fact->info.factor_mallocs    = reallocs;
2946   fact->info.fill_ratio_given  = fill;
2947   if (ai[am] != 0) {
2948     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2949     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2950   } else {
2951     fact->info.fill_ratio_needed = 0.0;
2952   }
2953 #if defined(PETSC_USE_INFO)
2954   if (ai[am] != 0) {
2955     PetscReal af = fact->info.fill_ratio_needed;
2956     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2957     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2958     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2959   } else {
2960      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2961   }
2962 #endif
2963   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 #undef __FUNCT__
2968 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2969 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2970 {
2971   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2972   Mat_SeqSBAIJ       *b;
2973   PetscErrorCode     ierr;
2974   PetscTruth         perm_identity;
2975   PetscReal          fill = info->fill;
2976   const PetscInt     *rip,*riip;
2977   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2978   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2979   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2980   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2981   PetscBT            lnkbt;
2982   IS                 iperm;
2983 
2984   PetscFunctionBegin;
2985   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2986   /* check whether perm is the identity mapping */
2987   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2988   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2989   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2990   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2991 
2992   /* initialization */
2993   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2994   ui[0] = 0;
2995 
2996   /* jl: linked list for storing indices of the pivot rows
2997      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2998   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2999   for (i=0; i<am; i++){
3000     jl[i] = am; il[i] = 0;
3001   }
3002 
3003   /* create and initialize a linked list for storing column indices of the active row k */
3004   nlnk = am + 1;
3005   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3006 
3007   /* initial FreeSpace size is fill*(ai[am]+1) */
3008   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3009   current_space = free_space;
3010 
3011   for (k=0; k<am; k++){  /* for each active row k */
3012     /* initialize lnk by the column indices of row rip[k] of A */
3013     nzk   = 0;
3014     ncols = ai[rip[k]+1] - ai[rip[k]];
3015     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3016     ncols_upper = 0;
3017     for (j=0; j<ncols; j++){
3018       i = riip[*(aj + ai[rip[k]] + j)];
3019       if (i >= k){ /* only take upper triangular entry */
3020         cols[ncols_upper] = i;
3021         ncols_upper++;
3022       }
3023     }
3024     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3025     nzk += nlnk;
3026 
3027     /* update lnk by computing fill-in for each pivot row to be merged in */
3028     prow = jl[k]; /* 1st pivot row */
3029 
3030     while (prow < k){
3031       nextprow = jl[prow];
3032       /* merge prow into k-th row */
3033       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3034       jmax = ui[prow+1];
3035       ncols = jmax-jmin;
3036       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3037       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3038       nzk += nlnk;
3039 
3040       /* update il and jl for prow */
3041       if (jmin < jmax){
3042         il[prow] = jmin;
3043         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3044       }
3045       prow = nextprow;
3046     }
3047 
3048     /* if free space is not available, make more free space */
3049     if (current_space->local_remaining<nzk) {
3050       i = am - k + 1; /* num of unfactored rows */
3051       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3052       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3053       reallocs++;
3054     }
3055 
3056     /* copy data into free space, then initialize lnk */
3057     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3058 
3059     /* add the k-th row into il and jl */
3060     if (nzk-1 > 0){
3061       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3062       jl[k] = jl[i]; jl[i] = k;
3063       il[k] = ui[k] + 1;
3064     }
3065     ui_ptr[k] = current_space->array;
3066     current_space->array           += nzk;
3067     current_space->local_used      += nzk;
3068     current_space->local_remaining -= nzk;
3069 
3070     ui[k+1] = ui[k] + nzk;
3071   }
3072 
3073 #if defined(PETSC_USE_INFO)
3074   if (ai[am] != 0) {
3075     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3076     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3077     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3078     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3079   } else {
3080      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3081   }
3082 #endif
3083 
3084   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3085   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3086   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3087 
3088   /* destroy list of free space and other temporary array(s) */
3089   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3090   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3091   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3092 
3093   /* put together the new matrix in MATSEQSBAIJ format */
3094 
3095   b = (Mat_SeqSBAIJ*)fact->data;
3096   b->singlemalloc = PETSC_FALSE;
3097   b->free_a       = PETSC_TRUE;
3098   b->free_ij      = PETSC_TRUE;
3099   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3100   b->j    = uj;
3101   b->i    = ui;
3102   b->diag = 0;
3103   b->ilen = 0;
3104   b->imax = 0;
3105   b->row  = perm;
3106   b->col  = perm;
3107   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3108   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3109   b->icol = iperm;
3110   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3111   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3112   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3113   b->maxnz = b->nz = ui[am];
3114 
3115   fact->info.factor_mallocs    = reallocs;
3116   fact->info.fill_ratio_given  = fill;
3117   if (ai[am] != 0) {
3118     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3119   } else {
3120     fact->info.fill_ratio_needed = 0.0;
3121   }
3122   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3123   PetscFunctionReturn(0);
3124 }
3125 
3126 #undef __FUNCT__
3127 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3128 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3129 {
3130   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3131   PetscErrorCode    ierr;
3132   PetscInt          n = A->rmap->n;
3133   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3134   PetscScalar       *x,sum;
3135   const PetscScalar *b;
3136   const MatScalar   *aa = a->a,*v;
3137   PetscInt          i,nz;
3138 
3139   PetscFunctionBegin;
3140   if (!n) PetscFunctionReturn(0);
3141 
3142   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3143   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3144 
3145   /* forward solve the lower triangular */
3146   x[0] = b[0];
3147   v    = aa;
3148   vi   = aj;
3149   for (i=1; i<n; i++) {
3150     nz  = ai[i+1] - ai[i];
3151     sum = b[i];
3152     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3153     v  += nz;
3154     vi += nz;
3155     x[i] = sum;
3156   }
3157 
3158   /* backward solve the upper triangular */
3159   for (i=n-1; i>=0; i--){
3160     v   = aa + adiag[i+1] + 1;
3161     vi  = aj + adiag[i+1] + 1;
3162     nz = adiag[i] - adiag[i+1]-1;
3163     sum = x[i];
3164     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3165     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3166   }
3167 
3168   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3169   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3170   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3171   PetscFunctionReturn(0);
3172 }
3173 
3174 #undef __FUNCT__
3175 #define __FUNCT__ "MatSolve_SeqAIJ"
3176 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3177 {
3178   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3179   IS                iscol = a->col,isrow = a->row;
3180   PetscErrorCode    ierr;
3181   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3182   const PetscInt    *rout,*cout,*r,*c;
3183   PetscScalar       *x,*tmp,sum;
3184   const PetscScalar *b;
3185   const MatScalar   *aa = a->a,*v;
3186 
3187   PetscFunctionBegin;
3188   if (!n) PetscFunctionReturn(0);
3189 
3190   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3191   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3192   tmp  = a->solve_work;
3193 
3194   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3195   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3196 
3197   /* forward solve the lower triangular */
3198   tmp[0] = b[r[0]];
3199   v      = aa;
3200   vi     = aj;
3201   for (i=1; i<n; i++) {
3202     nz  = ai[i+1] - ai[i];
3203     sum = b[r[i]];
3204     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3205     tmp[i] = sum;
3206     v += nz; vi += nz;
3207   }
3208 
3209   /* backward solve the upper triangular */
3210   for (i=n-1; i>=0; i--){
3211     v   = aa + adiag[i+1]+1;
3212     vi  = aj + adiag[i+1]+1;
3213     nz  = adiag[i]-adiag[i+1]-1;
3214     sum = tmp[i];
3215     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3216     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3217   }
3218 
3219   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3220   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3221   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3222   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3223   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 #undef __FUNCT__
3228 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3229 /*
3230     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3231 */
3232 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3233 {
3234   Mat                B = *fact;
3235   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3236   IS                 isicol;
3237   PetscErrorCode     ierr;
3238   const PetscInt     *r,*ic;
3239   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3240   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3241   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3242   PetscInt           nlnk,*lnk;
3243   PetscBT            lnkbt;
3244   PetscTruth         row_identity,icol_identity;
3245   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3246   const PetscInt     *ics;
3247   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3248   PetscReal          dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3249   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3250   PetscTruth         missing;
3251 
3252   PetscFunctionBegin;
3253 
3254   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3255   if (dtcol   == PETSC_DEFAULT) dtcol   = 0.01; /* XXX unused! */
3256   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3257 
3258   /* ------- symbolic factorization, can be reused ---------*/
3259   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3260   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3261   adiag=a->diag;
3262 
3263   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3264 
3265   /* bdiag is location of diagonal in factor */
3266   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3267   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3268 
3269   /* allocate row pointers bi */
3270   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3271 
3272   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3273   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3274   nnz_max  = ai[n]+2*n*dtcount+2;
3275 
3276   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3277   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3278 
3279   /* put together the new matrix */
3280   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3281   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3282   b    = (Mat_SeqAIJ*)B->data;
3283   b->free_a       = PETSC_TRUE;
3284   b->free_ij      = PETSC_TRUE;
3285   b->singlemalloc = PETSC_FALSE;
3286   b->a          = ba;
3287   b->j          = bj;
3288   b->i          = bi;
3289   b->diag       = bdiag;
3290   b->ilen       = 0;
3291   b->imax       = 0;
3292   b->row        = isrow;
3293   b->col        = iscol;
3294   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3295   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3296   b->icol       = isicol;
3297   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3298 
3299   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3300   b->maxnz = nnz_max;
3301 
3302   B->factortype            = MAT_FACTOR_ILUDT;
3303   B->info.factor_mallocs   = 0;
3304   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3305   CHKMEMQ;
3306   /* ------- end of symbolic factorization ---------*/
3307 
3308   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3309   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3310   ics  = ic;
3311 
3312   /* linked list for storing column indices of the active row */
3313   nlnk = n + 1;
3314   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3315 
3316   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3317   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3318   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3319   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3320   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3321 
3322   bi[0]    = 0;
3323   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3324   bdiag_rev[n] = bdiag[0];
3325   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3326   for (i=0; i<n; i++) {
3327     /* copy initial fill into linked list */
3328     nzi = 0; /* nonzeros for active row i */
3329     nzi = ai[r[i]+1] - ai[r[i]];
3330     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3331     nzi_al = adiag[r[i]] - ai[r[i]];
3332     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3333     ajtmp = aj + ai[r[i]];
3334     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3335 
3336     /* load in initial (unfactored row) */
3337     aatmp = a->a + ai[r[i]];
3338     for (j=0; j<nzi; j++) {
3339       rtmp[ics[*ajtmp++]] = *aatmp++;
3340     }
3341 
3342     /* add pivot rows into linked list */
3343     row = lnk[n];
3344     while (row < i ) {
3345       nzi_bl = bi[row+1] - bi[row] + 1;
3346       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3347       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3348       nzi  += nlnk;
3349       row   = lnk[row];
3350     }
3351 
3352     /* copy data from lnk into jtmp, then initialize lnk */
3353     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3354 
3355     /* numerical factorization */
3356     bjtmp = jtmp;
3357     row   = *bjtmp++; /* 1st pivot row */
3358     while  ( row < i ) {
3359       pc         = rtmp + row;
3360       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3361       multiplier = (*pc) * (*pv);
3362       *pc        = multiplier;
3363       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3364         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3365         pv         = ba + bdiag[row+1] + 1;
3366         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3367         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3368         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3369         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
3370       }
3371       row = *bjtmp++;
3372     }
3373 
3374     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3375     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3376     nzi_bl = 0; j = 0;
3377     while (jtmp[j] < i){ /* Note: jtmp is sorted */
3378       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3379       nzi_bl++; j++;
3380     }
3381     nzi_bu = nzi - nzi_bl -1;
3382     while (j < nzi){
3383       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3384       j++;
3385     }
3386 
3387     bjtmp = bj + bi[i];
3388     batmp = ba + bi[i];
3389     /* apply level dropping rule to L part */
3390     ncut = nzi_al + dtcount;
3391     if (ncut < nzi_bl){
3392       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3393       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3394     } else {
3395       ncut = nzi_bl;
3396     }
3397     for (j=0; j<ncut; j++){
3398       bjtmp[j] = jtmp[j];
3399       batmp[j] = vtmp[j];
3400       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3401     }
3402     bi[i+1] = bi[i] + ncut;
3403     nzi = ncut + 1;
3404 
3405     /* apply level dropping rule to U part */
3406     ncut = nzi_au + dtcount;
3407     if (ncut < nzi_bu){
3408       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3409       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3410     } else {
3411       ncut = nzi_bu;
3412     }
3413     nzi += ncut;
3414 
3415     /* mark bdiagonal */
3416     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3417     bdiag_rev[n-i-1] = bdiag[i+1];
3418     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3419     bjtmp = bj + bdiag[i];
3420     batmp = ba + bdiag[i];
3421     *bjtmp = i;
3422     *batmp = diag_tmp; /* rtmp[i]; */
3423     if (*batmp == 0.0) {
3424       *batmp = dt+shift;
3425       /* printf(" row %d add shift %g\n",i,shift); */
3426     }
3427     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3428     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3429 
3430     bjtmp = bj + bdiag[i+1]+1;
3431     batmp = ba + bdiag[i+1]+1;
3432     for (k=0; k<ncut; k++){
3433       bjtmp[k] = jtmp[nzi_bl+1+k];
3434       batmp[k] = vtmp[nzi_bl+1+k];
3435       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3436     }
3437     /* printf("\n"); */
3438 
3439     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3440     /*
3441     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3442     printf(" ----------------------------\n");
3443     */
3444   } /* for (i=0; i<n; i++) */
3445   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3446   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3447 
3448   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3449   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3450 
3451   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3452   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3453   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3454   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3455 
3456   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3457   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3458 
3459   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3460   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3461   if (row_identity && icol_identity) {
3462     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3463   } else {
3464     B->ops->solve = MatSolve_SeqAIJ;
3465   }
3466 
3467   B->ops->solveadd          = 0;
3468   B->ops->solvetranspose    = 0;
3469   B->ops->solvetransposeadd = 0;
3470   B->ops->matsolve          = 0;
3471   B->assembled              = PETSC_TRUE;
3472   B->preallocated           = PETSC_TRUE;
3473   PetscFunctionReturn(0);
3474 }
3475 
3476 /* a wraper of MatILUDTFactor_SeqAIJ() */
3477 #undef __FUNCT__
3478 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3479 /*
3480     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3481 */
3482 
3483 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3484 {
3485   PetscErrorCode     ierr;
3486 
3487   PetscFunctionBegin;
3488   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3489   PetscFunctionReturn(0);
3490 }
3491 
3492 /*
3493    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3494    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3495 */
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3498 /*
3499     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3500 */
3501 
3502 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3503 {
3504   Mat            C=fact;
3505   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3506   IS             isrow = b->row,isicol = b->icol;
3507   PetscErrorCode ierr;
3508   const PetscInt *r,*ic,*ics;
3509   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3510   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3511   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3512   PetscReal      dt=info->dt,shift=info->shiftamount;
3513   PetscTruth     row_identity, col_identity;
3514 
3515   PetscFunctionBegin;
3516   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3517   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3518   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3519   ics  = ic;
3520 
3521   for (i=0; i<n; i++){
3522     /* initialize rtmp array */
3523     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3524     bjtmp = bj + bi[i];
3525     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3526     rtmp[i] = 0.0;
3527     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3528     bjtmp = bj + bdiag[i+1] + 1;
3529     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3530 
3531     /* load in initial unfactored row of A */
3532     /* printf("row %d\n",i); */
3533     nz    = ai[r[i]+1] - ai[r[i]];
3534     ajtmp = aj + ai[r[i]];
3535     v     = aa + ai[r[i]];
3536     for (j=0; j<nz; j++) {
3537       rtmp[ics[*ajtmp++]] = v[j];
3538       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3539     }
3540     /* printf("\n"); */
3541 
3542     /* numerical factorization */
3543     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3544     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3545     k = 0;
3546     while (k < nzl){
3547       row   = *bjtmp++;
3548       /* printf("  prow %d\n",row); */
3549       pc         = rtmp + row;
3550       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3551       multiplier = (*pc) * (*pv);
3552       *pc        = multiplier;
3553       if (PetscAbsScalar(multiplier) > dt){
3554         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3555         pv         = b->a + bdiag[row+1] + 1;
3556         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3557         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3558         /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */
3559       }
3560       k++;
3561     }
3562 
3563     /* finished row so stick it into b->a */
3564     /* L-part */
3565     pv = b->a + bi[i] ;
3566     pj = bj + bi[i] ;
3567     nzl = bi[i+1] - bi[i];
3568     for (j=0; j<nzl; j++) {
3569       pv[j] = rtmp[pj[j]];
3570       /* printf(" (%d,%g),",pj[j],pv[j]); */
3571     }
3572 
3573     /* diagonal: invert diagonal entries for simplier triangular solves */
3574     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3575     b->a[bdiag[i]] = 1.0/rtmp[i];
3576     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3577 
3578     /* U-part */
3579     pv = b->a + bdiag[i+1] + 1;
3580     pj = bj + bdiag[i+1] + 1;
3581     nzu = bdiag[i] - bdiag[i+1] - 1;
3582     for (j=0; j<nzu; j++) {
3583       pv[j] = rtmp[pj[j]];
3584       /* printf(" (%d,%g),",pj[j],pv[j]); */
3585     }
3586     /* printf("\n"); */
3587   }
3588 
3589   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3590   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3591   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3592 
3593   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3594   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3595   if (row_identity && col_identity) {
3596     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3597   } else {
3598     C->ops->solve   = MatSolve_SeqAIJ;
3599   }
3600   C->ops->solveadd           = 0;
3601   C->ops->solvetranspose     = 0;
3602   C->ops->solvetransposeadd  = 0;
3603   C->ops->matsolve           = 0;
3604   C->assembled    = PETSC_TRUE;
3605   C->preallocated = PETSC_TRUE;
3606   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3607   PetscFunctionReturn(0);
3608 }
3609