xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision e93c322d3f1dc90bbe56757b7dfbbdeffd9bb884)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
10 /*
11       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12 
13       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14 */
15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16 {
17   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
18   PetscErrorCode    ierr;
19   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20   const PetscInt    *ai = a->i, *aj = a->j;
21   const PetscScalar *aa = a->a;
22   PetscBool         *done;
23   PetscReal         best,past = 0,future;
24 
25   PetscFunctionBegin;
26   /* pick initial row */
27   best = -1;
28   for (i=0; i<n; i++) {
29     future = 0.0;
30     for (j=ai[i]; j<ai[i+1]; j++) {
31       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best = past/future;
36       current = i;
37     }
38   }
39 
40   ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr);
41   ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
42   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0.0;
69         past   = 0.0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk]) past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future  += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current = newcurrent;
84     order[i+1] = current;
85   }
86   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr = PetscFree(done);CHKERRQ(ierr);
90   ierr = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 EXTERN_C_BEGIN
96 #undef __FUNCT__
97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
99 {
100   PetscFunctionBegin;
101   *flg = PETSC_TRUE;
102   PetscFunctionReturn(0);
103 }
104 EXTERN_C_END
105 
106 EXTERN_C_BEGIN
107 #undef __FUNCT__
108 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110 {
111   PetscInt           n = A->rmap->n;
112   PetscErrorCode     ierr;
113 
114   PetscFunctionBegin;
115   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
116   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
117   ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
118   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
119     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
120     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
121     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
122   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
123     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
124     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
125     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
126     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
127   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
128   (*B)->factortype = ftype;
129   PetscFunctionReturn(0);
130 }
131 EXTERN_C_END
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
135 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
136 {
137   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
138   IS                 isicol;
139   PetscErrorCode     ierr;
140   const PetscInt     *r,*ic;
141   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
142   PetscInt           *bi,*bj,*ajtmp;
143   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
144   PetscReal          f;
145   PetscInt           nlnk,*lnk,k,**bi_ptr;
146   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
147   PetscBT            lnkbt;
148 
149   PetscFunctionBegin;
150   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
151   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
152   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
153   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
154 
155   /* get new row pointers */
156   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
157   bi[0] = 0;
158 
159   /* bdiag is location of diagonal in factor */
160   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
161   bdiag[0] = 0;
162 
163   /* linked list for storing column indices of the active row */
164   nlnk = n + 1;
165   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
166 
167   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
168 
169   /* initial FreeSpace size is f*(ai[n]+1) */
170   f = info->fill;
171   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
172   current_space = free_space;
173 
174   for (i=0; i<n; i++) {
175     /* copy previous fill into linked list */
176     nzi = 0;
177     nnz = ai[r[i]+1] - ai[r[i]];
178     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
179     ajtmp = aj + ai[r[i]];
180     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
181     nzi += nlnk;
182 
183     /* add pivot rows into linked list */
184     row = lnk[n];
185     while (row < i) {
186       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
187       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
188       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
189       nzi += nlnk;
190       row  = lnk[row];
191     }
192     bi[i+1] = bi[i] + nzi;
193     im[i]   = nzi;
194 
195     /* mark bdiag */
196     nzbd = 0;
197     nnz  = nzi;
198     k    = lnk[n];
199     while (nnz-- && k < i){
200       nzbd++;
201       k = lnk[k];
202     }
203     bdiag[i] = bi[i] + nzbd;
204 
205     /* if free space is not available, make more free space */
206     if (current_space->local_remaining<nzi) {
207       nnz = (n - i)*nzi; /* estimated and max additional space needed */
208       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
209       reallocs++;
210     }
211 
212     /* copy data into free space, then initialize lnk */
213     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
214     bi_ptr[i] = current_space->array;
215     current_space->array           += nzi;
216     current_space->local_used      += nzi;
217     current_space->local_remaining -= nzi;
218   }
219 #if defined(PETSC_USE_INFO)
220   if (ai[n] != 0) {
221     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
222     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
223     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
224     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
225     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
226   } else {
227     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
228   }
229 #endif
230 
231   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
232   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
233 
234   /* destroy list of free space and other temporary array(s) */
235   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
236   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
237   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
238   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
239 
240   /* put together the new matrix */
241   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
242   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
243   b    = (Mat_SeqAIJ*)(B)->data;
244   b->free_a       = PETSC_TRUE;
245   b->free_ij      = PETSC_TRUE;
246   b->singlemalloc = PETSC_FALSE;
247   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
248   b->j          = bj;
249   b->i          = bi;
250   b->diag       = bdiag;
251   b->ilen       = 0;
252   b->imax       = 0;
253   b->row        = isrow;
254   b->col        = iscol;
255   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
256   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
257   b->icol       = isicol;
258   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
259 
260   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
261   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
262   b->maxnz = b->nz = bi[n] ;
263 
264   (B)->factortype            = MAT_FACTOR_LU;
265   (B)->info.factor_mallocs   = reallocs;
266   (B)->info.fill_ratio_given = f;
267 
268   if (ai[n]) {
269     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
270   } else {
271     (B)->info.fill_ratio_needed = 0.0;
272   }
273   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
274   if (a->inode.size) {
275     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
276   }
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
282 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
283 {
284   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
285   IS                 isicol;
286   PetscErrorCode     ierr;
287   const PetscInt     *r,*ic;
288   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
289   PetscInt           *bi,*bj,*ajtmp;
290   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
291   PetscReal          f;
292   PetscInt           nlnk,*lnk,k,**bi_ptr;
293   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
294   PetscBT            lnkbt;
295 
296   PetscFunctionBegin;
297   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
298   /*
299   PetscBool          olddatastruct=PETSC_FALSE;
300   ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
301   if(olddatastruct){
302     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
303     PetscFunctionReturn(0);
304   }
305   */
306   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
307   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
308   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
309   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
310 
311   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
313   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
314   bi[0] = bdiag[0] = 0;
315 
316   /* linked list for storing column indices of the active row */
317   nlnk = n + 1;
318   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
319 
320   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
321 
322   /* initial FreeSpace size is f*(ai[n]+1) */
323   f = info->fill;
324   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
325   current_space = free_space;
326 
327   for (i=0; i<n; i++) {
328     /* copy previous fill into linked list */
329     nzi = 0;
330     nnz = ai[r[i]+1] - ai[r[i]];
331     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
332     ajtmp = aj + ai[r[i]];
333     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
334     nzi += nlnk;
335 
336     /* add pivot rows into linked list */
337     row = lnk[n];
338     while (row < i){
339       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
340       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
342       nzi  += nlnk;
343       row   = lnk[row];
344     }
345     bi[i+1] = bi[i] + nzi;
346     im[i]   = nzi;
347 
348     /* mark bdiag */
349     nzbd = 0;
350     nnz  = nzi;
351     k    = lnk[n];
352     while (nnz-- && k < i){
353       nzbd++;
354       k = lnk[k];
355     }
356     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
357 
358     /* if free space is not available, make more free space */
359     if (current_space->local_remaining<nzi) {
360       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
361       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
362       reallocs++;
363     }
364 
365     /* copy data into free space, then initialize lnk */
366     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
367     bi_ptr[i] = current_space->array;
368     current_space->array           += nzi;
369     current_space->local_used      += nzi;
370     current_space->local_remaining -= nzi;
371   }
372 
373   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
374   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
375 
376   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
377   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
378   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
379   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
380   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
381 
382   /* put together the new matrix */
383   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
384   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
385   b    = (Mat_SeqAIJ*)(B)->data;
386   b->free_a       = PETSC_TRUE;
387   b->free_ij      = PETSC_TRUE;
388   b->singlemalloc = PETSC_FALSE;
389   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
390   b->j          = bj;
391   b->i          = bi;
392   b->diag       = bdiag;
393   b->ilen       = 0;
394   b->imax       = 0;
395   b->row        = isrow;
396   b->col        = iscol;
397   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
398   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
399   b->icol       = isicol;
400   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
401 
402   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
403   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
404   b->maxnz = b->nz = bdiag[0]+1;
405   B->factortype            = MAT_FACTOR_LU;
406   B->info.factor_mallocs   = reallocs;
407   B->info.fill_ratio_given = f;
408 
409   if (ai[n]) {
410     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
411   } else {
412     B->info.fill_ratio_needed = 0.0;
413   }
414 #if defined(PETSC_USE_INFO)
415   if (ai[n] != 0) {
416     PetscReal af = B->info.fill_ratio_needed;
417     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
418     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
419     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
420     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
421   } else {
422     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
423   }
424 #endif
425   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426   if (a->inode.size) {
427     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
428   }
429   PetscFunctionReturn(0);
430 }
431 
432 /*
433     Trouble in factorization, should we dump the original matrix?
434 */
435 #undef __FUNCT__
436 #define __FUNCT__ "MatFactorDumpMatrix"
437 PetscErrorCode MatFactorDumpMatrix(Mat A)
438 {
439   PetscErrorCode ierr;
440   PetscBool      flg = PETSC_FALSE;
441 
442   PetscFunctionBegin;
443   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
444   if (flg) {
445     PetscViewer viewer;
446     char        filename[PETSC_MAX_PATH_LEN];
447 
448     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
449     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
450     ierr = MatView(A,viewer);CHKERRQ(ierr);
451     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
452   }
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
458 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
459 {
460   Mat              C=B;
461   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
462   IS               isrow = b->row,isicol = b->icol;
463   PetscErrorCode   ierr;
464   const PetscInt   *r,*ic,*ics;
465   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
466   PetscInt         i,j,k,nz,nzL,row,*pj;
467   const PetscInt   *ajtmp,*bjtmp;
468   MatScalar        *rtmp,*pc,multiplier,*pv;
469   const  MatScalar *aa=a->a,*v;
470   PetscBool        row_identity,col_identity;
471   FactorShiftCtx   sctx;
472   const PetscInt   *ddiag;
473   PetscReal        rs;
474   MatScalar        d;
475 
476   PetscFunctionBegin;
477   /* MatPivotSetUp(): initialize shift context sctx */
478   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
479 
480   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
481     ddiag          = a->diag;
482     sctx.shift_top = info->zeropivot;
483     for (i=0; i<n; i++) {
484       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
485       d  = (aa)[ddiag[i]];
486       rs = -PetscAbsScalar(d) - PetscRealPart(d);
487       v  = aa+ai[i];
488       nz = ai[i+1] - ai[i];
489       for (j=0; j<nz; j++)
490 	rs += PetscAbsScalar(v[j]);
491       if (rs>sctx.shift_top) sctx.shift_top = rs;
492     }
493     sctx.shift_top   *= 1.1;
494     sctx.nshift_max   = 5;
495     sctx.shift_lo     = 0.;
496     sctx.shift_hi     = 1.;
497   }
498 
499   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
500   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
501   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
502   ics  = ic;
503 
504   do {
505     sctx.newshift = PETSC_FALSE;
506     for (i=0; i<n; i++){
507       /* zero rtmp */
508       /* L part */
509       nz    = bi[i+1] - bi[i];
510       bjtmp = bj + bi[i];
511       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
512 
513       /* U part */
514       nz = bdiag[i]-bdiag[i+1];
515       bjtmp = bj + bdiag[i+1]+1;
516       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
517 
518       /* load in initial (unfactored row) */
519       nz    = ai[r[i]+1] - ai[r[i]];
520       ajtmp = aj + ai[r[i]];
521       v     = aa + ai[r[i]];
522       for (j=0; j<nz; j++) {
523         rtmp[ics[ajtmp[j]]] = v[j];
524       }
525       /* ZeropivotApply() */
526       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
527 
528       /* elimination */
529       bjtmp = bj + bi[i];
530       row   = *bjtmp++;
531       nzL   = bi[i+1] - bi[i];
532       for(k=0; k < nzL;k++) {
533         pc = rtmp + row;
534         if (*pc != 0.0) {
535           pv         = b->a + bdiag[row];
536           multiplier = *pc * (*pv);
537           *pc        = multiplier;
538           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
539 	  pv = b->a + bdiag[row+1]+1;
540 	  nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
541           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
543         }
544         row = *bjtmp++;
545       }
546 
547       /* finished row so stick it into b->a */
548       rs = 0.0;
549       /* L part */
550       pv   = b->a + bi[i] ;
551       pj   = b->j + bi[i] ;
552       nz   = bi[i+1] - bi[i];
553       for (j=0; j<nz; j++) {
554         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
555       }
556 
557       /* U part */
558       pv = b->a + bdiag[i+1]+1;
559       pj = b->j + bdiag[i+1]+1;
560       nz = bdiag[i] - bdiag[i+1]-1;
561       for (j=0; j<nz; j++) {
562         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
563       }
564 
565       sctx.rs  = rs;
566       sctx.pv  = rtmp[i];
567       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
568       if(sctx.newshift) break; /* break for-loop */
569       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
570 
571       /* Mark diagonal and invert diagonal for simplier triangular solves */
572       pv  = b->a + bdiag[i];
573       *pv = 1.0/rtmp[i];
574 
575     } /* endof for (i=0; i<n; i++){ */
576 
577     /* MatPivotRefine() */
578     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
579       /*
580        * if no shift in this attempt & shifting & started shifting & can refine,
581        * then try lower shift
582        */
583       sctx.shift_hi       = sctx.shift_fraction;
584       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
585       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
586       sctx.newshift       = PETSC_TRUE;
587       sctx.nshift++;
588     }
589   } while (sctx.newshift);
590 
591   ierr = PetscFree(rtmp);CHKERRQ(ierr);
592   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
593   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
594 
595   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
596   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
597   if (row_identity && col_identity) {
598     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
599   } else {
600     C->ops->solve = MatSolve_SeqAIJ;
601   }
602   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
603   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
604   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
605   C->ops->matsolve           = MatMatSolve_SeqAIJ;
606   C->assembled    = PETSC_TRUE;
607   C->preallocated = PETSC_TRUE;
608   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
609 
610   /* MatShiftView(A,info,&sctx) */
611   if (sctx.nshift){
612     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
613       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
614     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
615       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
616     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
617       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
618     }
619   }
620   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
627 {
628   Mat             C=B;
629   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
630   IS              isrow = b->row,isicol = b->icol;
631   PetscErrorCode  ierr;
632   const PetscInt   *r,*ic,*ics;
633   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
634   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
635   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
636   MatScalar       *pv,*rtmp,*pc,multiplier,d;
637   const MatScalar *v,*aa=a->a;
638   PetscReal       rs=0.0;
639   FactorShiftCtx  sctx;
640   const PetscInt  *ddiag;
641   PetscBool       row_identity, col_identity;
642 
643   PetscFunctionBegin;
644   /* MatPivotSetUp(): initialize shift context sctx */
645   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
646 
647   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
648     ddiag          = a->diag;
649     sctx.shift_top = info->zeropivot;
650     for (i=0; i<n; i++) {
651       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
652       d  = (aa)[ddiag[i]];
653       rs = -PetscAbsScalar(d) - PetscRealPart(d);
654       v  = aa+ai[i];
655       nz = ai[i+1] - ai[i];
656       for (j=0; j<nz; j++)
657 	rs += PetscAbsScalar(v[j]);
658       if (rs>sctx.shift_top) sctx.shift_top = rs;
659     }
660     sctx.shift_top   *= 1.1;
661     sctx.nshift_max   = 5;
662     sctx.shift_lo     = 0.;
663     sctx.shift_hi     = 1.;
664   }
665 
666   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
667   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
668   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
669   ics  = ic;
670 
671   do {
672     sctx.newshift = PETSC_FALSE;
673     for (i=0; i<n; i++){
674       nz    = bi[i+1] - bi[i];
675       bjtmp = bj + bi[i];
676       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
677 
678       /* load in initial (unfactored row) */
679       nz    = ai[r[i]+1] - ai[r[i]];
680       ajtmp = aj + ai[r[i]];
681       v     = aa + ai[r[i]];
682       for (j=0; j<nz; j++) {
683         rtmp[ics[ajtmp[j]]] = v[j];
684       }
685       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
686 
687       row = *bjtmp++;
688       while  (row < i) {
689         pc = rtmp + row;
690         if (*pc != 0.0) {
691           pv         = b->a + diag_offset[row];
692           pj         = b->j + diag_offset[row] + 1;
693           multiplier = *pc / *pv++;
694           *pc        = multiplier;
695           nz         = bi[row+1] - diag_offset[row] - 1;
696           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
697           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
698         }
699         row = *bjtmp++;
700       }
701       /* finished row so stick it into b->a */
702       pv   = b->a + bi[i] ;
703       pj   = b->j + bi[i] ;
704       nz   = bi[i+1] - bi[i];
705       diag = diag_offset[i] - bi[i];
706       rs   = 0.0;
707       for (j=0; j<nz; j++) {
708         pv[j] = rtmp[pj[j]];
709         rs   += PetscAbsScalar(pv[j]);
710       }
711       rs   -= PetscAbsScalar(pv[diag]);
712 
713       sctx.rs  = rs;
714       sctx.pv  = pv[diag];
715       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
716       if (sctx.newshift) break;
717       pv[diag] = sctx.pv;
718     }
719 
720     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
721       /*
722        * if no shift in this attempt & shifting & started shifting & can refine,
723        * then try lower shift
724        */
725       sctx.shift_hi       = sctx.shift_fraction;
726       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
727       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
728       sctx.newshift       = PETSC_TRUE;
729       sctx.nshift++;
730     }
731   } while (sctx.newshift);
732 
733   /* invert diagonal entries for simplier triangular solves */
734   for (i=0; i<n; i++) {
735     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
736   }
737   ierr = PetscFree(rtmp);CHKERRQ(ierr);
738   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
739   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
740 
741   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
742   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
743   if (row_identity && col_identity) {
744     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
745   } else {
746     C->ops->solve   = MatSolve_SeqAIJ_inplace;
747   }
748   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
749   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
750   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
751   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
752   C->assembled    = PETSC_TRUE;
753   C->preallocated = PETSC_TRUE;
754   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
755   if (sctx.nshift){
756      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
757       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
758     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
759       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
760     }
761   }
762   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
763   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
764   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
768 /*
769    This routine implements inplace ILU(0) with row or/and column permutations.
770    Input:
771      A - original matrix
772    Output;
773      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
774          a->j (col index) is permuted by the inverse of colperm, then sorted
775          a->a reordered accordingly with a->j
776          a->diag (ptr to diagonal elements) is updated.
777 */
778 #undef __FUNCT__
779 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
781 {
782   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
783   IS             isrow = a->row,isicol = a->icol;
784   PetscErrorCode ierr;
785   const PetscInt *r,*ic,*ics;
786   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
787   PetscInt       *ajtmp,nz,row;
788   PetscInt       *diag = a->diag,nbdiag,*pj;
789   PetscScalar    *rtmp,*pc,multiplier,d;
790   MatScalar      *pv,*v;
791   PetscReal      rs;
792   FactorShiftCtx sctx;
793   const  MatScalar *aa=a->a,*vtmp;
794 
795   PetscFunctionBegin;
796   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
797 
798   /* MatPivotSetUp(): initialize shift context sctx */
799   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
800 
801   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
802     const PetscInt   *ddiag = a->diag;
803     sctx.shift_top = info->zeropivot;
804     for (i=0; i<n; i++) {
805       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
806       d  = (aa)[ddiag[i]];
807       rs = -PetscAbsScalar(d) - PetscRealPart(d);
808       vtmp  = aa+ai[i];
809       nz = ai[i+1] - ai[i];
810       for (j=0; j<nz; j++)
811 	rs += PetscAbsScalar(vtmp[j]);
812       if (rs>sctx.shift_top) sctx.shift_top = rs;
813     }
814     sctx.shift_top   *= 1.1;
815     sctx.nshift_max   = 5;
816     sctx.shift_lo     = 0.;
817     sctx.shift_hi     = 1.;
818   }
819 
820   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
821   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
822   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
823   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
824   ics = ic;
825 
826 #if defined(MV)
827   sctx.shift_top      = 0.;
828   sctx.nshift_max     = 0;
829   sctx.shift_lo       = 0.;
830   sctx.shift_hi       = 0.;
831   sctx.shift_fraction = 0.;
832 
833   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
834     sctx.shift_top = 0.;
835     for (i=0; i<n; i++) {
836       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
837       d  = (a->a)[diag[i]];
838       rs = -PetscAbsScalar(d) - PetscRealPart(d);
839       v  = a->a+ai[i];
840       nz = ai[i+1] - ai[i];
841       for (j=0; j<nz; j++)
842 	rs += PetscAbsScalar(v[j]);
843       if (rs>sctx.shift_top) sctx.shift_top = rs;
844     }
845     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
846     sctx.shift_top    *= 1.1;
847     sctx.nshift_max   = 5;
848     sctx.shift_lo     = 0.;
849     sctx.shift_hi     = 1.;
850   }
851 
852   sctx.shift_amount = 0.;
853   sctx.nshift       = 0;
854 #endif
855 
856   do {
857     sctx.newshift = PETSC_FALSE;
858     for (i=0; i<n; i++){
859       /* load in initial unfactored row */
860       nz    = ai[r[i]+1] - ai[r[i]];
861       ajtmp = aj + ai[r[i]];
862       v     = a->a + ai[r[i]];
863       /* sort permuted ajtmp and values v accordingly */
864       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
865       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
866 
867       diag[r[i]] = ai[r[i]];
868       for (j=0; j<nz; j++) {
869         rtmp[ajtmp[j]] = v[j];
870         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
871       }
872       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
873 
874       row = *ajtmp++;
875       while  (row < i) {
876         pc = rtmp + row;
877         if (*pc != 0.0) {
878           pv         = a->a + diag[r[row]];
879           pj         = aj + diag[r[row]] + 1;
880 
881           multiplier = *pc / *pv++;
882           *pc        = multiplier;
883           nz         = ai[r[row]+1] - diag[r[row]] - 1;
884           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
885           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
886         }
887         row = *ajtmp++;
888       }
889       /* finished row so overwrite it onto a->a */
890       pv   = a->a + ai[r[i]] ;
891       pj   = aj + ai[r[i]] ;
892       nz   = ai[r[i]+1] - ai[r[i]];
893       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
894 
895       rs   = 0.0;
896       for (j=0; j<nz; j++) {
897         pv[j] = rtmp[pj[j]];
898         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
899       }
900 
901       sctx.rs  = rs;
902       sctx.pv  = pv[nbdiag];
903       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
904       if (sctx.newshift) break;
905       pv[nbdiag] = sctx.pv;
906     }
907 
908     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
909       /*
910        * if no shift in this attempt & shifting & started shifting & can refine,
911        * then try lower shift
912        */
913       sctx.shift_hi        = sctx.shift_fraction;
914       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
915       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
916       sctx.newshift         = PETSC_TRUE;
917       sctx.nshift++;
918     }
919   } while (sctx.newshift);
920 
921   /* invert diagonal entries for simplier triangular solves */
922   for (i=0; i<n; i++) {
923     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
924   }
925 
926   ierr = PetscFree(rtmp);CHKERRQ(ierr);
927   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
928   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
929   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
930   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
931   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
932   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
933   A->assembled = PETSC_TRUE;
934   A->preallocated = PETSC_TRUE;
935   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
936   if (sctx.nshift){
937     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
938       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
939     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
940       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
941     }
942   }
943   PetscFunctionReturn(0);
944 }
945 
946 /* ----------------------------------------------------------- */
947 #undef __FUNCT__
948 #define __FUNCT__ "MatLUFactor_SeqAIJ"
949 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
950 {
951   PetscErrorCode ierr;
952   Mat            C;
953 
954   PetscFunctionBegin;
955   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
956   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
957   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
958   A->ops->solve            = C->ops->solve;
959   A->ops->solvetranspose   = C->ops->solvetranspose;
960   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
961   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 /* ----------------------------------------------------------- */
965 
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
969 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
970 {
971   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
972   IS                iscol = a->col,isrow = a->row;
973   PetscErrorCode    ierr;
974   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
975   PetscInt          nz;
976   const PetscInt    *rout,*cout,*r,*c;
977   PetscScalar       *x,*tmp,*tmps,sum;
978   const PetscScalar *b;
979   const MatScalar   *aa = a->a,*v;
980 
981   PetscFunctionBegin;
982   if (!n) PetscFunctionReturn(0);
983 
984   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
985   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
986   tmp  = a->solve_work;
987 
988   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
989   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
990 
991   /* forward solve the lower triangular */
992   tmp[0] = b[*r++];
993   tmps   = tmp;
994   for (i=1; i<n; i++) {
995     v   = aa + ai[i] ;
996     vi  = aj + ai[i] ;
997     nz  = a->diag[i] - ai[i];
998     sum = b[*r++];
999     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1000     tmp[i] = sum;
1001   }
1002 
1003   /* backward solve the upper triangular */
1004   for (i=n-1; i>=0; i--){
1005     v   = aa + a->diag[i] + 1;
1006     vi  = aj + a->diag[i] + 1;
1007     nz  = ai[i+1] - a->diag[i] - 1;
1008     sum = tmp[i];
1009     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1010     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1011   }
1012 
1013   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1014   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1015   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1016   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1017   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1023 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1024 {
1025   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1026   IS              iscol = a->col,isrow = a->row;
1027   PetscErrorCode  ierr;
1028   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1029   PetscInt        nz,neq;
1030   const PetscInt  *rout,*cout,*r,*c;
1031   PetscScalar     *x,*b,*tmp,*tmps,sum;
1032   const MatScalar *aa = a->a,*v;
1033   PetscBool       bisdense,xisdense;
1034 
1035   PetscFunctionBegin;
1036   if (!n) PetscFunctionReturn(0);
1037 
1038   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1039   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1040   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1041   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1042 
1043   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1044   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1045 
1046   tmp  = a->solve_work;
1047   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1048   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1049 
1050   for (neq=0; neq<B->cmap->n; neq++){
1051     /* forward solve the lower triangular */
1052     tmp[0] = b[r[0]];
1053     tmps   = tmp;
1054     for (i=1; i<n; i++) {
1055       v   = aa + ai[i] ;
1056       vi  = aj + ai[i] ;
1057       nz  = a->diag[i] - ai[i];
1058       sum = b[r[i]];
1059       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1060       tmp[i] = sum;
1061     }
1062     /* backward solve the upper triangular */
1063     for (i=n-1; i>=0; i--){
1064       v   = aa + a->diag[i] + 1;
1065       vi  = aj + a->diag[i] + 1;
1066       nz  = ai[i+1] - a->diag[i] - 1;
1067       sum = tmp[i];
1068       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1069       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1070     }
1071 
1072     b += n;
1073     x += n;
1074   }
1075   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1076   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1077   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1078   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1079   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1085 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1086 {
1087   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1088   IS              iscol = a->col,isrow = a->row;
1089   PetscErrorCode  ierr;
1090   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1091   PetscInt        nz,neq;
1092   const PetscInt  *rout,*cout,*r,*c;
1093   PetscScalar     *x,*b,*tmp,sum;
1094   const MatScalar *aa = a->a,*v;
1095   PetscBool       bisdense,xisdense;
1096 
1097   PetscFunctionBegin;
1098   if (!n) PetscFunctionReturn(0);
1099 
1100   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1101   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1102   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1103   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1104 
1105   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1106   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1107 
1108   tmp  = a->solve_work;
1109   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1110   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1111 
1112   for (neq=0; neq<B->cmap->n; neq++){
1113     /* forward solve the lower triangular */
1114     tmp[0] = b[r[0]];
1115     v      = aa;
1116     vi     = aj;
1117     for (i=1; i<n; i++) {
1118       nz  = ai[i+1] - ai[i];
1119       sum = b[r[i]];
1120       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1121       tmp[i] = sum;
1122       v += nz; vi += nz;
1123     }
1124 
1125     /* backward solve the upper triangular */
1126     for (i=n-1; i>=0; i--){
1127       v   = aa + adiag[i+1]+1;
1128       vi  = aj + adiag[i+1]+1;
1129       nz  = adiag[i]-adiag[i+1]-1;
1130       sum = tmp[i];
1131       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1132       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1133     }
1134 
1135     b += n;
1136     x += n;
1137   }
1138   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1139   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1140   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1141   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1142   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1148 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1149 {
1150   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1151   IS              iscol = a->col,isrow = a->row;
1152   PetscErrorCode  ierr;
1153   const PetscInt  *r,*c,*rout,*cout;
1154   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1155   PetscInt        nz,row;
1156   PetscScalar     *x,*b,*tmp,*tmps,sum;
1157   const MatScalar *aa = a->a,*v;
1158 
1159   PetscFunctionBegin;
1160   if (!n) PetscFunctionReturn(0);
1161 
1162   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1163   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1164   tmp  = a->solve_work;
1165 
1166   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1167   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1168 
1169   /* forward solve the lower triangular */
1170   tmp[0] = b[*r++];
1171   tmps   = tmp;
1172   for (row=1; row<n; row++) {
1173     i   = rout[row]; /* permuted row */
1174     v   = aa + ai[i] ;
1175     vi  = aj + ai[i] ;
1176     nz  = a->diag[i] - ai[i];
1177     sum = b[*r++];
1178     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1179     tmp[row] = sum;
1180   }
1181 
1182   /* backward solve the upper triangular */
1183   for (row=n-1; row>=0; row--){
1184     i   = rout[row]; /* permuted row */
1185     v   = aa + a->diag[i] + 1;
1186     vi  = aj + a->diag[i] + 1;
1187     nz  = ai[i+1] - a->diag[i] - 1;
1188     sum = tmp[row];
1189     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1190     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1191   }
1192 
1193   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1194   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1195   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1196   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1197   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 /* ----------------------------------------------------------- */
1202 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1205 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1206 {
1207   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1208   PetscErrorCode    ierr;
1209   PetscInt          n = A->rmap->n;
1210   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1211   PetscScalar       *x;
1212   const PetscScalar *b;
1213   const MatScalar   *aa = a->a;
1214 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1215   PetscInt          adiag_i,i,nz,ai_i;
1216   const PetscInt    *vi;
1217   const MatScalar   *v;
1218   PetscScalar       sum;
1219 #endif
1220 
1221   PetscFunctionBegin;
1222   if (!n) PetscFunctionReturn(0);
1223 
1224   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1225   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1226 
1227 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1228   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1229 #else
1230   /* forward solve the lower triangular */
1231   x[0] = b[0];
1232   for (i=1; i<n; i++) {
1233     ai_i = ai[i];
1234     v    = aa + ai_i;
1235     vi   = aj + ai_i;
1236     nz   = adiag[i] - ai_i;
1237     sum  = b[i];
1238     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1239     x[i] = sum;
1240   }
1241 
1242   /* backward solve the upper triangular */
1243   for (i=n-1; i>=0; i--){
1244     adiag_i = adiag[i];
1245     v       = aa + adiag_i + 1;
1246     vi      = aj + adiag_i + 1;
1247     nz      = ai[i+1] - adiag_i - 1;
1248     sum     = x[i];
1249     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1250     x[i]    = sum*aa[adiag_i];
1251   }
1252 #endif
1253   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1254   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1255   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1261 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1262 {
1263   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1264   IS                iscol = a->col,isrow = a->row;
1265   PetscErrorCode    ierr;
1266   PetscInt          i, n = A->rmap->n,j;
1267   PetscInt          nz;
1268   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1269   PetscScalar       *x,*tmp,sum;
1270   const PetscScalar *b;
1271   const MatScalar   *aa = a->a,*v;
1272 
1273   PetscFunctionBegin;
1274   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1275 
1276   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1277   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1278   tmp  = a->solve_work;
1279 
1280   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1281   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1282 
1283   /* forward solve the lower triangular */
1284   tmp[0] = b[*r++];
1285   for (i=1; i<n; i++) {
1286     v   = aa + ai[i] ;
1287     vi  = aj + ai[i] ;
1288     nz  = a->diag[i] - ai[i];
1289     sum = b[*r++];
1290     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1291     tmp[i] = sum;
1292   }
1293 
1294   /* backward solve the upper triangular */
1295   for (i=n-1; i>=0; i--){
1296     v   = aa + a->diag[i] + 1;
1297     vi  = aj + a->diag[i] + 1;
1298     nz  = ai[i+1] - a->diag[i] - 1;
1299     sum = tmp[i];
1300     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1301     tmp[i] = sum*aa[a->diag[i]];
1302     x[*c--] += tmp[i];
1303   }
1304 
1305   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1306   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1307   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1308   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1309   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1310 
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1316 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1317 {
1318   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1319   IS                iscol = a->col,isrow = a->row;
1320   PetscErrorCode    ierr;
1321   PetscInt          i, n = A->rmap->n,j;
1322   PetscInt          nz;
1323   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1324   PetscScalar       *x,*tmp,sum;
1325   const PetscScalar *b;
1326   const MatScalar   *aa = a->a,*v;
1327 
1328   PetscFunctionBegin;
1329   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1330 
1331   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1332   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1333   tmp  = a->solve_work;
1334 
1335   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1336   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1337 
1338   /* forward solve the lower triangular */
1339   tmp[0] = b[r[0]];
1340   v      = aa;
1341   vi     = aj;
1342   for (i=1; i<n; i++) {
1343     nz  = ai[i+1] - ai[i];
1344     sum = b[r[i]];
1345     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1346     tmp[i] = sum;
1347     v += nz; vi += nz;
1348   }
1349 
1350   /* backward solve the upper triangular */
1351   v  = aa + adiag[n-1];
1352   vi = aj + adiag[n-1];
1353   for (i=n-1; i>=0; i--){
1354     nz  = adiag[i] - adiag[i+1] - 1;
1355     sum = tmp[i];
1356     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1357     tmp[i] = sum*v[nz];
1358     x[c[i]] += tmp[i];
1359     v += nz+1; vi += nz+1;
1360   }
1361 
1362   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1363   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1364   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1365   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1366   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1367 
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1373 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1374 {
1375   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1376   IS                iscol = a->col,isrow = a->row;
1377   PetscErrorCode    ierr;
1378   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1379   PetscInt          i,n = A->rmap->n,j;
1380   PetscInt          nz;
1381   PetscScalar       *x,*tmp,s1;
1382   const MatScalar   *aa = a->a,*v;
1383   const PetscScalar *b;
1384 
1385   PetscFunctionBegin;
1386   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1387   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1388   tmp  = a->solve_work;
1389 
1390   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1391   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1392 
1393   /* copy the b into temp work space according to permutation */
1394   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1395 
1396   /* forward solve the U^T */
1397   for (i=0; i<n; i++) {
1398     v   = aa + diag[i] ;
1399     vi  = aj + diag[i] + 1;
1400     nz  = ai[i+1] - diag[i] - 1;
1401     s1  = tmp[i];
1402     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1403     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1404     tmp[i] = s1;
1405   }
1406 
1407   /* backward solve the L^T */
1408   for (i=n-1; i>=0; i--){
1409     v   = aa + diag[i] - 1 ;
1410     vi  = aj + diag[i] - 1 ;
1411     nz  = diag[i] - ai[i];
1412     s1  = tmp[i];
1413     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1414   }
1415 
1416   /* copy tmp into x according to permutation */
1417   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1418 
1419   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1420   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1421   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1422   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1423 
1424   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1430 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1431 {
1432   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1433   IS                iscol = a->col,isrow = a->row;
1434   PetscErrorCode    ierr;
1435   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1436   PetscInt          i,n = A->rmap->n,j;
1437   PetscInt          nz;
1438   PetscScalar       *x,*tmp,s1;
1439   const MatScalar   *aa = a->a,*v;
1440   const PetscScalar *b;
1441 
1442   PetscFunctionBegin;
1443   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1444   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1445   tmp  = a->solve_work;
1446 
1447   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1448   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1449 
1450   /* copy the b into temp work space according to permutation */
1451   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1452 
1453   /* forward solve the U^T */
1454   for (i=0; i<n; i++) {
1455     v   = aa + adiag[i+1] + 1;
1456     vi  = aj + adiag[i+1] + 1;
1457     nz  = adiag[i] - adiag[i+1] - 1;
1458     s1  = tmp[i];
1459     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1460     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1461     tmp[i] = s1;
1462   }
1463 
1464   /* backward solve the L^T */
1465   for (i=n-1; i>=0; i--){
1466     v   = aa + ai[i];
1467     vi  = aj + ai[i];
1468     nz  = ai[i+1] - ai[i];
1469     s1  = tmp[i];
1470     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1471   }
1472 
1473   /* copy tmp into x according to permutation */
1474   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1475 
1476   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1477   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1478   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1479   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1480 
1481   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1487 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1488 {
1489   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1490   IS                iscol = a->col,isrow = a->row;
1491   PetscErrorCode    ierr;
1492   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1493   PetscInt          i,n = A->rmap->n,j;
1494   PetscInt          nz;
1495   PetscScalar       *x,*tmp,s1;
1496   const MatScalar   *aa = a->a,*v;
1497   const PetscScalar *b;
1498 
1499   PetscFunctionBegin;
1500   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1501   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1502   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1503   tmp  = a->solve_work;
1504 
1505   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1506   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1507 
1508   /* copy the b into temp work space according to permutation */
1509   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1510 
1511   /* forward solve the U^T */
1512   for (i=0; i<n; i++) {
1513     v   = aa + diag[i] ;
1514     vi  = aj + diag[i] + 1;
1515     nz  = ai[i+1] - diag[i] - 1;
1516     s1  = tmp[i];
1517     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1518     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1519     tmp[i] = s1;
1520   }
1521 
1522   /* backward solve the L^T */
1523   for (i=n-1; i>=0; i--){
1524     v   = aa + diag[i] - 1 ;
1525     vi  = aj + diag[i] - 1 ;
1526     nz  = diag[i] - ai[i];
1527     s1  = tmp[i];
1528     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1529   }
1530 
1531   /* copy tmp into x according to permutation */
1532   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1533 
1534   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1535   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1536   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1537   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1538 
1539   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1545 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1546 {
1547   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1548   IS                iscol = a->col,isrow = a->row;
1549   PetscErrorCode    ierr;
1550   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1551   PetscInt          i,n = A->rmap->n,j;
1552   PetscInt          nz;
1553   PetscScalar       *x,*tmp,s1;
1554   const MatScalar   *aa = a->a,*v;
1555   const PetscScalar *b;
1556 
1557   PetscFunctionBegin;
1558   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1559   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1560   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1561   tmp  = a->solve_work;
1562 
1563   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1564   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1565 
1566   /* copy the b into temp work space according to permutation */
1567   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1568 
1569   /* forward solve the U^T */
1570   for (i=0; i<n; i++) {
1571     v   = aa + adiag[i+1] + 1;
1572     vi  = aj + adiag[i+1] + 1;
1573     nz  = adiag[i] - adiag[i+1] - 1;
1574     s1  = tmp[i];
1575     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1576     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1577     tmp[i] = s1;
1578   }
1579 
1580 
1581   /* backward solve the L^T */
1582   for (i=n-1; i>=0; i--){
1583     v   = aa + ai[i] ;
1584     vi  = aj + ai[i];
1585     nz  = ai[i+1] - ai[i];
1586     s1  = tmp[i];
1587     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1588   }
1589 
1590   /* copy tmp into x according to permutation */
1591   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1592 
1593   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1594   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1595   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1596   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1597 
1598   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 /* ----------------------------------------------------------------*/
1603 
1604 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool );
1605 
1606 /*
1607    ilu() under revised new data structure.
1608    Factored arrays bj and ba are stored as
1609      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1610 
1611    bi=fact->i is an array of size n+1, in which
1612    bi+
1613      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1614      bi[n]:  points to L(n-1,n-1)+1
1615 
1616   bdiag=fact->diag is an array of size n+1,in which
1617      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1618      bdiag[n]: points to entry of U(n-1,0)-1
1619 
1620    U(i,:) contains bdiag[i] as its last entry, i.e.,
1621     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1622 */
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1625 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1626 {
1627 
1628   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1629   PetscErrorCode     ierr;
1630   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1631   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1632   PetscBool          missing;
1633   IS                 isicol;
1634 
1635   PetscFunctionBegin;
1636   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1637   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1638   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1639   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1640 
1641   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1642   b    = (Mat_SeqAIJ*)(fact)->data;
1643 
1644   /* allocate matrix arrays for new data structure */
1645   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1646   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1647   b->singlemalloc = PETSC_TRUE;
1648   if (!b->diag){
1649     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1650     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1651   }
1652   bdiag = b->diag;
1653 
1654   if (n > 0) {
1655     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1656   }
1657 
1658   /* set bi and bj with new data structure */
1659   bi = b->i;
1660   bj = b->j;
1661 
1662   /* L part */
1663   bi[0] = 0;
1664   for (i=0; i<n; i++){
1665     nz = adiag[i] - ai[i];
1666     bi[i+1] = bi[i] + nz;
1667     aj = a->j + ai[i];
1668     for (j=0; j<nz; j++){
1669       /*   *bj = aj[j]; bj++; */
1670       bj[k++] = aj[j];
1671     }
1672   }
1673 
1674   /* U part */
1675   bdiag[n] = bi[n]-1;
1676   for (i=n-1; i>=0; i--){
1677     nz = ai[i+1] - adiag[i] - 1;
1678     aj = a->j + adiag[i] + 1;
1679     for (j=0; j<nz; j++){
1680       /*      *bj = aj[j]; bj++; */
1681       bj[k++] = aj[j];
1682     }
1683     /* diag[i] */
1684     /*    *bj = i; bj++; */
1685     bj[k++] = i;
1686     bdiag[i] = bdiag[i+1] + nz + 1;
1687   }
1688 
1689   fact->factortype             = MAT_FACTOR_ILU;
1690   fact->info.factor_mallocs    = 0;
1691   fact->info.fill_ratio_given  = info->fill;
1692   fact->info.fill_ratio_needed = 1.0;
1693   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1694 
1695   b       = (Mat_SeqAIJ*)(fact)->data;
1696   b->row  = isrow;
1697   b->col  = iscol;
1698   b->icol = isicol;
1699   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1700   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1701   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1707 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1708 {
1709   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1710   IS                 isicol;
1711   PetscErrorCode     ierr;
1712   const PetscInt     *r,*ic;
1713   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1714   PetscInt           *bi,*cols,nnz,*cols_lvl;
1715   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1716   PetscInt           i,levels,diagonal_fill;
1717   PetscBool          col_identity,row_identity;
1718   PetscReal          f;
1719   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1720   PetscBT            lnkbt;
1721   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1722   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1723   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1724 
1725   PetscFunctionBegin;
1726   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1727   /*
1728   PetscBool          olddatastruct=PETSC_FALSE;
1729   ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1730   if(olddatastruct){
1731     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1732     PetscFunctionReturn(0);
1733   }
1734   */
1735 
1736   levels = (PetscInt)info->levels;
1737   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1738   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1739 
1740   if (!levels && row_identity && col_identity) {
1741     /* special case: ilu(0) with natural ordering */
1742     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1743     if (a->inode.size) {
1744       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1745     }
1746     PetscFunctionReturn(0);
1747   }
1748 
1749   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1750   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1751   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1752   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1753 
1754   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1755   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1756   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1757   bi[0] = bdiag[0] = 0;
1758 
1759   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1760 
1761   /* create a linked list for storing column indices of the active row */
1762   nlnk = n + 1;
1763   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1764 
1765   /* initial FreeSpace size is f*(ai[n]+1) */
1766   f             = info->fill;
1767   diagonal_fill = (PetscInt)info->diagonal_fill;
1768   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1769   current_space = free_space;
1770   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1771   current_space_lvl = free_space_lvl;
1772 
1773   for (i=0; i<n; i++) {
1774     nzi = 0;
1775     /* copy current row into linked list */
1776     nnz  = ai[r[i]+1] - ai[r[i]];
1777     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1778     cols = aj + ai[r[i]];
1779     lnk[i] = -1; /* marker to indicate if diagonal exists */
1780     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1781     nzi += nlnk;
1782 
1783     /* make sure diagonal entry is included */
1784     if (diagonal_fill && lnk[i] == -1) {
1785       fm = n;
1786       while (lnk[fm] < i) fm = lnk[fm];
1787       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1788       lnk[fm]    = i;
1789       lnk_lvl[i] = 0;
1790       nzi++; dcount++;
1791     }
1792 
1793     /* add pivot rows into the active row */
1794     nzbd = 0;
1795     prow = lnk[n];
1796     while (prow < i) {
1797       nnz      = bdiag[prow];
1798       cols     = bj_ptr[prow] + nnz + 1;
1799       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1800       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1801       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1802       nzi += nlnk;
1803       prow = lnk[prow];
1804       nzbd++;
1805     }
1806     bdiag[i] = nzbd;
1807     bi[i+1]  = bi[i] + nzi;
1808 
1809     /* if free space is not available, make more free space */
1810     if (current_space->local_remaining<nzi) {
1811       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1812       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1813       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1814       reallocs++;
1815     }
1816 
1817     /* copy data into free_space and free_space_lvl, then initialize lnk */
1818     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1819     bj_ptr[i]    = current_space->array;
1820     bjlvl_ptr[i] = current_space_lvl->array;
1821 
1822     /* make sure the active row i has diagonal entry */
1823     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1824 
1825     current_space->array           += nzi;
1826     current_space->local_used      += nzi;
1827     current_space->local_remaining -= nzi;
1828     current_space_lvl->array           += nzi;
1829     current_space_lvl->local_used      += nzi;
1830     current_space_lvl->local_remaining -= nzi;
1831   }
1832 
1833   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1834   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1835 
1836   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1837   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1838   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1839 
1840   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1841   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1842   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1843 
1844 #if defined(PETSC_USE_INFO)
1845   {
1846     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1847     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1848     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1849     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1850     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1851     if (diagonal_fill) {
1852       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1853     }
1854   }
1855 #endif
1856 
1857   /* put together the new matrix */
1858   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1859   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1860   b = (Mat_SeqAIJ*)(fact)->data;
1861   b->free_a       = PETSC_TRUE;
1862   b->free_ij      = PETSC_TRUE;
1863   b->singlemalloc = PETSC_FALSE;
1864   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1865   b->j          = bj;
1866   b->i          = bi;
1867   b->diag       = bdiag;
1868   b->ilen       = 0;
1869   b->imax       = 0;
1870   b->row        = isrow;
1871   b->col        = iscol;
1872   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1873   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1874   b->icol       = isicol;
1875   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1876   /* In b structure:  Free imax, ilen, old a, old j.
1877      Allocate bdiag, solve_work, new a, new j */
1878   ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1879   b->maxnz = b->nz = bdiag[0]+1;
1880   (fact)->info.factor_mallocs    = reallocs;
1881   (fact)->info.fill_ratio_given  = f;
1882   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1883   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1884   if (a->inode.size) {
1885     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1886   }
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1892 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1893 {
1894   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1895   IS                 isicol;
1896   PetscErrorCode     ierr;
1897   const PetscInt     *r,*ic;
1898   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1899   PetscInt           *bi,*cols,nnz,*cols_lvl;
1900   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1901   PetscInt           i,levels,diagonal_fill;
1902   PetscBool          col_identity,row_identity;
1903   PetscReal          f;
1904   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1905   PetscBT            lnkbt;
1906   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1907   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1908   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1909   PetscBool          missing;
1910 
1911   PetscFunctionBegin;
1912   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1913   f             = info->fill;
1914   levels        = (PetscInt)info->levels;
1915   diagonal_fill = (PetscInt)info->diagonal_fill;
1916   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1917 
1918   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1919   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1920   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1921     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1922     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1923     if (a->inode.size) {
1924       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1925     }
1926     fact->factortype = MAT_FACTOR_ILU;
1927     (fact)->info.factor_mallocs    = 0;
1928     (fact)->info.fill_ratio_given  = info->fill;
1929     (fact)->info.fill_ratio_needed = 1.0;
1930     b               = (Mat_SeqAIJ*)(fact)->data;
1931     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1932     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1933     b->row              = isrow;
1934     b->col              = iscol;
1935     b->icol             = isicol;
1936     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1937     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1938     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1939     PetscFunctionReturn(0);
1940   }
1941 
1942   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1943   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1944 
1945   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1946   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1947   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1948   bi[0] = bdiag[0] = 0;
1949 
1950   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1951 
1952   /* create a linked list for storing column indices of the active row */
1953   nlnk = n + 1;
1954   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1955 
1956   /* initial FreeSpace size is f*(ai[n]+1) */
1957   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1958   current_space = free_space;
1959   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1960   current_space_lvl = free_space_lvl;
1961 
1962   for (i=0; i<n; i++) {
1963     nzi = 0;
1964     /* copy current row into linked list */
1965     nnz  = ai[r[i]+1] - ai[r[i]];
1966     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1967     cols = aj + ai[r[i]];
1968     lnk[i] = -1; /* marker to indicate if diagonal exists */
1969     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1970     nzi += nlnk;
1971 
1972     /* make sure diagonal entry is included */
1973     if (diagonal_fill && lnk[i] == -1) {
1974       fm = n;
1975       while (lnk[fm] < i) fm = lnk[fm];
1976       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1977       lnk[fm]    = i;
1978       lnk_lvl[i] = 0;
1979       nzi++; dcount++;
1980     }
1981 
1982     /* add pivot rows into the active row */
1983     nzbd = 0;
1984     prow = lnk[n];
1985     while (prow < i) {
1986       nnz      = bdiag[prow];
1987       cols     = bj_ptr[prow] + nnz + 1;
1988       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1989       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1990       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1991       nzi += nlnk;
1992       prow = lnk[prow];
1993       nzbd++;
1994     }
1995     bdiag[i] = nzbd;
1996     bi[i+1]  = bi[i] + nzi;
1997 
1998     /* if free space is not available, make more free space */
1999     if (current_space->local_remaining<nzi) {
2000       nnz = nzi*(n - i); /* estimated and max additional space needed */
2001       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2002       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2003       reallocs++;
2004     }
2005 
2006     /* copy data into free_space and free_space_lvl, then initialize lnk */
2007     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2008     bj_ptr[i]    = current_space->array;
2009     bjlvl_ptr[i] = current_space_lvl->array;
2010 
2011     /* make sure the active row i has diagonal entry */
2012     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2013 
2014     current_space->array           += nzi;
2015     current_space->local_used      += nzi;
2016     current_space->local_remaining -= nzi;
2017     current_space_lvl->array           += nzi;
2018     current_space_lvl->local_used      += nzi;
2019     current_space_lvl->local_remaining -= nzi;
2020   }
2021 
2022   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2023   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2024 
2025   /* destroy list of free space and other temporary arrays */
2026   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2027   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2028   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2029   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2030   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2031 
2032 #if defined(PETSC_USE_INFO)
2033   {
2034     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2035     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2036     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2037     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2038     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2039     if (diagonal_fill) {
2040       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2041     }
2042   }
2043 #endif
2044 
2045   /* put together the new matrix */
2046   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2047   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2048   b = (Mat_SeqAIJ*)(fact)->data;
2049   b->free_a       = PETSC_TRUE;
2050   b->free_ij      = PETSC_TRUE;
2051   b->singlemalloc = PETSC_FALSE;
2052   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2053   b->j          = bj;
2054   b->i          = bi;
2055   for (i=0; i<n; i++) bdiag[i] += bi[i];
2056   b->diag       = bdiag;
2057   b->ilen       = 0;
2058   b->imax       = 0;
2059   b->row        = isrow;
2060   b->col        = iscol;
2061   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2062   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2063   b->icol       = isicol;
2064   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2065   /* In b structure:  Free imax, ilen, old a, old j.
2066      Allocate bdiag, solve_work, new a, new j */
2067   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2068   b->maxnz             = b->nz = bi[n] ;
2069   (fact)->info.factor_mallocs    = reallocs;
2070   (fact)->info.fill_ratio_given  = f;
2071   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2072   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2073   if (a->inode.size) {
2074     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2075   }
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2081 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2082 {
2083   Mat            C = B;
2084   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2085   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2086   IS             ip=b->row,iip = b->icol;
2087   PetscErrorCode ierr;
2088   const PetscInt *rip,*riip;
2089   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2090   PetscInt       *ai=a->i,*aj=a->j;
2091   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2092   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2093   PetscBool      perm_identity;
2094   FactorShiftCtx sctx;
2095   PetscReal      rs;
2096   MatScalar      d,*v;
2097 
2098   PetscFunctionBegin;
2099   /* MatPivotSetUp(): initialize shift context sctx */
2100   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2101 
2102   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2103     sctx.shift_top = info->zeropivot;
2104     for (i=0; i<mbs; i++) {
2105       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2106       d  = (aa)[a->diag[i]];
2107       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2108       v  = aa+ai[i];
2109       nz = ai[i+1] - ai[i];
2110       for (j=0; j<nz; j++)
2111 	rs += PetscAbsScalar(v[j]);
2112       if (rs>sctx.shift_top) sctx.shift_top = rs;
2113     }
2114     sctx.shift_top   *= 1.1;
2115     sctx.nshift_max   = 5;
2116     sctx.shift_lo     = 0.;
2117     sctx.shift_hi     = 1.;
2118   }
2119 
2120   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2121   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2122 
2123   /* allocate working arrays
2124      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2125      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2126   */
2127   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2128 
2129   do {
2130     sctx.newshift = PETSC_FALSE;
2131 
2132     for (i=0; i<mbs; i++) c2r[i] = mbs;
2133     il[0] = 0;
2134 
2135     for (k = 0; k<mbs; k++){
2136       /* zero rtmp */
2137       nz = bi[k+1] - bi[k];
2138       bjtmp = bj + bi[k];
2139       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2140 
2141       /* load in initial unfactored row */
2142       bval = ba + bi[k];
2143       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2144       for (j = jmin; j < jmax; j++){
2145         col = riip[aj[j]];
2146         if (col >= k){ /* only take upper triangular entry */
2147           rtmp[col] = aa[j];
2148           *bval++   = 0.0; /* for in-place factorization */
2149         }
2150       }
2151       /* shift the diagonal of the matrix: ZeropivotApply() */
2152       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2153 
2154       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2155       dk = rtmp[k];
2156       i  = c2r[k]; /* first row to be added to k_th row  */
2157 
2158       while (i < k){
2159         nexti = c2r[i]; /* next row to be added to k_th row */
2160 
2161         /* compute multiplier, update diag(k) and U(i,k) */
2162         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2163         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2164         dk   += uikdi*ba[ili]; /* update diag[k] */
2165         ba[ili] = uikdi; /* -U(i,k) */
2166 
2167         /* add multiple of row i to k-th row */
2168         jmin = ili + 1; jmax = bi[i+1];
2169         if (jmin < jmax){
2170           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2171           /* update il and c2r for row i */
2172           il[i] = jmin;
2173           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2174         }
2175         i = nexti;
2176       }
2177 
2178       /* copy data into U(k,:) */
2179       rs   = 0.0;
2180       jmin = bi[k]; jmax = bi[k+1]-1;
2181       if (jmin < jmax) {
2182         for (j=jmin; j<jmax; j++){
2183           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2184         }
2185         /* add the k-th row into il and c2r */
2186         il[k] = jmin;
2187         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2188       }
2189 
2190       /* MatPivotCheck() */
2191       sctx.rs  = rs;
2192       sctx.pv  = dk;
2193       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2194       if(sctx.newshift) break;
2195       dk = sctx.pv;
2196 
2197       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2198     }
2199   } while (sctx.newshift);
2200 
2201   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2202   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2203   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2204 
2205   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2206   if (perm_identity){
2207     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2208     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2209     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2210     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2211   } else {
2212     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2213     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2214     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2215     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2216   }
2217 
2218   C->assembled    = PETSC_TRUE;
2219   C->preallocated = PETSC_TRUE;
2220   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2221 
2222   /* MatPivotView() */
2223   if (sctx.nshift){
2224     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2225       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2226     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2227       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2228     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2229       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2230     }
2231   }
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 #undef __FUNCT__
2236 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2237 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2238 {
2239   Mat            C = B;
2240   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2241   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2242   IS             ip=b->row,iip = b->icol;
2243   PetscErrorCode ierr;
2244   const PetscInt *rip,*riip;
2245   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2246   PetscInt       *ai=a->i,*aj=a->j;
2247   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2248   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2249   PetscBool      perm_identity;
2250   FactorShiftCtx sctx;
2251   PetscReal      rs;
2252   MatScalar      d,*v;
2253 
2254   PetscFunctionBegin;
2255   /* MatPivotSetUp(): initialize shift context sctx */
2256   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2257 
2258   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2259     sctx.shift_top = info->zeropivot;
2260     for (i=0; i<mbs; i++) {
2261       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2262       d  = (aa)[a->diag[i]];
2263       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2264       v  = aa+ai[i];
2265       nz = ai[i+1] - ai[i];
2266       for (j=0; j<nz; j++)
2267 	rs += PetscAbsScalar(v[j]);
2268       if (rs>sctx.shift_top) sctx.shift_top = rs;
2269     }
2270     sctx.shift_top   *= 1.1;
2271     sctx.nshift_max   = 5;
2272     sctx.shift_lo     = 0.;
2273     sctx.shift_hi     = 1.;
2274   }
2275 
2276   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2277   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2278 
2279   /* initialization */
2280   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2281 
2282   do {
2283     sctx.newshift = PETSC_FALSE;
2284 
2285     for (i=0; i<mbs; i++) jl[i] = mbs;
2286     il[0] = 0;
2287 
2288     for (k = 0; k<mbs; k++){
2289       /* zero rtmp */
2290       nz = bi[k+1] - bi[k];
2291       bjtmp = bj + bi[k];
2292       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2293 
2294       bval = ba + bi[k];
2295       /* initialize k-th row by the perm[k]-th row of A */
2296       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2297       for (j = jmin; j < jmax; j++){
2298         col = riip[aj[j]];
2299         if (col >= k){ /* only take upper triangular entry */
2300           rtmp[col] = aa[j];
2301           *bval++  = 0.0; /* for in-place factorization */
2302         }
2303       }
2304       /* shift the diagonal of the matrix */
2305       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2306 
2307       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2308       dk = rtmp[k];
2309       i = jl[k]; /* first row to be added to k_th row  */
2310 
2311       while (i < k){
2312         nexti = jl[i]; /* next row to be added to k_th row */
2313 
2314         /* compute multiplier, update diag(k) and U(i,k) */
2315         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2316         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2317         dk += uikdi*ba[ili];
2318         ba[ili] = uikdi; /* -U(i,k) */
2319 
2320         /* add multiple of row i to k-th row */
2321         jmin = ili + 1; jmax = bi[i+1];
2322         if (jmin < jmax){
2323           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2324           /* update il and jl for row i */
2325           il[i] = jmin;
2326           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2327         }
2328         i = nexti;
2329       }
2330 
2331       /* shift the diagonals when zero pivot is detected */
2332       /* compute rs=sum of abs(off-diagonal) */
2333       rs   = 0.0;
2334       jmin = bi[k]+1;
2335       nz   = bi[k+1] - jmin;
2336       bcol = bj + jmin;
2337       for (j=0; j<nz; j++) {
2338         rs += PetscAbsScalar(rtmp[bcol[j]]);
2339       }
2340 
2341       sctx.rs = rs;
2342       sctx.pv = dk;
2343       ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2344       if (sctx.newshift) break;
2345       dk = sctx.pv;
2346 
2347       /* copy data into U(k,:) */
2348       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2349       jmin = bi[k]+1; jmax = bi[k+1];
2350       if (jmin < jmax) {
2351         for (j=jmin; j<jmax; j++){
2352           col = bj[j]; ba[j] = rtmp[col];
2353         }
2354         /* add the k-th row into il and jl */
2355         il[k] = jmin;
2356         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2357       }
2358     }
2359   } while (sctx.newshift);
2360 
2361   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2362   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2363   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2364 
2365   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2366   if (perm_identity){
2367     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2370     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2371   } else {
2372     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2373     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2374     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2375     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2376   }
2377 
2378   C->assembled    = PETSC_TRUE;
2379   C->preallocated = PETSC_TRUE;
2380   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2381   if (sctx.nshift){
2382     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2383       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2384     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2385       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2386     }
2387   }
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 /*
2392    icc() under revised new data structure.
2393    Factored arrays bj and ba are stored as
2394      U(0,:),...,U(i,:),U(n-1,:)
2395 
2396    ui=fact->i is an array of size n+1, in which
2397    ui+
2398      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2399      ui[n]:  points to U(n-1,n-1)+1
2400 
2401   udiag=fact->diag is an array of size n,in which
2402      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2403 
2404    U(i,:) contains udiag[i] as its last entry, i.e.,
2405     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2406 */
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2410 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2411 {
2412   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2413   Mat_SeqSBAIJ       *b;
2414   PetscErrorCode     ierr;
2415   PetscBool          perm_identity,missing;
2416   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2417   const PetscInt     *rip,*riip;
2418   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2419   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2420   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2421   PetscReal          fill=info->fill,levels=info->levels;
2422   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2423   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2424   PetscBT            lnkbt;
2425   IS                 iperm;
2426 
2427   PetscFunctionBegin;
2428   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2429   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2430   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2431   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2432   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2433 
2434   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2435   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2436   ui[0] = 0;
2437 
2438   /* ICC(0) without matrix ordering: simply rearrange column indices */
2439   if (!levels && perm_identity) {
2440     for (i=0; i<am; i++) {
2441       ncols    = ai[i+1] - a->diag[i];
2442       ui[i+1]  = ui[i] + ncols;
2443       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2444     }
2445     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2446     cols = uj;
2447     for (i=0; i<am; i++) {
2448       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2449       ncols = ai[i+1] - a->diag[i] -1;
2450       for (j=0; j<ncols; j++) *cols++ = aj[j];
2451       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2452     }
2453   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2454     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2455     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2456 
2457     /* initialization */
2458     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2459 
2460     /* jl: linked list for storing indices of the pivot rows
2461        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2462     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2463     for (i=0; i<am; i++){
2464       jl[i] = am; il[i] = 0;
2465     }
2466 
2467     /* create and initialize a linked list for storing column indices of the active row k */
2468     nlnk = am + 1;
2469     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2470 
2471     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2472     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2473     current_space = free_space;
2474     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2475     current_space_lvl = free_space_lvl;
2476 
2477     for (k=0; k<am; k++){  /* for each active row k */
2478       /* initialize lnk by the column indices of row rip[k] of A */
2479       nzk   = 0;
2480       ncols = ai[rip[k]+1] - ai[rip[k]];
2481       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2482       ncols_upper = 0;
2483       for (j=0; j<ncols; j++){
2484         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2485         if (riip[i] >= k){ /* only take upper triangular entry */
2486           ajtmp[ncols_upper] = i;
2487           ncols_upper++;
2488         }
2489       }
2490       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2491       nzk += nlnk;
2492 
2493       /* update lnk by computing fill-in for each pivot row to be merged in */
2494       prow = jl[k]; /* 1st pivot row */
2495 
2496       while (prow < k){
2497         nextprow = jl[prow];
2498 
2499         /* merge prow into k-th row */
2500         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2501         jmax = ui[prow+1];
2502         ncols = jmax-jmin;
2503         i     = jmin - ui[prow];
2504         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2505         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2506         j     = *(uj - 1);
2507         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2508         nzk += nlnk;
2509 
2510         /* update il and jl for prow */
2511         if (jmin < jmax){
2512           il[prow] = jmin;
2513           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2514         }
2515         prow = nextprow;
2516       }
2517 
2518       /* if free space is not available, make more free space */
2519       if (current_space->local_remaining<nzk) {
2520         i  = am - k + 1; /* num of unfactored rows */
2521         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2522         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2523         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2524         reallocs++;
2525       }
2526 
2527       /* copy data into free_space and free_space_lvl, then initialize lnk */
2528       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2529       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2530 
2531       /* add the k-th row into il and jl */
2532       if (nzk > 1){
2533         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2534         jl[k] = jl[i]; jl[i] = k;
2535         il[k] = ui[k] + 1;
2536       }
2537       uj_ptr[k]     = current_space->array;
2538       uj_lvl_ptr[k] = current_space_lvl->array;
2539 
2540       current_space->array           += nzk;
2541       current_space->local_used      += nzk;
2542       current_space->local_remaining -= nzk;
2543 
2544       current_space_lvl->array           += nzk;
2545       current_space_lvl->local_used      += nzk;
2546       current_space_lvl->local_remaining -= nzk;
2547 
2548       ui[k+1] = ui[k] + nzk;
2549     }
2550 
2551     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2552     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2553     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2554     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2555 
2556     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2557     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2558     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2559     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2560     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2561 
2562   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2563 
2564   /* put together the new matrix in MATSEQSBAIJ format */
2565   b    = (Mat_SeqSBAIJ*)(fact)->data;
2566   b->singlemalloc = PETSC_FALSE;
2567   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2568   b->j    = uj;
2569   b->i    = ui;
2570   b->diag = udiag;
2571   b->free_diag = PETSC_TRUE;
2572   b->ilen = 0;
2573   b->imax = 0;
2574   b->row  = perm;
2575   b->col  = perm;
2576   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2577   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2578   b->icol = iperm;
2579   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2580   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2581   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2582   b->maxnz   = b->nz = ui[am];
2583   b->free_a  = PETSC_TRUE;
2584   b->free_ij = PETSC_TRUE;
2585 
2586   fact->info.factor_mallocs   = reallocs;
2587   fact->info.fill_ratio_given = fill;
2588   if (ai[am] != 0) {
2589     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2590     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2591   } else {
2592     fact->info.fill_ratio_needed = 0.0;
2593   }
2594 #if defined(PETSC_USE_INFO)
2595     if (ai[am] != 0) {
2596       PetscReal af = fact->info.fill_ratio_needed;
2597       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2598       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2599       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2600     } else {
2601       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2602     }
2603 #endif
2604   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 #undef __FUNCT__
2609 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2610 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2611 {
2612   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2613   Mat_SeqSBAIJ       *b;
2614   PetscErrorCode     ierr;
2615   PetscBool          perm_identity,missing;
2616   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2617   const PetscInt     *rip,*riip;
2618   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2619   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2620   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2621   PetscReal          fill=info->fill,levels=info->levels;
2622   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2623   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2624   PetscBT            lnkbt;
2625   IS                 iperm;
2626 
2627   PetscFunctionBegin;
2628   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2629   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2630   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2631   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2632   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2633 
2634   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2635   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2636   ui[0] = 0;
2637 
2638   /* ICC(0) without matrix ordering: simply copies fill pattern */
2639   if (!levels && perm_identity) {
2640 
2641     for (i=0; i<am; i++) {
2642       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2643       udiag[i] = ui[i];
2644     }
2645     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2646     cols = uj;
2647     for (i=0; i<am; i++) {
2648       aj    = a->j + a->diag[i];
2649       ncols = ui[i+1] - ui[i];
2650       for (j=0; j<ncols; j++) *cols++ = *aj++;
2651     }
2652   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2653     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2654     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2655 
2656     /* initialization */
2657     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2658 
2659     /* jl: linked list for storing indices of the pivot rows
2660        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2661     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2662     for (i=0; i<am; i++){
2663       jl[i] = am; il[i] = 0;
2664     }
2665 
2666     /* create and initialize a linked list for storing column indices of the active row k */
2667     nlnk = am + 1;
2668     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2669 
2670     /* initial FreeSpace size is fill*(ai[am]+1) */
2671     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2672     current_space = free_space;
2673     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2674     current_space_lvl = free_space_lvl;
2675 
2676     for (k=0; k<am; k++){  /* for each active row k */
2677       /* initialize lnk by the column indices of row rip[k] of A */
2678       nzk   = 0;
2679       ncols = ai[rip[k]+1] - ai[rip[k]];
2680       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2681       ncols_upper = 0;
2682       for (j=0; j<ncols; j++){
2683         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2684         if (riip[i] >= k){ /* only take upper triangular entry */
2685           ajtmp[ncols_upper] = i;
2686           ncols_upper++;
2687         }
2688       }
2689       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2690       nzk += nlnk;
2691 
2692       /* update lnk by computing fill-in for each pivot row to be merged in */
2693       prow = jl[k]; /* 1st pivot row */
2694 
2695       while (prow < k){
2696         nextprow = jl[prow];
2697 
2698         /* merge prow into k-th row */
2699         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2700         jmax = ui[prow+1];
2701         ncols = jmax-jmin;
2702         i     = jmin - ui[prow];
2703         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2704         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2705         j     = *(uj - 1);
2706         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2707         nzk += nlnk;
2708 
2709         /* update il and jl for prow */
2710         if (jmin < jmax){
2711           il[prow] = jmin;
2712           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2713         }
2714         prow = nextprow;
2715       }
2716 
2717       /* if free space is not available, make more free space */
2718       if (current_space->local_remaining<nzk) {
2719         i = am - k + 1; /* num of unfactored rows */
2720         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2721         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2722         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2723         reallocs++;
2724       }
2725 
2726       /* copy data into free_space and free_space_lvl, then initialize lnk */
2727       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2728       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2729 
2730       /* add the k-th row into il and jl */
2731       if (nzk > 1){
2732         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2733         jl[k] = jl[i]; jl[i] = k;
2734         il[k] = ui[k] + 1;
2735       }
2736       uj_ptr[k]     = current_space->array;
2737       uj_lvl_ptr[k] = current_space_lvl->array;
2738 
2739       current_space->array           += nzk;
2740       current_space->local_used      += nzk;
2741       current_space->local_remaining -= nzk;
2742 
2743       current_space_lvl->array           += nzk;
2744       current_space_lvl->local_used      += nzk;
2745       current_space_lvl->local_remaining -= nzk;
2746 
2747       ui[k+1] = ui[k] + nzk;
2748     }
2749 
2750 #if defined(PETSC_USE_INFO)
2751     if (ai[am] != 0) {
2752       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2753       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2754       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2755       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2756     } else {
2757       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2758     }
2759 #endif
2760 
2761     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2762     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2763     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2764     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2765 
2766     /* destroy list of free space and other temporary array(s) */
2767     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2768     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2769     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2770     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2771 
2772   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2773 
2774   /* put together the new matrix in MATSEQSBAIJ format */
2775 
2776   b    = (Mat_SeqSBAIJ*)fact->data;
2777   b->singlemalloc = PETSC_FALSE;
2778   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2779   b->j    = uj;
2780   b->i    = ui;
2781   b->diag = udiag;
2782   b->free_diag = PETSC_TRUE;
2783   b->ilen = 0;
2784   b->imax = 0;
2785   b->row  = perm;
2786   b->col  = perm;
2787   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2788   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2789   b->icol = iperm;
2790   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2791   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2792   ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2793   b->maxnz   = b->nz = ui[am];
2794   b->free_a  = PETSC_TRUE;
2795   b->free_ij = PETSC_TRUE;
2796 
2797   fact->info.factor_mallocs    = reallocs;
2798   fact->info.fill_ratio_given  = fill;
2799   if (ai[am] != 0) {
2800     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2801   } else {
2802     fact->info.fill_ratio_needed = 0.0;
2803   }
2804   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2810 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2811 {
2812   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2813   Mat_SeqSBAIJ       *b;
2814   PetscErrorCode     ierr;
2815   PetscBool          perm_identity;
2816   PetscReal          fill = info->fill;
2817   const PetscInt     *rip,*riip;
2818   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2819   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2820   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2821   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2822   PetscBT            lnkbt;
2823   IS                 iperm;
2824 
2825   PetscFunctionBegin;
2826   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2827   /* check whether perm is the identity mapping */
2828   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2829   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2830   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2831   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2832 
2833   /* initialization */
2834   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2835   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2836   ui[0] = 0;
2837 
2838   /* jl: linked list for storing indices of the pivot rows
2839      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2840   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2841   for (i=0; i<am; i++){
2842     jl[i] = am; il[i] = 0;
2843   }
2844 
2845   /* create and initialize a linked list for storing column indices of the active row k */
2846   nlnk = am + 1;
2847   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2848 
2849   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2850   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2851   current_space = free_space;
2852 
2853   for (k=0; k<am; k++){  /* for each active row k */
2854     /* initialize lnk by the column indices of row rip[k] of A */
2855     nzk   = 0;
2856     ncols = ai[rip[k]+1] - ai[rip[k]];
2857     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2858     ncols_upper = 0;
2859     for (j=0; j<ncols; j++){
2860       i = riip[*(aj + ai[rip[k]] + j)];
2861       if (i >= k){ /* only take upper triangular entry */
2862         cols[ncols_upper] = i;
2863         ncols_upper++;
2864       }
2865     }
2866     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2867     nzk += nlnk;
2868 
2869     /* update lnk by computing fill-in for each pivot row to be merged in */
2870     prow = jl[k]; /* 1st pivot row */
2871 
2872     while (prow < k){
2873       nextprow = jl[prow];
2874       /* merge prow into k-th row */
2875       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2876       jmax = ui[prow+1];
2877       ncols = jmax-jmin;
2878       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2879       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2880       nzk += nlnk;
2881 
2882       /* update il and jl for prow */
2883       if (jmin < jmax){
2884         il[prow] = jmin;
2885         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2886       }
2887       prow = nextprow;
2888     }
2889 
2890     /* if free space is not available, make more free space */
2891     if (current_space->local_remaining<nzk) {
2892       i  = am - k + 1; /* num of unfactored rows */
2893       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2894       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2895       reallocs++;
2896     }
2897 
2898     /* copy data into free space, then initialize lnk */
2899     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2900 
2901     /* add the k-th row into il and jl */
2902     if (nzk > 1){
2903       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2904       jl[k] = jl[i]; jl[i] = k;
2905       il[k] = ui[k] + 1;
2906     }
2907     ui_ptr[k] = current_space->array;
2908     current_space->array           += nzk;
2909     current_space->local_used      += nzk;
2910     current_space->local_remaining -= nzk;
2911 
2912     ui[k+1] = ui[k] + nzk;
2913   }
2914 
2915   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2916   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2917   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2918 
2919   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2920   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2921   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2922   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2923 
2924   /* put together the new matrix in MATSEQSBAIJ format */
2925 
2926   b = (Mat_SeqSBAIJ*)fact->data;
2927   b->singlemalloc = PETSC_FALSE;
2928   b->free_a       = PETSC_TRUE;
2929   b->free_ij      = PETSC_TRUE;
2930   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2931   b->j    = uj;
2932   b->i    = ui;
2933   b->diag = udiag;
2934   b->free_diag = PETSC_TRUE;
2935   b->ilen = 0;
2936   b->imax = 0;
2937   b->row  = perm;
2938   b->col  = perm;
2939   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2940   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2941   b->icol = iperm;
2942   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2943   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2944   ierr    = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2945   b->maxnz = b->nz = ui[am];
2946 
2947   fact->info.factor_mallocs    = reallocs;
2948   fact->info.fill_ratio_given  = fill;
2949   if (ai[am] != 0) {
2950     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2951     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2952   } else {
2953     fact->info.fill_ratio_needed = 0.0;
2954   }
2955 #if defined(PETSC_USE_INFO)
2956   if (ai[am] != 0) {
2957     PetscReal af = fact->info.fill_ratio_needed;
2958     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2959     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2960     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2961   } else {
2962      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2963   }
2964 #endif
2965   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2966   PetscFunctionReturn(0);
2967 }
2968 
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2971 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2972 {
2973   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2974   Mat_SeqSBAIJ       *b;
2975   PetscErrorCode     ierr;
2976   PetscBool          perm_identity;
2977   PetscReal          fill = info->fill;
2978   const PetscInt     *rip,*riip;
2979   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2980   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2981   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2982   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2983   PetscBT            lnkbt;
2984   IS                 iperm;
2985 
2986   PetscFunctionBegin;
2987   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2988   /* check whether perm is the identity mapping */
2989   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2990   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2991   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2992   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2993 
2994   /* initialization */
2995   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2996   ui[0] = 0;
2997 
2998   /* jl: linked list for storing indices of the pivot rows
2999      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3000   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
3001   for (i=0; i<am; i++){
3002     jl[i] = am; il[i] = 0;
3003   }
3004 
3005   /* create and initialize a linked list for storing column indices of the active row k */
3006   nlnk = am + 1;
3007   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3008 
3009   /* initial FreeSpace size is fill*(ai[am]+1) */
3010   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3011   current_space = free_space;
3012 
3013   for (k=0; k<am; k++){  /* for each active row k */
3014     /* initialize lnk by the column indices of row rip[k] of A */
3015     nzk   = 0;
3016     ncols = ai[rip[k]+1] - ai[rip[k]];
3017     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3018     ncols_upper = 0;
3019     for (j=0; j<ncols; j++){
3020       i = riip[*(aj + ai[rip[k]] + j)];
3021       if (i >= k){ /* only take upper triangular entry */
3022         cols[ncols_upper] = i;
3023         ncols_upper++;
3024       }
3025     }
3026     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3027     nzk += nlnk;
3028 
3029     /* update lnk by computing fill-in for each pivot row to be merged in */
3030     prow = jl[k]; /* 1st pivot row */
3031 
3032     while (prow < k){
3033       nextprow = jl[prow];
3034       /* merge prow into k-th row */
3035       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3036       jmax = ui[prow+1];
3037       ncols = jmax-jmin;
3038       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3039       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3040       nzk += nlnk;
3041 
3042       /* update il and jl for prow */
3043       if (jmin < jmax){
3044         il[prow] = jmin;
3045         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3046       }
3047       prow = nextprow;
3048     }
3049 
3050     /* if free space is not available, make more free space */
3051     if (current_space->local_remaining<nzk) {
3052       i = am - k + 1; /* num of unfactored rows */
3053       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3054       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3055       reallocs++;
3056     }
3057 
3058     /* copy data into free space, then initialize lnk */
3059     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3060 
3061     /* add the k-th row into il and jl */
3062     if (nzk-1 > 0){
3063       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3064       jl[k] = jl[i]; jl[i] = k;
3065       il[k] = ui[k] + 1;
3066     }
3067     ui_ptr[k] = current_space->array;
3068     current_space->array           += nzk;
3069     current_space->local_used      += nzk;
3070     current_space->local_remaining -= nzk;
3071 
3072     ui[k+1] = ui[k] + nzk;
3073   }
3074 
3075 #if defined(PETSC_USE_INFO)
3076   if (ai[am] != 0) {
3077     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3078     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3079     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3080     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3081   } else {
3082      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3083   }
3084 #endif
3085 
3086   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3087   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3088   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3089 
3090   /* destroy list of free space and other temporary array(s) */
3091   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3092   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3093   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3094 
3095   /* put together the new matrix in MATSEQSBAIJ format */
3096 
3097   b = (Mat_SeqSBAIJ*)fact->data;
3098   b->singlemalloc = PETSC_FALSE;
3099   b->free_a       = PETSC_TRUE;
3100   b->free_ij      = PETSC_TRUE;
3101   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3102   b->j    = uj;
3103   b->i    = ui;
3104   b->diag = 0;
3105   b->ilen = 0;
3106   b->imax = 0;
3107   b->row  = perm;
3108   b->col  = perm;
3109   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3110   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3111   b->icol = iperm;
3112   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3113   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3114   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3115   b->maxnz = b->nz = ui[am];
3116 
3117   fact->info.factor_mallocs    = reallocs;
3118   fact->info.fill_ratio_given  = fill;
3119   if (ai[am] != 0) {
3120     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3121   } else {
3122     fact->info.fill_ratio_needed = 0.0;
3123   }
3124   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3125   PetscFunctionReturn(0);
3126 }
3127 
3128 #undef __FUNCT__
3129 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3130 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3131 {
3132   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3133   PetscErrorCode    ierr;
3134   PetscInt          n = A->rmap->n;
3135   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3136   PetscScalar       *x,sum;
3137   const PetscScalar *b;
3138   const MatScalar   *aa = a->a,*v;
3139   PetscInt          i,nz;
3140 
3141   PetscFunctionBegin;
3142   if (!n) PetscFunctionReturn(0);
3143 
3144   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3145   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3146 
3147   /* forward solve the lower triangular */
3148   x[0] = b[0];
3149   v    = aa;
3150   vi   = aj;
3151   for (i=1; i<n; i++) {
3152     nz  = ai[i+1] - ai[i];
3153     sum = b[i];
3154     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3155     v  += nz;
3156     vi += nz;
3157     x[i] = sum;
3158   }
3159 
3160   /* backward solve the upper triangular */
3161   for (i=n-1; i>=0; i--){
3162     v   = aa + adiag[i+1] + 1;
3163     vi  = aj + adiag[i+1] + 1;
3164     nz = adiag[i] - adiag[i+1]-1;
3165     sum = x[i];
3166     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3167     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3168   }
3169 
3170   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3171   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3172   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 #undef __FUNCT__
3177 #define __FUNCT__ "MatSolve_SeqAIJ"
3178 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3179 {
3180   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3181   IS                iscol = a->col,isrow = a->row;
3182   PetscErrorCode    ierr;
3183   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3184   const PetscInt    *rout,*cout,*r,*c;
3185   PetscScalar       *x,*tmp,sum;
3186   const PetscScalar *b;
3187   const MatScalar   *aa = a->a,*v;
3188 
3189   PetscFunctionBegin;
3190   if (!n) PetscFunctionReturn(0);
3191 
3192   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3193   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3194   tmp  = a->solve_work;
3195 
3196   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3197   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3198 
3199   /* forward solve the lower triangular */
3200   tmp[0] = b[r[0]];
3201   v      = aa;
3202   vi     = aj;
3203   for (i=1; i<n; i++) {
3204     nz  = ai[i+1] - ai[i];
3205     sum = b[r[i]];
3206     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3207     tmp[i] = sum;
3208     v += nz; vi += nz;
3209   }
3210 
3211   /* backward solve the upper triangular */
3212   for (i=n-1; i>=0; i--){
3213     v   = aa + adiag[i+1]+1;
3214     vi  = aj + adiag[i+1]+1;
3215     nz  = adiag[i]-adiag[i+1]-1;
3216     sum = tmp[i];
3217     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3218     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3219   }
3220 
3221   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3222   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3223   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3224   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3225   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 #undef __FUNCT__
3230 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3231 /*
3232     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3233 */
3234 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3235 {
3236   Mat                B = *fact;
3237   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3238   IS                 isicol;
3239   PetscErrorCode     ierr;
3240   const PetscInt     *r,*ic;
3241   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3242   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3243   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3244   PetscInt           nlnk,*lnk;
3245   PetscBT            lnkbt;
3246   PetscBool          row_identity,icol_identity;
3247   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3248   const PetscInt     *ics;
3249   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3250   PetscReal          dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3251   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3252   PetscBool          missing;
3253 
3254   PetscFunctionBegin;
3255 
3256   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3257   if (dtcol   == PETSC_DEFAULT) dtcol   = 0.01; /* XXX unused! */
3258   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3259 
3260   /* ------- symbolic factorization, can be reused ---------*/
3261   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3262   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3263   adiag=a->diag;
3264 
3265   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3266 
3267   /* bdiag is location of diagonal in factor */
3268   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3269   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3270 
3271   /* allocate row pointers bi */
3272   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3273 
3274   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3275   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3276   nnz_max  = ai[n]+2*n*dtcount+2;
3277 
3278   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3279   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3280 
3281   /* put together the new matrix */
3282   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3283   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3284   b    = (Mat_SeqAIJ*)B->data;
3285   b->free_a       = PETSC_TRUE;
3286   b->free_ij      = PETSC_TRUE;
3287   b->singlemalloc = PETSC_FALSE;
3288   b->a          = ba;
3289   b->j          = bj;
3290   b->i          = bi;
3291   b->diag       = bdiag;
3292   b->ilen       = 0;
3293   b->imax       = 0;
3294   b->row        = isrow;
3295   b->col        = iscol;
3296   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3297   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3298   b->icol       = isicol;
3299   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3300 
3301   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3302   b->maxnz = nnz_max;
3303 
3304   B->factortype            = MAT_FACTOR_ILUDT;
3305   B->info.factor_mallocs   = 0;
3306   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3307   CHKMEMQ;
3308   /* ------- end of symbolic factorization ---------*/
3309 
3310   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3311   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3312   ics  = ic;
3313 
3314   /* linked list for storing column indices of the active row */
3315   nlnk = n + 1;
3316   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3317 
3318   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3319   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3320   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3321   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3322   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3323 
3324   bi[0]    = 0;
3325   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3326   bdiag_rev[n] = bdiag[0];
3327   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3328   for (i=0; i<n; i++) {
3329     /* copy initial fill into linked list */
3330     nzi = 0; /* nonzeros for active row i */
3331     nzi = ai[r[i]+1] - ai[r[i]];
3332     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3333     nzi_al = adiag[r[i]] - ai[r[i]];
3334     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3335     ajtmp = aj + ai[r[i]];
3336     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3337 
3338     /* load in initial (unfactored row) */
3339     aatmp = a->a + ai[r[i]];
3340     for (j=0; j<nzi; j++) {
3341       rtmp[ics[*ajtmp++]] = *aatmp++;
3342     }
3343 
3344     /* add pivot rows into linked list */
3345     row = lnk[n];
3346     while (row < i ) {
3347       nzi_bl = bi[row+1] - bi[row] + 1;
3348       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3349       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3350       nzi  += nlnk;
3351       row   = lnk[row];
3352     }
3353 
3354     /* copy data from lnk into jtmp, then initialize lnk */
3355     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3356 
3357     /* numerical factorization */
3358     bjtmp = jtmp;
3359     row   = *bjtmp++; /* 1st pivot row */
3360     while  ( row < i ) {
3361       pc         = rtmp + row;
3362       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3363       multiplier = (*pc) * (*pv);
3364       *pc        = multiplier;
3365       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3366         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3367         pv         = ba + bdiag[row+1] + 1;
3368         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3369         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3370         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3371         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
3372       }
3373       row = *bjtmp++;
3374     }
3375 
3376     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3377     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3378     nzi_bl = 0; j = 0;
3379     while (jtmp[j] < i){ /* Note: jtmp is sorted */
3380       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3381       nzi_bl++; j++;
3382     }
3383     nzi_bu = nzi - nzi_bl -1;
3384     while (j < nzi){
3385       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3386       j++;
3387     }
3388 
3389     bjtmp = bj + bi[i];
3390     batmp = ba + bi[i];
3391     /* apply level dropping rule to L part */
3392     ncut = nzi_al + dtcount;
3393     if (ncut < nzi_bl){
3394       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3395       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3396     } else {
3397       ncut = nzi_bl;
3398     }
3399     for (j=0; j<ncut; j++){
3400       bjtmp[j] = jtmp[j];
3401       batmp[j] = vtmp[j];
3402       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3403     }
3404     bi[i+1] = bi[i] + ncut;
3405     nzi = ncut + 1;
3406 
3407     /* apply level dropping rule to U part */
3408     ncut = nzi_au + dtcount;
3409     if (ncut < nzi_bu){
3410       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3411       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3412     } else {
3413       ncut = nzi_bu;
3414     }
3415     nzi += ncut;
3416 
3417     /* mark bdiagonal */
3418     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3419     bdiag_rev[n-i-1] = bdiag[i+1];
3420     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3421     bjtmp = bj + bdiag[i];
3422     batmp = ba + bdiag[i];
3423     *bjtmp = i;
3424     *batmp = diag_tmp; /* rtmp[i]; */
3425     if (*batmp == 0.0) {
3426       *batmp = dt+shift;
3427       /* printf(" row %d add shift %g\n",i,shift); */
3428     }
3429     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3430     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3431 
3432     bjtmp = bj + bdiag[i+1]+1;
3433     batmp = ba + bdiag[i+1]+1;
3434     for (k=0; k<ncut; k++){
3435       bjtmp[k] = jtmp[nzi_bl+1+k];
3436       batmp[k] = vtmp[nzi_bl+1+k];
3437       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3438     }
3439     /* printf("\n"); */
3440 
3441     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3442     /*
3443     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3444     printf(" ----------------------------\n");
3445     */
3446   } /* for (i=0; i<n; i++) */
3447   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3448   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3449 
3450   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3451   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3452 
3453   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3454   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3455   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3456   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3457 
3458   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3459   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3460 
3461   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3462   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3463   if (row_identity && icol_identity) {
3464     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3465   } else {
3466     B->ops->solve = MatSolve_SeqAIJ;
3467   }
3468 
3469   B->ops->solveadd          = 0;
3470   B->ops->solvetranspose    = 0;
3471   B->ops->solvetransposeadd = 0;
3472   B->ops->matsolve          = 0;
3473   B->assembled              = PETSC_TRUE;
3474   B->preallocated           = PETSC_TRUE;
3475   PetscFunctionReturn(0);
3476 }
3477 
3478 /* a wraper of MatILUDTFactor_SeqAIJ() */
3479 #undef __FUNCT__
3480 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3481 /*
3482     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3483 */
3484 
3485 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3486 {
3487   PetscErrorCode     ierr;
3488 
3489   PetscFunctionBegin;
3490   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 /*
3495    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3496    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3497 */
3498 #undef __FUNCT__
3499 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3500 /*
3501     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3502 */
3503 
3504 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3505 {
3506   Mat            C=fact;
3507   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3508   IS             isrow = b->row,isicol = b->icol;
3509   PetscErrorCode ierr;
3510   const PetscInt *r,*ic,*ics;
3511   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3512   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3513   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3514   PetscReal      dt=info->dt,shift=info->shiftamount;
3515   PetscBool      row_identity, col_identity;
3516 
3517   PetscFunctionBegin;
3518   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3519   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3520   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3521   ics  = ic;
3522 
3523   for (i=0; i<n; i++){
3524     /* initialize rtmp array */
3525     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3526     bjtmp = bj + bi[i];
3527     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3528     rtmp[i] = 0.0;
3529     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3530     bjtmp = bj + bdiag[i+1] + 1;
3531     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3532 
3533     /* load in initial unfactored row of A */
3534     /* printf("row %d\n",i); */
3535     nz    = ai[r[i]+1] - ai[r[i]];
3536     ajtmp = aj + ai[r[i]];
3537     v     = aa + ai[r[i]];
3538     for (j=0; j<nz; j++) {
3539       rtmp[ics[*ajtmp++]] = v[j];
3540       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3541     }
3542     /* printf("\n"); */
3543 
3544     /* numerical factorization */
3545     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3546     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3547     k = 0;
3548     while (k < nzl){
3549       row   = *bjtmp++;
3550       /* printf("  prow %d\n",row); */
3551       pc         = rtmp + row;
3552       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3553       multiplier = (*pc) * (*pv);
3554       *pc        = multiplier;
3555       if (PetscAbsScalar(multiplier) > dt){
3556         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3557         pv         = b->a + bdiag[row+1] + 1;
3558         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3559         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3560         /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */
3561       }
3562       k++;
3563     }
3564 
3565     /* finished row so stick it into b->a */
3566     /* L-part */
3567     pv = b->a + bi[i] ;
3568     pj = bj + bi[i] ;
3569     nzl = bi[i+1] - bi[i];
3570     for (j=0; j<nzl; j++) {
3571       pv[j] = rtmp[pj[j]];
3572       /* printf(" (%d,%g),",pj[j],pv[j]); */
3573     }
3574 
3575     /* diagonal: invert diagonal entries for simplier triangular solves */
3576     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3577     b->a[bdiag[i]] = 1.0/rtmp[i];
3578     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3579 
3580     /* U-part */
3581     pv = b->a + bdiag[i+1] + 1;
3582     pj = bj + bdiag[i+1] + 1;
3583     nzu = bdiag[i] - bdiag[i+1] - 1;
3584     for (j=0; j<nzu; j++) {
3585       pv[j] = rtmp[pj[j]];
3586       /* printf(" (%d,%g),",pj[j],pv[j]); */
3587     }
3588     /* printf("\n"); */
3589   }
3590 
3591   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3592   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3593   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3594 
3595   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3596   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3597   if (row_identity && col_identity) {
3598     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3599   } else {
3600     C->ops->solve   = MatSolve_SeqAIJ;
3601   }
3602   C->ops->solveadd           = 0;
3603   C->ops->solvetranspose     = 0;
3604   C->ops->solvetransposeadd  = 0;
3605   C->ops->matsolve           = 0;
3606   C->assembled    = PETSC_TRUE;
3607   C->preallocated = PETSC_TRUE;
3608   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3609   PetscFunctionReturn(0);
3610 }
3611