xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 312e372a7cf25c2aab02e3b974a12ea76c65fb8f)
1 #define PETSCMAT_DLL
2 
3 
4 #include "../src/mat/impls/aij/seq/aij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 #include "petscbt.h"
7 #include "../src/mat/utils/freespace.h"
8 
9 EXTERN_C_BEGIN
10 #undef __FUNCT__
11 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
12 /*
13       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
14 
15       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
16 */
17 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
18 {
19   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
20   PetscErrorCode    ierr;
21   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
22   const PetscInt    *ai = a->i, *aj = a->j;
23   const PetscScalar *aa = a->a;
24   PetscTruth        *done;
25   PetscReal         best,past = 0,future;
26 
27   PetscFunctionBegin;
28   /* pick initial row */
29   best = -1;
30   for (i=0; i<n; i++) {
31     future = 0.0;
32     for (j=ai[i]; j<ai[i+1]; j++) {
33       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
34     }
35     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
36     if (past/future > best) {
37       best = past/future;
38       current = i;
39     }
40   }
41 
42   ierr = PetscMalloc(n*sizeof(PetscTruth),&done);CHKERRQ(ierr);
43   ierr = PetscMemzero(done,n*sizeof(PetscTruth));CHKERRQ(ierr);
44   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
45   order[0] = current;
46   for (i=0; i<n-1; i++) {
47     done[current] = PETSC_TRUE;
48     best          = -1;
49     /* loop over all neighbors of current pivot */
50     for (j=ai[current]; j<ai[current+1]; j++) {
51       jj = aj[j];
52       if (done[jj]) continue;
53       /* loop over columns of potential next row computing weights for below and above diagonal */
54       past = future = 0.0;
55       for (k=ai[jj]; k<ai[jj+1]; k++) {
56         kk = aj[k];
57         if (done[kk]) past += PetscAbsScalar(aa[k]);
58         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
59       }
60       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
61       if (past/future > best) {
62         best = past/future;
63         newcurrent = jj;
64       }
65     }
66     if (best == -1) { /* no neighbors to select from so select best of all that remain */
67       best = -1;
68       for (k=0; k<n; k++) {
69         if (done[k]) continue;
70         future = 0.0;
71         past   = 0.0;
72         for (j=ai[k]; j<ai[k+1]; j++) {
73           kk = aj[j];
74           if (done[kk]) past += PetscAbsScalar(aa[j]);
75           else if (kk != k) future  += PetscAbsScalar(aa[j]);
76         }
77         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
78         if (past/future > best) {
79           best = past/future;
80           newcurrent = k;
81         }
82       }
83     }
84     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
85     current = newcurrent;
86     order[i+1] = current;
87   }
88   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,irow);CHKERRQ(ierr);
89   *icol = *irow;
90   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
91   ierr = PetscFree(done);CHKERRQ(ierr);
92   ierr = PetscFree(order);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
97 EXTERN_C_BEGIN
98 #undef __FUNCT__
99 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
100 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
101 {
102   PetscFunctionBegin;
103   *flg = PETSC_TRUE;
104   PetscFunctionReturn(0);
105 }
106 EXTERN_C_END
107 
108 EXTERN_C_BEGIN
109 #undef __FUNCT__
110 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
111 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
112 {
113   PetscInt           n = A->rmap->n;
114   PetscErrorCode     ierr;
115 
116   PetscFunctionBegin;
117   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
118   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
119   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
120     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
121     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
122     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
123   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
124     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
125     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
126     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
127     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
128   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
129   (*B)->factortype = ftype;
130   PetscFunctionReturn(0);
131 }
132 EXTERN_C_END
133 
134 #undef __FUNCT__
135 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
137 {
138   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
139   IS                 isicol;
140   PetscErrorCode     ierr;
141   const PetscInt     *r,*ic;
142   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
143   PetscInt           *bi,*bj,*ajtmp;
144   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
145   PetscReal          f;
146   PetscInt           nlnk,*lnk,k,**bi_ptr;
147   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
148   PetscBT            lnkbt;
149 
150   PetscFunctionBegin;
151   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
152   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
153   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
154   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
155 
156   /* get new row pointers */
157   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
158   bi[0] = 0;
159 
160   /* bdiag is location of diagonal in factor */
161   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
162   bdiag[0] = 0;
163 
164   /* linked list for storing column indices of the active row */
165   nlnk = n + 1;
166   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
167 
168   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
169 
170   /* initial FreeSpace size is f*(ai[n]+1) */
171   f = info->fill;
172   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
173   current_space = free_space;
174 
175   for (i=0; i<n; i++) {
176     /* copy previous fill into linked list */
177     nzi = 0;
178     nnz = ai[r[i]+1] - ai[r[i]];
179     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
180     ajtmp = aj + ai[r[i]];
181     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
182     nzi += nlnk;
183 
184     /* add pivot rows into linked list */
185     row = lnk[n];
186     while (row < i) {
187       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
188       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
189       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
190       nzi += nlnk;
191       row  = lnk[row];
192     }
193     bi[i+1] = bi[i] + nzi;
194     im[i]   = nzi;
195 
196     /* mark bdiag */
197     nzbd = 0;
198     nnz  = nzi;
199     k    = lnk[n];
200     while (nnz-- && k < i){
201       nzbd++;
202       k = lnk[k];
203     }
204     bdiag[i] = bi[i] + nzbd;
205 
206     /* if free space is not available, make more free space */
207     if (current_space->local_remaining<nzi) {
208       nnz = (n - i)*nzi; /* estimated and max additional space needed */
209       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
210       reallocs++;
211     }
212 
213     /* copy data into free space, then initialize lnk */
214     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
215     bi_ptr[i] = current_space->array;
216     current_space->array           += nzi;
217     current_space->local_used      += nzi;
218     current_space->local_remaining -= nzi;
219   }
220 #if defined(PETSC_USE_INFO)
221   if (ai[n] != 0) {
222     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
223     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
224     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
225     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
226     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
227   } else {
228     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
229   }
230 #endif
231 
232   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
233   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
234 
235   /* destroy list of free space and other temporary array(s) */
236   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
237   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
238   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
239   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
240 
241   /* put together the new matrix */
242   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
243   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
244   b    = (Mat_SeqAIJ*)(B)->data;
245   b->free_a       = PETSC_TRUE;
246   b->free_ij      = PETSC_TRUE;
247   b->singlemalloc = PETSC_FALSE;
248   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
249   b->j          = bj;
250   b->i          = bi;
251   b->diag       = bdiag;
252   b->ilen       = 0;
253   b->imax       = 0;
254   b->row        = isrow;
255   b->col        = iscol;
256   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
257   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
258   b->icol       = isicol;
259   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
260 
261   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
262   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
263   b->maxnz = b->nz = bi[n] ;
264 
265   (B)->factortype            = MAT_FACTOR_LU;
266   (B)->info.factor_mallocs   = reallocs;
267   (B)->info.fill_ratio_given = f;
268 
269   if (ai[n]) {
270     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
271   } else {
272     (B)->info.fill_ratio_needed = 0.0;
273   }
274   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
275   if (a->inode.size) {
276     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
277   }
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
283 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
284 {
285   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
286   IS                 isicol;
287   PetscErrorCode     ierr;
288   const PetscInt     *r,*ic;
289   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
290   PetscInt           *bi,*bj,*ajtmp;
291   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
292   PetscReal          f;
293   PetscInt           nlnk,*lnk,k,**bi_ptr;
294   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
295   PetscBT            lnkbt;
296   PetscTruth         olddatastruct=PETSC_FALSE;
297 
298   PetscFunctionBegin;
299   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
300   ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
301   if(olddatastruct){
302     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
303     PetscFunctionReturn(0);
304   }
305 
306   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
307   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
308   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
309   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
310 
311   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
313   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
314   bi[0] = bdiag[0] = 0;
315 
316   /* linked list for storing column indices of the active row */
317   nlnk = n + 1;
318   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
319 
320   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
321 
322   /* initial FreeSpace size is f*(ai[n]+1) */
323   f = info->fill;
324   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
325   current_space = free_space;
326 
327   for (i=0; i<n; i++) {
328     /* copy previous fill into linked list */
329     nzi = 0;
330     nnz = ai[r[i]+1] - ai[r[i]];
331     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
332     ajtmp = aj + ai[r[i]];
333     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
334     nzi += nlnk;
335 
336     /* add pivot rows into linked list */
337     row = lnk[n];
338     while (row < i){
339       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
340       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
342       nzi  += nlnk;
343       row   = lnk[row];
344     }
345     bi[i+1] = bi[i] + nzi;
346     im[i]   = nzi;
347 
348     /* mark bdiag */
349     nzbd = 0;
350     nnz  = nzi;
351     k    = lnk[n];
352     while (nnz-- && k < i){
353       nzbd++;
354       k = lnk[k];
355     }
356     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
357 
358     /* if free space is not available, make more free space */
359     if (current_space->local_remaining<nzi) {
360       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
361       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
362       reallocs++;
363     }
364 
365     /* copy data into free space, then initialize lnk */
366     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
367     bi_ptr[i] = current_space->array;
368     current_space->array           += nzi;
369     current_space->local_used      += nzi;
370     current_space->local_remaining -= nzi;
371   }
372 #if defined(PETSC_USE_INFO)
373   if (ai[n] != 0) {
374     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
375     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
376     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
377     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
378     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
379   } else {
380     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
381   }
382 #endif
383 
384   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
385   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
386 
387   /* destroy list of free space and other temporary array(s) */
388   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
389   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
390   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
391   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
392 
393   /* put together the new matrix */
394   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
395   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
396   b    = (Mat_SeqAIJ*)(B)->data;
397   b->free_a       = PETSC_TRUE;
398   b->free_ij      = PETSC_TRUE;
399   b->singlemalloc = PETSC_FALSE;
400   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
401   b->j          = bj;
402   b->i          = bi;
403   b->diag       = bdiag;
404   b->ilen       = 0;
405   b->imax       = 0;
406   b->row        = isrow;
407   b->col        = iscol;
408   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
409   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
410   b->icol       = isicol;
411   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
412 
413   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
414   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
415   b->maxnz = b->nz = bdiag[0]+1;
416   B->factortype            = MAT_FACTOR_LU;
417   B->info.factor_mallocs   = reallocs;
418   B->info.fill_ratio_given = f;
419 
420   if (ai[n]) {
421     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
422   } else {
423     B->info.fill_ratio_needed = 0.0;
424   }
425   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426   if (a->inode.size) {
427     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
428   }
429   PetscFunctionReturn(0);
430 }
431 
432 /*
433     Trouble in factorization, should we dump the original matrix?
434 */
435 #undef __FUNCT__
436 #define __FUNCT__ "MatFactorDumpMatrix"
437 PetscErrorCode MatFactorDumpMatrix(Mat A)
438 {
439   PetscErrorCode ierr;
440   PetscTruth     flg = PETSC_FALSE;
441 
442   PetscFunctionBegin;
443   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
444   if (flg) {
445     PetscViewer viewer;
446     char        filename[PETSC_MAX_PATH_LEN];
447 
448     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
449     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
450     ierr = MatView(A,viewer);CHKERRQ(ierr);
451     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
452   }
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
458 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
459 {
460   Mat              C=B;
461   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
462   IS               isrow = b->row,isicol = b->icol;
463   PetscErrorCode   ierr;
464   const PetscInt   *r,*ic,*ics;
465   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
466   PetscInt         i,j,k,nz,nzL,row,*pj;
467   const PetscInt   *ajtmp,*bjtmp;
468   MatScalar        *rtmp,*pc,multiplier,*pv;
469   const  MatScalar *aa=a->a,*v;
470   PetscTruth       row_identity,col_identity;
471   FactorShiftCtx   sctx;
472   const PetscInt   *ddiag;
473   PetscReal        rs;
474   MatScalar        d;
475 
476   PetscFunctionBegin;
477   /* MatPivotSetUp(): initialize shift context sctx */
478   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
479 
480   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
481     ddiag          = a->diag;
482     sctx.shift_top = info->zeropivot;
483     for (i=0; i<n; i++) {
484       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
485       d  = (aa)[ddiag[i]];
486       rs = -PetscAbsScalar(d) - PetscRealPart(d);
487       v  = aa+ai[i];
488       nz = ai[i+1] - ai[i];
489       for (j=0; j<nz; j++)
490 	rs += PetscAbsScalar(v[j]);
491       if (rs>sctx.shift_top) sctx.shift_top = rs;
492     }
493     sctx.shift_top   *= 1.1;
494     sctx.nshift_max   = 5;
495     sctx.shift_lo     = 0.;
496     sctx.shift_hi     = 1.;
497   }
498 
499   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
500   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
501   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
502   ics  = ic;
503 
504   do {
505     sctx.newshift = PETSC_FALSE;
506     for (i=0; i<n; i++){
507       /* zero rtmp */
508       /* L part */
509       nz    = bi[i+1] - bi[i];
510       bjtmp = bj + bi[i];
511       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
512 
513       /* U part */
514       nz = bdiag[i]-bdiag[i+1];
515       bjtmp = bj + bdiag[i+1]+1;
516       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
517 
518       /* load in initial (unfactored row) */
519       nz    = ai[r[i]+1] - ai[r[i]];
520       ajtmp = aj + ai[r[i]];
521       v     = aa + ai[r[i]];
522       for (j=0; j<nz; j++) {
523         rtmp[ics[ajtmp[j]]] = v[j];
524       }
525       /* ZeropivotApply() */
526       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
527 
528       /* elimination */
529       bjtmp = bj + bi[i];
530       row   = *bjtmp++;
531       nzL   = bi[i+1] - bi[i];
532       for(k=0; k < nzL;k++) {
533         pc = rtmp + row;
534         if (*pc != 0.0) {
535           pv         = b->a + bdiag[row];
536           multiplier = *pc * (*pv);
537           *pc        = multiplier;
538           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
539 	  pv = b->a + bdiag[row+1]+1;
540 	  nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
541           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
543         }
544         row = *bjtmp++;
545       }
546 
547       /* finished row so stick it into b->a */
548       rs = 0.0;
549       /* L part */
550       pv   = b->a + bi[i] ;
551       pj   = b->j + bi[i] ;
552       nz   = bi[i+1] - bi[i];
553       for (j=0; j<nz; j++) {
554         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
555       }
556 
557       /* U part */
558       pv = b->a + bdiag[i+1]+1;
559       pj = b->j + bdiag[i+1]+1;
560       nz = bdiag[i] - bdiag[i+1]-1;
561       for (j=0; j<nz; j++) {
562         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
563       }
564 
565       sctx.rs  = rs;
566       sctx.pv  = rtmp[i];
567       ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr);
568       if(sctx.newshift) break; /* break for-loop */
569       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
570 
571       /* Mark diagonal and invert diagonal for simplier triangular solves */
572       pv  = b->a + bdiag[i];
573       *pv = 1.0/rtmp[i];
574 
575     } /* endof for (i=0; i<n; i++){ */
576 
577     /* MatPivotRefine() */
578     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
579       /*
580        * if no shift in this attempt & shifting & started shifting & can refine,
581        * then try lower shift
582        */
583       sctx.shift_hi       = sctx.shift_fraction;
584       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
585       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
586       sctx.newshift       = PETSC_TRUE;
587       sctx.nshift++;
588     }
589   } while (sctx.newshift);
590 
591   ierr = PetscFree(rtmp);CHKERRQ(ierr);
592   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
593   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
594 
595   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
596   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
597   if (row_identity && col_identity) {
598     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
599   } else {
600     C->ops->solve = MatSolve_SeqAIJ;
601   }
602   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
603   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
604   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
605   C->ops->matsolve           = MatMatSolve_SeqAIJ;
606   C->assembled    = PETSC_TRUE;
607   C->preallocated = PETSC_TRUE;
608   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
609 
610   /* MatShiftView(A,info,&sctx) */
611   if (sctx.nshift){
612     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
613       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
614     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
615       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
616     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
617       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
618     }
619   }
620   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
627 {
628   Mat             C=B;
629   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
630   IS              isrow = b->row,isicol = b->icol;
631   PetscErrorCode  ierr;
632   const PetscInt   *r,*ic,*ics;
633   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
634   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
635   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
636   MatScalar       *pv,*rtmp,*pc,multiplier,d;
637   const MatScalar *v,*aa=a->a;
638   PetscReal       rs=0.0;
639   FactorShiftCtx  sctx;
640   const PetscInt  *ddiag;
641   PetscTruth      row_identity, col_identity;
642 
643   PetscFunctionBegin;
644   /* MatPivotSetUp(): initialize shift context sctx */
645   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
646 
647   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
648     ddiag          = a->diag;
649     sctx.shift_top = info->zeropivot;
650     for (i=0; i<n; i++) {
651       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
652       d  = (aa)[ddiag[i]];
653       rs = -PetscAbsScalar(d) - PetscRealPart(d);
654       v  = aa+ai[i];
655       nz = ai[i+1] - ai[i];
656       for (j=0; j<nz; j++)
657 	rs += PetscAbsScalar(v[j]);
658       if (rs>sctx.shift_top) sctx.shift_top = rs;
659     }
660     sctx.shift_top   *= 1.1;
661     sctx.nshift_max   = 5;
662     sctx.shift_lo     = 0.;
663     sctx.shift_hi     = 1.;
664   }
665 
666   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
667   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
668   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
669   ics  = ic;
670 
671   do {
672     sctx.newshift = PETSC_FALSE;
673     for (i=0; i<n; i++){
674       nz    = bi[i+1] - bi[i];
675       bjtmp = bj + bi[i];
676       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
677 
678       /* load in initial (unfactored row) */
679       nz    = ai[r[i]+1] - ai[r[i]];
680       ajtmp = aj + ai[r[i]];
681       v     = aa + ai[r[i]];
682       for (j=0; j<nz; j++) {
683         rtmp[ics[ajtmp[j]]] = v[j];
684       }
685       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
686 
687       row = *bjtmp++;
688       while  (row < i) {
689         pc = rtmp + row;
690         if (*pc != 0.0) {
691           pv         = b->a + diag_offset[row];
692           pj         = b->j + diag_offset[row] + 1;
693           multiplier = *pc / *pv++;
694           *pc        = multiplier;
695           nz         = bi[row+1] - diag_offset[row] - 1;
696           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
697           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
698         }
699         row = *bjtmp++;
700       }
701       /* finished row so stick it into b->a */
702       pv   = b->a + bi[i] ;
703       pj   = b->j + bi[i] ;
704       nz   = bi[i+1] - bi[i];
705       diag = diag_offset[i] - bi[i];
706       rs   = 0.0;
707       for (j=0; j<nz; j++) {
708         pv[j] = rtmp[pj[j]];
709         rs   += PetscAbsScalar(pv[j]);
710       }
711       rs   -= PetscAbsScalar(pv[diag]);
712 
713       sctx.rs  = rs;
714       sctx.pv  = pv[diag];
715       ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr);
716       if (sctx.newshift) break;
717       pv[diag] = sctx.pv;
718     }
719 
720     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
721       /*
722        * if no shift in this attempt & shifting & started shifting & can refine,
723        * then try lower shift
724        */
725       sctx.shift_hi       = sctx.shift_fraction;
726       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
727       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
728       sctx.newshift       = PETSC_TRUE;
729       sctx.nshift++;
730     }
731   } while (sctx.newshift);
732 
733   /* invert diagonal entries for simplier triangular solves */
734   for (i=0; i<n; i++) {
735     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
736   }
737   ierr = PetscFree(rtmp);CHKERRQ(ierr);
738   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
739   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
740 
741   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
742   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
743   if (row_identity && col_identity) {
744     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
745   } else {
746     C->ops->solve   = MatSolve_SeqAIJ_inplace;
747   }
748   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
749   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
750   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
751   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
752   C->assembled    = PETSC_TRUE;
753   C->preallocated = PETSC_TRUE;
754   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
755   if (sctx.nshift){
756      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
757       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
758     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
759       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
760     }
761   }
762   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
763   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
764   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
768 /*
769    This routine implements inplace ILU(0) with row or/and column permutations.
770    Input:
771      A - original matrix
772    Output;
773      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
774          a->j (col index) is permuted by the inverse of colperm, then sorted
775          a->a reordered accordingly with a->j
776          a->diag (ptr to diagonal elements) is updated.
777 */
778 #undef __FUNCT__
779 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
781 {
782   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
783   IS             isrow = a->row,isicol = a->icol;
784   PetscErrorCode ierr;
785   const PetscInt *r,*ic,*ics;
786   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
787   PetscInt       *ajtmp,nz,row;
788   PetscInt       *diag = a->diag,nbdiag,*pj;
789   PetscScalar    *rtmp,*pc,multiplier,d;
790   MatScalar      *pv,*v;
791   PetscReal      rs;
792   FactorShiftCtx sctx;
793   const  MatScalar *aa=a->a,*vtmp;
794 
795   PetscFunctionBegin;
796   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
797 
798   /* MatPivotSetUp(): initialize shift context sctx */
799   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
800 
801   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
802     const PetscInt   *ddiag = a->diag;
803     sctx.shift_top = info->zeropivot;
804     for (i=0; i<n; i++) {
805       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
806       d  = (aa)[ddiag[i]];
807       rs = -PetscAbsScalar(d) - PetscRealPart(d);
808       vtmp  = aa+ai[i];
809       nz = ai[i+1] - ai[i];
810       for (j=0; j<nz; j++)
811 	rs += PetscAbsScalar(vtmp[j]);
812       if (rs>sctx.shift_top) sctx.shift_top = rs;
813     }
814     sctx.shift_top   *= 1.1;
815     sctx.nshift_max   = 5;
816     sctx.shift_lo     = 0.;
817     sctx.shift_hi     = 1.;
818   }
819 
820   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
821   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
822   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
823   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
824   ics = ic;
825 
826 #if defined(MV)
827   sctx.shift_top      = 0.;
828   sctx.nshift_max     = 0;
829   sctx.shift_lo       = 0.;
830   sctx.shift_hi       = 0.;
831   sctx.shift_fraction = 0.;
832 
833   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
834     sctx.shift_top = 0.;
835     for (i=0; i<n; i++) {
836       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
837       d  = (a->a)[diag[i]];
838       rs = -PetscAbsScalar(d) - PetscRealPart(d);
839       v  = a->a+ai[i];
840       nz = ai[i+1] - ai[i];
841       for (j=0; j<nz; j++)
842 	rs += PetscAbsScalar(v[j]);
843       if (rs>sctx.shift_top) sctx.shift_top = rs;
844     }
845     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
846     sctx.shift_top    *= 1.1;
847     sctx.nshift_max   = 5;
848     sctx.shift_lo     = 0.;
849     sctx.shift_hi     = 1.;
850   }
851 
852   sctx.shift_amount = 0.;
853   sctx.nshift       = 0;
854 #endif
855 
856   do {
857     sctx.newshift = PETSC_FALSE;
858     for (i=0; i<n; i++){
859       /* load in initial unfactored row */
860       nz    = ai[r[i]+1] - ai[r[i]];
861       ajtmp = aj + ai[r[i]];
862       v     = a->a + ai[r[i]];
863       /* sort permuted ajtmp and values v accordingly */
864       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
865       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
866 
867       diag[r[i]] = ai[r[i]];
868       for (j=0; j<nz; j++) {
869         rtmp[ajtmp[j]] = v[j];
870         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
871       }
872       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
873 
874       row = *ajtmp++;
875       while  (row < i) {
876         pc = rtmp + row;
877         if (*pc != 0.0) {
878           pv         = a->a + diag[r[row]];
879           pj         = aj + diag[r[row]] + 1;
880 
881           multiplier = *pc / *pv++;
882           *pc        = multiplier;
883           nz         = ai[r[row]+1] - diag[r[row]] - 1;
884           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
885           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
886         }
887         row = *ajtmp++;
888       }
889       /* finished row so overwrite it onto a->a */
890       pv   = a->a + ai[r[i]] ;
891       pj   = aj + ai[r[i]] ;
892       nz   = ai[r[i]+1] - ai[r[i]];
893       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
894 
895       rs   = 0.0;
896       for (j=0; j<nz; j++) {
897         pv[j] = rtmp[pj[j]];
898         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
899       }
900 
901       sctx.rs  = rs;
902       sctx.pv  = pv[nbdiag];
903       ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr);
904       if (sctx.newshift) break;
905       pv[nbdiag] = sctx.pv;
906     }
907 
908     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
909       /*
910        * if no shift in this attempt & shifting & started shifting & can refine,
911        * then try lower shift
912        */
913       sctx.shift_hi        = sctx.shift_fraction;
914       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
915       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
916       sctx.newshift         = PETSC_TRUE;
917       sctx.nshift++;
918     }
919   } while (sctx.newshift);
920 
921   /* invert diagonal entries for simplier triangular solves */
922   for (i=0; i<n; i++) {
923     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
924   }
925 
926   ierr = PetscFree(rtmp);CHKERRQ(ierr);
927   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
928   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
929   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
930   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
931   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
932   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
933   A->assembled = PETSC_TRUE;
934   A->preallocated = PETSC_TRUE;
935   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
936   if (sctx.nshift){
937     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
938       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
939     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
940       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
941     }
942   }
943   PetscFunctionReturn(0);
944 }
945 
946 /* ----------------------------------------------------------- */
947 #undef __FUNCT__
948 #define __FUNCT__ "MatLUFactor_SeqAIJ"
949 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
950 {
951   PetscErrorCode ierr;
952   Mat            C;
953 
954   PetscFunctionBegin;
955   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
956   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
957   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
958   A->ops->solve            = C->ops->solve;
959   A->ops->solvetranspose   = C->ops->solvetranspose;
960   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
961   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 /* ----------------------------------------------------------- */
965 
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
969 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
970 {
971   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
972   IS                iscol = a->col,isrow = a->row;
973   PetscErrorCode    ierr;
974   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
975   PetscInt          nz;
976   const PetscInt    *rout,*cout,*r,*c;
977   PetscScalar       *x,*tmp,*tmps,sum;
978   const PetscScalar *b;
979   const MatScalar   *aa = a->a,*v;
980 
981   PetscFunctionBegin;
982   if (!n) PetscFunctionReturn(0);
983 
984   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
985   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
986   tmp  = a->solve_work;
987 
988   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
989   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
990 
991   /* forward solve the lower triangular */
992   tmp[0] = b[*r++];
993   tmps   = tmp;
994   for (i=1; i<n; i++) {
995     v   = aa + ai[i] ;
996     vi  = aj + ai[i] ;
997     nz  = a->diag[i] - ai[i];
998     sum = b[*r++];
999     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1000     tmp[i] = sum;
1001   }
1002 
1003   /* backward solve the upper triangular */
1004   for (i=n-1; i>=0; i--){
1005     v   = aa + a->diag[i] + 1;
1006     vi  = aj + a->diag[i] + 1;
1007     nz  = ai[i+1] - a->diag[i] - 1;
1008     sum = tmp[i];
1009     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1010     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1011   }
1012 
1013   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1014   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1015   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1016   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1017   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1023 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1024 {
1025   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1026   IS              iscol = a->col,isrow = a->row;
1027   PetscErrorCode  ierr;
1028   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1029   PetscInt        nz,neq;
1030   const PetscInt  *rout,*cout,*r,*c;
1031   PetscScalar     *x,*b,*tmp,*tmps,sum;
1032   const MatScalar *aa = a->a,*v;
1033   PetscTruth      bisdense,xisdense;
1034 
1035   PetscFunctionBegin;
1036   if (!n) PetscFunctionReturn(0);
1037 
1038   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1039   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1040   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1041   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1042 
1043   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1044   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1045 
1046   tmp  = a->solve_work;
1047   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1048   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1049 
1050   for (neq=0; neq<B->cmap->n; neq++){
1051     /* forward solve the lower triangular */
1052     tmp[0] = b[r[0]];
1053     tmps   = tmp;
1054     for (i=1; i<n; i++) {
1055       v   = aa + ai[i] ;
1056       vi  = aj + ai[i] ;
1057       nz  = a->diag[i] - ai[i];
1058       sum = b[r[i]];
1059       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1060       tmp[i] = sum;
1061     }
1062     /* backward solve the upper triangular */
1063     for (i=n-1; i>=0; i--){
1064       v   = aa + a->diag[i] + 1;
1065       vi  = aj + a->diag[i] + 1;
1066       nz  = ai[i+1] - a->diag[i] - 1;
1067       sum = tmp[i];
1068       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1069       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1070     }
1071 
1072     b += n;
1073     x += n;
1074   }
1075   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1076   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1077   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1078   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1079   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1085 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1086 {
1087   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1088   IS              iscol = a->col,isrow = a->row;
1089   PetscErrorCode  ierr;
1090   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1091   PetscInt        nz,neq;
1092   const PetscInt  *rout,*cout,*r,*c;
1093   PetscScalar     *x,*b,*tmp,sum;
1094   const MatScalar *aa = a->a,*v;
1095   PetscTruth      bisdense,xisdense;
1096 
1097   PetscFunctionBegin;
1098   if (!n) PetscFunctionReturn(0);
1099 
1100   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1101   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1102   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1103   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1104 
1105   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1106   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1107 
1108   tmp  = a->solve_work;
1109   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1110   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1111 
1112   for (neq=0; neq<B->cmap->n; neq++){
1113     /* forward solve the lower triangular */
1114     tmp[0] = b[r[0]];
1115     v      = aa;
1116     vi     = aj;
1117     for (i=1; i<n; i++) {
1118       nz  = ai[i+1] - ai[i];
1119       sum = b[r[i]];
1120       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1121       tmp[i] = sum;
1122       v += nz; vi += nz;
1123     }
1124 
1125     /* backward solve the upper triangular */
1126     for (i=n-1; i>=0; i--){
1127       v   = aa + adiag[i+1]+1;
1128       vi  = aj + adiag[i+1]+1;
1129       nz  = adiag[i]-adiag[i+1]-1;
1130       sum = tmp[i];
1131       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1132       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1133     }
1134 
1135     b += n;
1136     x += n;
1137   }
1138   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1139   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1140   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1141   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1142   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1148 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1149 {
1150   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1151   IS              iscol = a->col,isrow = a->row;
1152   PetscErrorCode  ierr;
1153   const PetscInt  *r,*c,*rout,*cout;
1154   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1155   PetscInt        nz,row;
1156   PetscScalar     *x,*b,*tmp,*tmps,sum;
1157   const MatScalar *aa = a->a,*v;
1158 
1159   PetscFunctionBegin;
1160   if (!n) PetscFunctionReturn(0);
1161 
1162   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1163   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1164   tmp  = a->solve_work;
1165 
1166   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1167   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1168 
1169   /* forward solve the lower triangular */
1170   tmp[0] = b[*r++];
1171   tmps   = tmp;
1172   for (row=1; row<n; row++) {
1173     i   = rout[row]; /* permuted row */
1174     v   = aa + ai[i] ;
1175     vi  = aj + ai[i] ;
1176     nz  = a->diag[i] - ai[i];
1177     sum = b[*r++];
1178     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1179     tmp[row] = sum;
1180   }
1181 
1182   /* backward solve the upper triangular */
1183   for (row=n-1; row>=0; row--){
1184     i   = rout[row]; /* permuted row */
1185     v   = aa + a->diag[i] + 1;
1186     vi  = aj + a->diag[i] + 1;
1187     nz  = ai[i+1] - a->diag[i] - 1;
1188     sum = tmp[row];
1189     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1190     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1191   }
1192 
1193   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1194   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1195   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1196   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1197   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 /* ----------------------------------------------------------- */
1202 #include "../src/mat/impls/aij/seq/ftn-kernels/fsolve.h"
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1205 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1206 {
1207   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1208   PetscErrorCode    ierr;
1209   PetscInt          n = A->rmap->n;
1210   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1211   PetscScalar       *x;
1212   const PetscScalar *b;
1213   const MatScalar   *aa = a->a;
1214 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1215   PetscInt          adiag_i,i,nz,ai_i;
1216   const PetscInt    *vi;
1217   const MatScalar   *v;
1218   PetscScalar       sum;
1219 #endif
1220 
1221   PetscFunctionBegin;
1222   if (!n) PetscFunctionReturn(0);
1223 
1224   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1225   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1226 
1227 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1228   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1229 #else
1230   /* forward solve the lower triangular */
1231   x[0] = b[0];
1232   for (i=1; i<n; i++) {
1233     ai_i = ai[i];
1234     v    = aa + ai_i;
1235     vi   = aj + ai_i;
1236     nz   = adiag[i] - ai_i;
1237     sum  = b[i];
1238     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1239     x[i] = sum;
1240   }
1241 
1242   /* backward solve the upper triangular */
1243   for (i=n-1; i>=0; i--){
1244     adiag_i = adiag[i];
1245     v       = aa + adiag_i + 1;
1246     vi      = aj + adiag_i + 1;
1247     nz      = ai[i+1] - adiag_i - 1;
1248     sum     = x[i];
1249     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1250     x[i]    = sum*aa[adiag_i];
1251   }
1252 #endif
1253   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1254   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1255   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1261 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1262 {
1263   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1264   IS                iscol = a->col,isrow = a->row;
1265   PetscErrorCode    ierr;
1266   PetscInt          i, n = A->rmap->n,j;
1267   PetscInt          nz;
1268   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1269   PetscScalar       *x,*tmp,sum;
1270   const PetscScalar *b;
1271   const MatScalar   *aa = a->a,*v;
1272 
1273   PetscFunctionBegin;
1274   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1275 
1276   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1277   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1278   tmp  = a->solve_work;
1279 
1280   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1281   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1282 
1283   /* forward solve the lower triangular */
1284   tmp[0] = b[*r++];
1285   for (i=1; i<n; i++) {
1286     v   = aa + ai[i] ;
1287     vi  = aj + ai[i] ;
1288     nz  = a->diag[i] - ai[i];
1289     sum = b[*r++];
1290     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1291     tmp[i] = sum;
1292   }
1293 
1294   /* backward solve the upper triangular */
1295   for (i=n-1; i>=0; i--){
1296     v   = aa + a->diag[i] + 1;
1297     vi  = aj + a->diag[i] + 1;
1298     nz  = ai[i+1] - a->diag[i] - 1;
1299     sum = tmp[i];
1300     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1301     tmp[i] = sum*aa[a->diag[i]];
1302     x[*c--] += tmp[i];
1303   }
1304 
1305   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1306   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1307   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1308   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1309   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1310 
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1316 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1317 {
1318   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1319   IS                iscol = a->col,isrow = a->row;
1320   PetscErrorCode    ierr;
1321   PetscInt          i, n = A->rmap->n,j;
1322   PetscInt          nz;
1323   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1324   PetscScalar       *x,*tmp,sum;
1325   const PetscScalar *b;
1326   const MatScalar   *aa = a->a,*v;
1327 
1328   PetscFunctionBegin;
1329   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1330 
1331   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1332   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1333   tmp  = a->solve_work;
1334 
1335   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1336   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1337 
1338   /* forward solve the lower triangular */
1339   tmp[0] = b[r[0]];
1340   v      = aa;
1341   vi     = aj;
1342   for (i=1; i<n; i++) {
1343     nz  = ai[i+1] - ai[i];
1344     sum = b[r[i]];
1345     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1346     tmp[i] = sum;
1347     v += nz; vi += nz;
1348   }
1349 
1350   /* backward solve the upper triangular */
1351   v  = aa + adiag[n-1];
1352   vi = aj + adiag[n-1];
1353   for (i=n-1; i>=0; i--){
1354     nz  = adiag[i] - adiag[i+1] - 1;
1355     sum = tmp[i];
1356     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1357     tmp[i] = sum*v[nz];
1358     x[c[i]] += tmp[i];
1359     v += nz+1; vi += nz+1;
1360   }
1361 
1362   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1363   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1364   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1365   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1366   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1367 
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1373 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1374 {
1375   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1376   IS                iscol = a->col,isrow = a->row;
1377   PetscErrorCode    ierr;
1378   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1379   PetscInt          i,n = A->rmap->n,j;
1380   PetscInt          nz;
1381   PetscScalar       *x,*tmp,s1;
1382   const MatScalar   *aa = a->a,*v;
1383   const PetscScalar *b;
1384 
1385   PetscFunctionBegin;
1386   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1387   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1388   tmp  = a->solve_work;
1389 
1390   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1391   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1392 
1393   /* copy the b into temp work space according to permutation */
1394   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1395 
1396   /* forward solve the U^T */
1397   for (i=0; i<n; i++) {
1398     v   = aa + diag[i] ;
1399     vi  = aj + diag[i] + 1;
1400     nz  = ai[i+1] - diag[i] - 1;
1401     s1  = tmp[i];
1402     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1403     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1404     tmp[i] = s1;
1405   }
1406 
1407   /* backward solve the L^T */
1408   for (i=n-1; i>=0; i--){
1409     v   = aa + diag[i] - 1 ;
1410     vi  = aj + diag[i] - 1 ;
1411     nz  = diag[i] - ai[i];
1412     s1  = tmp[i];
1413     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1414   }
1415 
1416   /* copy tmp into x according to permutation */
1417   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1418 
1419   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1420   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1421   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1422   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1423 
1424   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1430 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1431 {
1432   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1433   IS                iscol = a->col,isrow = a->row;
1434   PetscErrorCode    ierr;
1435   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1436   PetscInt          i,n = A->rmap->n,j;
1437   PetscInt          nz;
1438   PetscScalar       *x,*tmp,s1;
1439   const MatScalar   *aa = a->a,*v;
1440   const PetscScalar *b;
1441 
1442   PetscFunctionBegin;
1443   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1444   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1445   tmp  = a->solve_work;
1446 
1447   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1448   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1449 
1450   /* copy the b into temp work space according to permutation */
1451   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1452 
1453   /* forward solve the U^T */
1454   for (i=0; i<n; i++) {
1455     v   = aa + adiag[i+1] + 1;
1456     vi  = aj + adiag[i+1] + 1;
1457     nz  = adiag[i] - adiag[i+1] - 1;
1458     s1  = tmp[i];
1459     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1460     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1461     tmp[i] = s1;
1462   }
1463 
1464   /* backward solve the L^T */
1465   for (i=n-1; i>=0; i--){
1466     v   = aa + ai[i];
1467     vi  = aj + ai[i];
1468     nz  = ai[i+1] - ai[i];
1469     s1  = tmp[i];
1470     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1471   }
1472 
1473   /* copy tmp into x according to permutation */
1474   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1475 
1476   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1477   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1478   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1479   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1480 
1481   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1487 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1488 {
1489   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1490   IS                iscol = a->col,isrow = a->row;
1491   PetscErrorCode    ierr;
1492   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1493   PetscInt          i,n = A->rmap->n,j;
1494   PetscInt          nz;
1495   PetscScalar       *x,*tmp,s1;
1496   const MatScalar   *aa = a->a,*v;
1497   const PetscScalar *b;
1498 
1499   PetscFunctionBegin;
1500   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1501   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1502   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1503   tmp  = a->solve_work;
1504 
1505   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1506   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1507 
1508   /* copy the b into temp work space according to permutation */
1509   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1510 
1511   /* forward solve the U^T */
1512   for (i=0; i<n; i++) {
1513     v   = aa + diag[i] ;
1514     vi  = aj + diag[i] + 1;
1515     nz  = ai[i+1] - diag[i] - 1;
1516     s1  = tmp[i];
1517     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1518     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1519     tmp[i] = s1;
1520   }
1521 
1522   /* backward solve the L^T */
1523   for (i=n-1; i>=0; i--){
1524     v   = aa + diag[i] - 1 ;
1525     vi  = aj + diag[i] - 1 ;
1526     nz  = diag[i] - ai[i];
1527     s1  = tmp[i];
1528     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1529   }
1530 
1531   /* copy tmp into x according to permutation */
1532   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1533 
1534   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1535   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1536   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1537   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1538 
1539   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1545 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1546 {
1547   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1548   IS                iscol = a->col,isrow = a->row;
1549   PetscErrorCode    ierr;
1550   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1551   PetscInt          i,n = A->rmap->n,j;
1552   PetscInt          nz;
1553   PetscScalar       *x,*tmp,s1;
1554   const MatScalar   *aa = a->a,*v;
1555   const PetscScalar *b;
1556 
1557   PetscFunctionBegin;
1558   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1559   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1560   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1561   tmp  = a->solve_work;
1562 
1563   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1564   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1565 
1566   /* copy the b into temp work space according to permutation */
1567   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1568 
1569   /* forward solve the U^T */
1570   for (i=0; i<n; i++) {
1571     v   = aa + adiag[i+1] + 1;
1572     vi  = aj + adiag[i+1] + 1;
1573     nz  = adiag[i] - adiag[i+1] - 1;
1574     s1  = tmp[i];
1575     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1576     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1577     tmp[i] = s1;
1578   }
1579 
1580 
1581   /* backward solve the L^T */
1582   for (i=n-1; i>=0; i--){
1583     v   = aa + ai[i] ;
1584     vi  = aj + ai[i];
1585     nz  = ai[i+1] - ai[i];
1586     s1  = tmp[i];
1587     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1588   }
1589 
1590   /* copy tmp into x according to permutation */
1591   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1592 
1593   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1594   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1595   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1596   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1597 
1598   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 /* ----------------------------------------------------------------*/
1603 
1604 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscTruth);
1605 
1606 /*
1607    ilu() under revised new data structure.
1608    Factored arrays bj and ba are stored as
1609      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1610 
1611    bi=fact->i is an array of size n+1, in which
1612    bi+
1613      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1614      bi[n]:  points to L(n-1,n-1)+1
1615 
1616   bdiag=fact->diag is an array of size n+1,in which
1617      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1618      bdiag[n]: points to entry of U(n-1,0)-1
1619 
1620    U(i,:) contains bdiag[i] as its last entry, i.e.,
1621     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1622 */
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1625 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1626 {
1627 
1628   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1629   PetscErrorCode     ierr;
1630   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1631   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1632   PetscTruth         missing;
1633   IS                 isicol;
1634 
1635   PetscFunctionBegin;
1636   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1637   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1638   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1639   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1640 
1641   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1642   b    = (Mat_SeqAIJ*)(fact)->data;
1643 
1644   /* allocate matrix arrays for new data structure */
1645   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1646   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1647   b->singlemalloc = PETSC_TRUE;
1648   if (!b->diag){
1649     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1650     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1651   }
1652   bdiag = b->diag;
1653 
1654   if (n > 0) {
1655     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1656   }
1657 
1658   /* set bi and bj with new data structure */
1659   bi = b->i;
1660   bj = b->j;
1661 
1662   /* L part */
1663   bi[0] = 0;
1664   for (i=0; i<n; i++){
1665     nz = adiag[i] - ai[i];
1666     bi[i+1] = bi[i] + nz;
1667     aj = a->j + ai[i];
1668     for (j=0; j<nz; j++){
1669       /*   *bj = aj[j]; bj++; */
1670       bj[k++] = aj[j];
1671     }
1672   }
1673 
1674   /* U part */
1675   bdiag[n] = bi[n]-1;
1676   for (i=n-1; i>=0; i--){
1677     nz = ai[i+1] - adiag[i] - 1;
1678     aj = a->j + adiag[i] + 1;
1679     for (j=0; j<nz; j++){
1680       /*      *bj = aj[j]; bj++; */
1681       bj[k++] = aj[j];
1682     }
1683     /* diag[i] */
1684     /*    *bj = i; bj++; */
1685     bj[k++] = i;
1686     bdiag[i] = bdiag[i+1] + nz + 1;
1687   }
1688 
1689   fact->factortype             = MAT_FACTOR_ILU;
1690   fact->info.factor_mallocs    = 0;
1691   fact->info.fill_ratio_given  = info->fill;
1692   fact->info.fill_ratio_needed = 1.0;
1693   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1694 
1695   b       = (Mat_SeqAIJ*)(fact)->data;
1696   b->row  = isrow;
1697   b->col  = iscol;
1698   b->icol = isicol;
1699   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1700   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1701   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1707 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1708 {
1709   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1710   IS                 isicol;
1711   PetscErrorCode     ierr;
1712   const PetscInt     *r,*ic;
1713   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1714   PetscInt           *bi,*cols,nnz,*cols_lvl;
1715   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1716   PetscInt           i,levels,diagonal_fill;
1717   PetscTruth         col_identity,row_identity;
1718   PetscReal          f;
1719   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1720   PetscBT            lnkbt;
1721   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1722   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1723   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1724 
1725   PetscFunctionBegin;
1726   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1727   /*
1728   PetscTruth         olddatastruct=PETSC_FALSE;
1729   ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1730   if(olddatastruct){
1731     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1732     PetscFunctionReturn(0);
1733   }
1734   */
1735 
1736   levels = (PetscInt)info->levels;
1737   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1738   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1739 
1740   if (!levels && row_identity && col_identity) {
1741     /* special case: ilu(0) with natural ordering */
1742     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1743     if (a->inode.size) {
1744       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1745     }
1746     PetscFunctionReturn(0);
1747   }
1748 
1749   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1750   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1751   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1752   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1753 
1754   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1755   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1756   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1757   bi[0] = bdiag[0] = 0;
1758 
1759   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1760 
1761   /* create a linked list for storing column indices of the active row */
1762   nlnk = n + 1;
1763   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1764 
1765   /* initial FreeSpace size is f*(ai[n]+1) */
1766   f             = info->fill;
1767   diagonal_fill = (PetscInt)info->diagonal_fill;
1768   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1769   current_space = free_space;
1770   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1771   current_space_lvl = free_space_lvl;
1772 
1773   for (i=0; i<n; i++) {
1774     nzi = 0;
1775     /* copy current row into linked list */
1776     nnz  = ai[r[i]+1] - ai[r[i]];
1777     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1778     cols = aj + ai[r[i]];
1779     lnk[i] = -1; /* marker to indicate if diagonal exists */
1780     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1781     nzi += nlnk;
1782 
1783     /* make sure diagonal entry is included */
1784     if (diagonal_fill && lnk[i] == -1) {
1785       fm = n;
1786       while (lnk[fm] < i) fm = lnk[fm];
1787       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1788       lnk[fm]    = i;
1789       lnk_lvl[i] = 0;
1790       nzi++; dcount++;
1791     }
1792 
1793     /* add pivot rows into the active row */
1794     nzbd = 0;
1795     prow = lnk[n];
1796     while (prow < i) {
1797       nnz      = bdiag[prow];
1798       cols     = bj_ptr[prow] + nnz + 1;
1799       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1800       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1801       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1802       nzi += nlnk;
1803       prow = lnk[prow];
1804       nzbd++;
1805     }
1806     bdiag[i] = nzbd;
1807     bi[i+1]  = bi[i] + nzi;
1808 
1809     /* if free space is not available, make more free space */
1810     if (current_space->local_remaining<nzi) {
1811       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1812       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1813       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1814       reallocs++;
1815     }
1816 
1817     /* copy data into free_space and free_space_lvl, then initialize lnk */
1818     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1819     bj_ptr[i]    = current_space->array;
1820     bjlvl_ptr[i] = current_space_lvl->array;
1821 
1822     /* make sure the active row i has diagonal entry */
1823     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1824 
1825     current_space->array           += nzi;
1826     current_space->local_used      += nzi;
1827     current_space->local_remaining -= nzi;
1828     current_space_lvl->array           += nzi;
1829     current_space_lvl->local_used      += nzi;
1830     current_space_lvl->local_remaining -= nzi;
1831   }
1832 
1833   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1834   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1835 
1836   /* destroy list of free space and other temporary arrays */
1837   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1838 
1839   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1840   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1841 
1842   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1843   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1844   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1845 
1846 #if defined(PETSC_USE_INFO)
1847   {
1848     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1849     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1850     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1851     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1852     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1853     if (diagonal_fill) {
1854       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1855     }
1856   }
1857 #endif
1858 
1859   /* put together the new matrix */
1860   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1861   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1862   b = (Mat_SeqAIJ*)(fact)->data;
1863   b->free_a       = PETSC_TRUE;
1864   b->free_ij      = PETSC_TRUE;
1865   b->singlemalloc = PETSC_FALSE;
1866   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1867   b->j          = bj;
1868   b->i          = bi;
1869   b->diag       = bdiag;
1870   b->ilen       = 0;
1871   b->imax       = 0;
1872   b->row        = isrow;
1873   b->col        = iscol;
1874   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1875   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1876   b->icol       = isicol;
1877   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1878   /* In b structure:  Free imax, ilen, old a, old j.
1879      Allocate bdiag, solve_work, new a, new j */
1880   ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1881   b->maxnz = b->nz = bdiag[0]+1;
1882   (fact)->info.factor_mallocs    = reallocs;
1883   (fact)->info.fill_ratio_given  = f;
1884   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1885   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1886   if (a->inode.size) {
1887     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1888   }
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1894 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1895 {
1896   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1897   IS                 isicol;
1898   PetscErrorCode     ierr;
1899   const PetscInt     *r,*ic;
1900   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1901   PetscInt           *bi,*cols,nnz,*cols_lvl;
1902   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1903   PetscInt           i,levels,diagonal_fill;
1904   PetscTruth         col_identity,row_identity;
1905   PetscReal          f;
1906   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1907   PetscBT            lnkbt;
1908   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1909   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1910   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1911   PetscTruth         missing;
1912 
1913   PetscFunctionBegin;
1914   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1915   f             = info->fill;
1916   levels        = (PetscInt)info->levels;
1917   diagonal_fill = (PetscInt)info->diagonal_fill;
1918   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1919 
1920   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1921   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1922   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1923     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1924     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1925     if (a->inode.size) {
1926       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1927     }
1928     fact->factortype = MAT_FACTOR_ILU;
1929     (fact)->info.factor_mallocs    = 0;
1930     (fact)->info.fill_ratio_given  = info->fill;
1931     (fact)->info.fill_ratio_needed = 1.0;
1932     b               = (Mat_SeqAIJ*)(fact)->data;
1933     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1934     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1935     b->row              = isrow;
1936     b->col              = iscol;
1937     b->icol             = isicol;
1938     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1939     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1940     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1941     PetscFunctionReturn(0);
1942   }
1943 
1944   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1945   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1946 
1947   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1948   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1949   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1950   bi[0] = bdiag[0] = 0;
1951 
1952   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1953 
1954   /* create a linked list for storing column indices of the active row */
1955   nlnk = n + 1;
1956   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1957 
1958   /* initial FreeSpace size is f*(ai[n]+1) */
1959   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1960   current_space = free_space;
1961   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1962   current_space_lvl = free_space_lvl;
1963 
1964   for (i=0; i<n; i++) {
1965     nzi = 0;
1966     /* copy current row into linked list */
1967     nnz  = ai[r[i]+1] - ai[r[i]];
1968     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1969     cols = aj + ai[r[i]];
1970     lnk[i] = -1; /* marker to indicate if diagonal exists */
1971     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1972     nzi += nlnk;
1973 
1974     /* make sure diagonal entry is included */
1975     if (diagonal_fill && lnk[i] == -1) {
1976       fm = n;
1977       while (lnk[fm] < i) fm = lnk[fm];
1978       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1979       lnk[fm]    = i;
1980       lnk_lvl[i] = 0;
1981       nzi++; dcount++;
1982     }
1983 
1984     /* add pivot rows into the active row */
1985     nzbd = 0;
1986     prow = lnk[n];
1987     while (prow < i) {
1988       nnz      = bdiag[prow];
1989       cols     = bj_ptr[prow] + nnz + 1;
1990       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1991       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1992       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1993       nzi += nlnk;
1994       prow = lnk[prow];
1995       nzbd++;
1996     }
1997     bdiag[i] = nzbd;
1998     bi[i+1]  = bi[i] + nzi;
1999 
2000     /* if free space is not available, make more free space */
2001     if (current_space->local_remaining<nzi) {
2002       nnz = nzi*(n - i); /* estimated and max additional space needed */
2003       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2004       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2005       reallocs++;
2006     }
2007 
2008     /* copy data into free_space and free_space_lvl, then initialize lnk */
2009     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2010     bj_ptr[i]    = current_space->array;
2011     bjlvl_ptr[i] = current_space_lvl->array;
2012 
2013     /* make sure the active row i has diagonal entry */
2014     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2015 
2016     current_space->array           += nzi;
2017     current_space->local_used      += nzi;
2018     current_space->local_remaining -= nzi;
2019     current_space_lvl->array           += nzi;
2020     current_space_lvl->local_used      += nzi;
2021     current_space_lvl->local_remaining -= nzi;
2022   }
2023 
2024   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2025   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2026 
2027   /* destroy list of free space and other temporary arrays */
2028   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2029   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2030   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2031   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2032   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2033 
2034 #if defined(PETSC_USE_INFO)
2035   {
2036     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2037     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2038     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2039     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2040     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2041     if (diagonal_fill) {
2042       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2043     }
2044   }
2045 #endif
2046 
2047   /* put together the new matrix */
2048   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2049   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2050   b = (Mat_SeqAIJ*)(fact)->data;
2051   b->free_a       = PETSC_TRUE;
2052   b->free_ij      = PETSC_TRUE;
2053   b->singlemalloc = PETSC_FALSE;
2054   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2055   b->j          = bj;
2056   b->i          = bi;
2057   for (i=0; i<n; i++) bdiag[i] += bi[i];
2058   b->diag       = bdiag;
2059   b->ilen       = 0;
2060   b->imax       = 0;
2061   b->row        = isrow;
2062   b->col        = iscol;
2063   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2064   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2065   b->icol       = isicol;
2066   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2067   /* In b structure:  Free imax, ilen, old a, old j.
2068      Allocate bdiag, solve_work, new a, new j */
2069   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2070   b->maxnz             = b->nz = bi[n] ;
2071   (fact)->info.factor_mallocs    = reallocs;
2072   (fact)->info.fill_ratio_given  = f;
2073   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2074   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2075   if (a->inode.size) {
2076     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2077   }
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 #undef __FUNCT__
2082 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2083 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2084 {
2085   Mat            C = B;
2086   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2087   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2088   IS             ip=b->row,iip = b->icol;
2089   PetscErrorCode ierr;
2090   const PetscInt *rip,*riip;
2091   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2092   PetscInt       *ai=a->i,*aj=a->j;
2093   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2094   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2095   PetscTruth     perm_identity;
2096   FactorShiftCtx sctx;
2097   PetscReal      rs;
2098   MatScalar      d,*v;
2099 
2100   PetscFunctionBegin;
2101   /* MatPivotSetUp(): initialize shift context sctx */
2102   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2103 
2104   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2105     sctx.shift_top = info->zeropivot;
2106     for (i=0; i<mbs; i++) {
2107       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2108       d  = (aa)[a->diag[i]];
2109       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2110       v  = aa+ai[i];
2111       nz = ai[i+1] - ai[i];
2112       for (j=0; j<nz; j++)
2113 	rs += PetscAbsScalar(v[j]);
2114       if (rs>sctx.shift_top) sctx.shift_top = rs;
2115     }
2116     sctx.shift_top   *= 1.1;
2117     sctx.nshift_max   = 5;
2118     sctx.shift_lo     = 0.;
2119     sctx.shift_hi     = 1.;
2120   }
2121 
2122   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2123   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2124 
2125   /* allocate working arrays
2126      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2127      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2128   */
2129   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2130 
2131   do {
2132     sctx.newshift = PETSC_FALSE;
2133 
2134     for (i=0; i<mbs; i++) c2r[i] = mbs;
2135     il[0] = 0;
2136 
2137     for (k = 0; k<mbs; k++){
2138       /* zero rtmp */
2139       nz = bi[k+1] - bi[k];
2140       bjtmp = bj + bi[k];
2141       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2142 
2143       /* load in initial unfactored row */
2144       bval = ba + bi[k];
2145       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2146       for (j = jmin; j < jmax; j++){
2147         col = riip[aj[j]];
2148         if (col >= k){ /* only take upper triangular entry */
2149           rtmp[col] = aa[j];
2150           *bval++   = 0.0; /* for in-place factorization */
2151         }
2152       }
2153       /* shift the diagonal of the matrix: ZeropivotApply() */
2154       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2155 
2156       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2157       dk = rtmp[k];
2158       i  = c2r[k]; /* first row to be added to k_th row  */
2159 
2160       while (i < k){
2161         nexti = c2r[i]; /* next row to be added to k_th row */
2162 
2163         /* compute multiplier, update diag(k) and U(i,k) */
2164         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2165         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2166         dk   += uikdi*ba[ili]; /* update diag[k] */
2167         ba[ili] = uikdi; /* -U(i,k) */
2168 
2169         /* add multiple of row i to k-th row */
2170         jmin = ili + 1; jmax = bi[i+1];
2171         if (jmin < jmax){
2172           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2173           /* update il and c2r for row i */
2174           il[i] = jmin;
2175           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2176         }
2177         i = nexti;
2178       }
2179 
2180       /* copy data into U(k,:) */
2181       rs   = 0.0;
2182       jmin = bi[k]; jmax = bi[k+1]-1;
2183       if (jmin < jmax) {
2184         for (j=jmin; j<jmax; j++){
2185           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2186         }
2187         /* add the k-th row into il and c2r */
2188         il[k] = jmin;
2189         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2190       }
2191 
2192       /* MatPivotCheck() */
2193       sctx.rs  = rs;
2194       sctx.pv  = dk;
2195       ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr);
2196       if(sctx.newshift) break;
2197       dk = sctx.pv;
2198 
2199       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2200     }
2201   } while (sctx.newshift);
2202 
2203   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2204   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2205   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2206 
2207   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2208   if (perm_identity){
2209     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2210     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2211     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2212     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2213   } else {
2214     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2215     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2216     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2217     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2218   }
2219 
2220   C->assembled    = PETSC_TRUE;
2221   C->preallocated = PETSC_TRUE;
2222   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2223 
2224   /* MatPivotView() */
2225   if (sctx.nshift){
2226     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2227       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2228     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2229       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2230     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2231       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2232     }
2233   }
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2239 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2240 {
2241   Mat            C = B;
2242   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2243   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2244   IS             ip=b->row,iip = b->icol;
2245   PetscErrorCode ierr;
2246   const PetscInt *rip,*riip;
2247   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2248   PetscInt       *ai=a->i,*aj=a->j;
2249   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2250   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2251   PetscTruth     perm_identity;
2252   FactorShiftCtx sctx;
2253   PetscReal      rs;
2254   MatScalar      d,*v;
2255 
2256   PetscFunctionBegin;
2257   /* MatPivotSetUp(): initialize shift context sctx */
2258   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2259 
2260   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2261     sctx.shift_top = info->zeropivot;
2262     for (i=0; i<mbs; i++) {
2263       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2264       d  = (aa)[a->diag[i]];
2265       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2266       v  = aa+ai[i];
2267       nz = ai[i+1] - ai[i];
2268       for (j=0; j<nz; j++)
2269 	rs += PetscAbsScalar(v[j]);
2270       if (rs>sctx.shift_top) sctx.shift_top = rs;
2271     }
2272     sctx.shift_top   *= 1.1;
2273     sctx.nshift_max   = 5;
2274     sctx.shift_lo     = 0.;
2275     sctx.shift_hi     = 1.;
2276   }
2277 
2278   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2279   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2280 
2281   /* initialization */
2282   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2283 
2284   do {
2285     sctx.newshift = PETSC_FALSE;
2286 
2287     for (i=0; i<mbs; i++) jl[i] = mbs;
2288     il[0] = 0;
2289 
2290     for (k = 0; k<mbs; k++){
2291       /* zero rtmp */
2292       nz = bi[k+1] - bi[k];
2293       bjtmp = bj + bi[k];
2294       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2295 
2296       bval = ba + bi[k];
2297       /* initialize k-th row by the perm[k]-th row of A */
2298       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2299       for (j = jmin; j < jmax; j++){
2300         col = riip[aj[j]];
2301         if (col >= k){ /* only take upper triangular entry */
2302           rtmp[col] = aa[j];
2303           *bval++  = 0.0; /* for in-place factorization */
2304         }
2305       }
2306       /* shift the diagonal of the matrix */
2307       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2308 
2309       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2310       dk = rtmp[k];
2311       i = jl[k]; /* first row to be added to k_th row  */
2312 
2313       while (i < k){
2314         nexti = jl[i]; /* next row to be added to k_th row */
2315 
2316         /* compute multiplier, update diag(k) and U(i,k) */
2317         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2318         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2319         dk += uikdi*ba[ili];
2320         ba[ili] = uikdi; /* -U(i,k) */
2321 
2322         /* add multiple of row i to k-th row */
2323         jmin = ili + 1; jmax = bi[i+1];
2324         if (jmin < jmax){
2325           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2326           /* update il and jl for row i */
2327           il[i] = jmin;
2328           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2329         }
2330         i = nexti;
2331       }
2332 
2333       /* shift the diagonals when zero pivot is detected */
2334       /* compute rs=sum of abs(off-diagonal) */
2335       rs   = 0.0;
2336       jmin = bi[k]+1;
2337       nz   = bi[k+1] - jmin;
2338       bcol = bj + jmin;
2339       for (j=0; j<nz; j++) {
2340         rs += PetscAbsScalar(rtmp[bcol[j]]);
2341       }
2342 
2343       sctx.rs = rs;
2344       sctx.pv = dk;
2345       ierr = MatPivotCheck(info,&sctx,k);CHKERRQ(ierr);
2346       if (sctx.newshift) break;
2347       dk = sctx.pv;
2348 
2349       /* copy data into U(k,:) */
2350       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2351       jmin = bi[k]+1; jmax = bi[k+1];
2352       if (jmin < jmax) {
2353         for (j=jmin; j<jmax; j++){
2354           col = bj[j]; ba[j] = rtmp[col];
2355         }
2356         /* add the k-th row into il and jl */
2357         il[k] = jmin;
2358         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2359       }
2360     }
2361   } while (sctx.newshift);
2362 
2363   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2364   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2365   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2366 
2367   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2368   if (perm_identity){
2369     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2370     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2371     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2372     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2373   } else {
2374     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2375     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2376     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2377     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2378   }
2379 
2380   C->assembled    = PETSC_TRUE;
2381   C->preallocated = PETSC_TRUE;
2382   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2383   if (sctx.nshift){
2384     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2385       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2386     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2387       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2388     }
2389   }
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 /*
2394    icc() under revised new data structure.
2395    Factored arrays bj and ba are stored as
2396      U(0,:),...,U(i,:),U(n-1,:)
2397 
2398    ui=fact->i is an array of size n+1, in which
2399    ui+
2400      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2401      ui[n]:  points to U(n-1,n-1)+1
2402 
2403   udiag=fact->diag is an array of size n,in which
2404      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2405 
2406    U(i,:) contains udiag[i] as its last entry, i.e.,
2407     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2408 */
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2412 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2413 {
2414   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2415   Mat_SeqSBAIJ       *b;
2416   PetscErrorCode     ierr;
2417   PetscTruth         perm_identity,missing;
2418   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2419   const PetscInt     *rip,*riip;
2420   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2421   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2422   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2423   PetscReal          fill=info->fill,levels=info->levels;
2424   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2425   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2426   PetscBT            lnkbt;
2427   IS                 iperm;
2428 
2429   PetscFunctionBegin;
2430   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2431   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2432   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2433   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2434   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2435 
2436   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2437   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2438   ui[0] = 0;
2439 
2440   /* ICC(0) without matrix ordering: simply rearrange column indices */
2441   if (!levels && perm_identity) {
2442     for (i=0; i<am; i++) {
2443       ncols    = ai[i+1] - a->diag[i];
2444       ui[i+1]  = ui[i] + ncols;
2445       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2446     }
2447     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2448     cols = uj;
2449     for (i=0; i<am; i++) {
2450       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2451       ncols = ai[i+1] - a->diag[i] -1;
2452       for (j=0; j<ncols; j++) *cols++ = aj[j];
2453       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2454     }
2455   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2456     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2457     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2458 
2459     /* initialization */
2460     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2461 
2462     /* jl: linked list for storing indices of the pivot rows
2463        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2464     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2465     for (i=0; i<am; i++){
2466       jl[i] = am; il[i] = 0;
2467     }
2468 
2469     /* create and initialize a linked list for storing column indices of the active row k */
2470     nlnk = am + 1;
2471     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2472 
2473     /* initial FreeSpace size is fill*(ai[am]+1) */
2474     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2475     current_space = free_space;
2476     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2477     current_space_lvl = free_space_lvl;
2478 
2479     for (k=0; k<am; k++){  /* for each active row k */
2480       /* initialize lnk by the column indices of row rip[k] of A */
2481       nzk   = 0;
2482       ncols = ai[rip[k]+1] - ai[rip[k]];
2483       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2484       ncols_upper = 0;
2485       for (j=0; j<ncols; j++){
2486         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2487         if (riip[i] >= k){ /* only take upper triangular entry */
2488           ajtmp[ncols_upper] = i;
2489           ncols_upper++;
2490         }
2491       }
2492       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2493       nzk += nlnk;
2494 
2495       /* update lnk by computing fill-in for each pivot row to be merged in */
2496       prow = jl[k]; /* 1st pivot row */
2497 
2498       while (prow < k){
2499         nextprow = jl[prow];
2500 
2501         /* merge prow into k-th row */
2502         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2503         jmax = ui[prow+1];
2504         ncols = jmax-jmin;
2505         i     = jmin - ui[prow];
2506         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2507         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2508         j     = *(uj - 1);
2509         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2510         nzk += nlnk;
2511 
2512         /* update il and jl for prow */
2513         if (jmin < jmax){
2514           il[prow] = jmin;
2515           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2516         }
2517         prow = nextprow;
2518       }
2519 
2520       /* if free space is not available, make more free space */
2521       if (current_space->local_remaining<nzk) {
2522         i  = am - k + 1; /* num of unfactored rows */
2523         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2524         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2525         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2526         reallocs++;
2527       }
2528 
2529       /* copy data into free_space and free_space_lvl, then initialize lnk */
2530       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2531       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2532 
2533       /* add the k-th row into il and jl */
2534       if (nzk > 1){
2535         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2536         jl[k] = jl[i]; jl[i] = k;
2537         il[k] = ui[k] + 1;
2538       }
2539       uj_ptr[k]     = current_space->array;
2540       uj_lvl_ptr[k] = current_space_lvl->array;
2541 
2542       current_space->array           += nzk;
2543       current_space->local_used      += nzk;
2544       current_space->local_remaining -= nzk;
2545 
2546       current_space_lvl->array           += nzk;
2547       current_space_lvl->local_used      += nzk;
2548       current_space_lvl->local_remaining -= nzk;
2549 
2550       ui[k+1] = ui[k] + nzk;
2551     }
2552 
2553 #if defined(PETSC_USE_INFO)
2554     if (ai[am] != 0) {
2555       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2556       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2557       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2558       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2559     } else {
2560       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2561     }
2562 #endif
2563 
2564     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2565     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2566     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2567     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2568 
2569     /* destroy list of free space and other temporary array(s) */
2570     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2571     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2572     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2573     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2574 
2575   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2576 
2577   /* put together the new matrix in MATSEQSBAIJ format */
2578   b    = (Mat_SeqSBAIJ*)(fact)->data;
2579   b->singlemalloc = PETSC_FALSE;
2580   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2581   b->j    = uj;
2582   b->i    = ui;
2583   b->diag = udiag;
2584   b->free_diag = PETSC_TRUE;
2585   b->ilen = 0;
2586   b->imax = 0;
2587   b->row  = perm;
2588   b->col  = perm;
2589   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2590   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2591   b->icol = iperm;
2592   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2593   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2594   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2595   b->maxnz   = b->nz = ui[am];
2596   b->free_a  = PETSC_TRUE;
2597   b->free_ij = PETSC_TRUE;
2598 
2599   fact->info.factor_mallocs    = reallocs;
2600   fact->info.fill_ratio_given  = fill;
2601   if (ai[am] != 0) {
2602     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2603   } else {
2604     fact->info.fill_ratio_needed = 0.0;
2605   }
2606   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2607   PetscFunctionReturn(0);
2608 }
2609 
2610 #undef __FUNCT__
2611 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2612 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2613 {
2614   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2615   Mat_SeqSBAIJ       *b;
2616   PetscErrorCode     ierr;
2617   PetscTruth         perm_identity,missing;
2618   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2619   const PetscInt     *rip,*riip;
2620   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2621   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2622   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2623   PetscReal          fill=info->fill,levels=info->levels;
2624   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2625   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2626   PetscBT            lnkbt;
2627   IS                 iperm;
2628 
2629   PetscFunctionBegin;
2630   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2631   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2632   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2633   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2634   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2635 
2636   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2637   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2638   ui[0] = 0;
2639 
2640   /* ICC(0) without matrix ordering: simply copies fill pattern */
2641   if (!levels && perm_identity) {
2642 
2643     for (i=0; i<am; i++) {
2644       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2645       udiag[i] = ui[i];
2646     }
2647     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2648     cols = uj;
2649     for (i=0; i<am; i++) {
2650       aj    = a->j + a->diag[i];
2651       ncols = ui[i+1] - ui[i];
2652       for (j=0; j<ncols; j++) *cols++ = *aj++;
2653     }
2654   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2655     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2656     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2657 
2658     /* initialization */
2659     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2660 
2661     /* jl: linked list for storing indices of the pivot rows
2662        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2663     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2664     for (i=0; i<am; i++){
2665       jl[i] = am; il[i] = 0;
2666     }
2667 
2668     /* create and initialize a linked list for storing column indices of the active row k */
2669     nlnk = am + 1;
2670     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2671 
2672     /* initial FreeSpace size is fill*(ai[am]+1) */
2673     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2674     current_space = free_space;
2675     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2676     current_space_lvl = free_space_lvl;
2677 
2678     for (k=0; k<am; k++){  /* for each active row k */
2679       /* initialize lnk by the column indices of row rip[k] of A */
2680       nzk   = 0;
2681       ncols = ai[rip[k]+1] - ai[rip[k]];
2682       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2683       ncols_upper = 0;
2684       for (j=0; j<ncols; j++){
2685         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2686         if (riip[i] >= k){ /* only take upper triangular entry */
2687           ajtmp[ncols_upper] = i;
2688           ncols_upper++;
2689         }
2690       }
2691       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2692       nzk += nlnk;
2693 
2694       /* update lnk by computing fill-in for each pivot row to be merged in */
2695       prow = jl[k]; /* 1st pivot row */
2696 
2697       while (prow < k){
2698         nextprow = jl[prow];
2699 
2700         /* merge prow into k-th row */
2701         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2702         jmax = ui[prow+1];
2703         ncols = jmax-jmin;
2704         i     = jmin - ui[prow];
2705         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2706         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2707         j     = *(uj - 1);
2708         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2709         nzk += nlnk;
2710 
2711         /* update il and jl for prow */
2712         if (jmin < jmax){
2713           il[prow] = jmin;
2714           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2715         }
2716         prow = nextprow;
2717       }
2718 
2719       /* if free space is not available, make more free space */
2720       if (current_space->local_remaining<nzk) {
2721         i = am - k + 1; /* num of unfactored rows */
2722         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2723         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2724         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2725         reallocs++;
2726       }
2727 
2728       /* copy data into free_space and free_space_lvl, then initialize lnk */
2729       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2730       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2731 
2732       /* add the k-th row into il and jl */
2733       if (nzk > 1){
2734         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2735         jl[k] = jl[i]; jl[i] = k;
2736         il[k] = ui[k] + 1;
2737       }
2738       uj_ptr[k]     = current_space->array;
2739       uj_lvl_ptr[k] = current_space_lvl->array;
2740 
2741       current_space->array           += nzk;
2742       current_space->local_used      += nzk;
2743       current_space->local_remaining -= nzk;
2744 
2745       current_space_lvl->array           += nzk;
2746       current_space_lvl->local_used      += nzk;
2747       current_space_lvl->local_remaining -= nzk;
2748 
2749       ui[k+1] = ui[k] + nzk;
2750     }
2751 
2752 #if defined(PETSC_USE_INFO)
2753     if (ai[am] != 0) {
2754       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2755       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2756       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2757       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2758     } else {
2759       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2760     }
2761 #endif
2762 
2763     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2764     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2765     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2766     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2767 
2768     /* destroy list of free space and other temporary array(s) */
2769     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2770     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2771     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2772     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2773 
2774   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2775 
2776   /* put together the new matrix in MATSEQSBAIJ format */
2777 
2778   b    = (Mat_SeqSBAIJ*)fact->data;
2779   b->singlemalloc = PETSC_FALSE;
2780   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2781   b->j    = uj;
2782   b->i    = ui;
2783   b->diag = udiag;
2784   b->free_diag = PETSC_TRUE;
2785   b->ilen = 0;
2786   b->imax = 0;
2787   b->row  = perm;
2788   b->col  = perm;
2789   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2790   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2791   b->icol = iperm;
2792   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2793   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2794   ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2795   b->maxnz   = b->nz = ui[am];
2796   b->free_a  = PETSC_TRUE;
2797   b->free_ij = PETSC_TRUE;
2798 
2799   fact->info.factor_mallocs    = reallocs;
2800   fact->info.fill_ratio_given  = fill;
2801   if (ai[am] != 0) {
2802     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2803   } else {
2804     fact->info.fill_ratio_needed = 0.0;
2805   }
2806   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2811 {
2812   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2813   Mat_SeqSBAIJ       *b;
2814   PetscErrorCode     ierr;
2815   PetscTruth         perm_identity;
2816   PetscReal          fill = info->fill;
2817   const PetscInt     *rip,*riip;
2818   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2819   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2820   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2821   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2822   PetscBT            lnkbt;
2823   IS                 iperm;
2824 
2825   PetscFunctionBegin;
2826   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2827   /* check whether perm is the identity mapping */
2828   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2829   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2830   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2831   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2832 
2833   /* initialization */
2834   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2835   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2836   ui[0] = 0;
2837 
2838   /* jl: linked list for storing indices of the pivot rows
2839      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2840   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2841   for (i=0; i<am; i++){
2842     jl[i] = am; il[i] = 0;
2843   }
2844 
2845   /* create and initialize a linked list for storing column indices of the active row k */
2846   nlnk = am + 1;
2847   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2848 
2849   /* initial FreeSpace size is fill*(ai[am]+1) */
2850   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2851   current_space = free_space;
2852 
2853   for (k=0; k<am; k++){  /* for each active row k */
2854     /* initialize lnk by the column indices of row rip[k] of A */
2855     nzk   = 0;
2856     ncols = ai[rip[k]+1] - ai[rip[k]];
2857     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2858     ncols_upper = 0;
2859     for (j=0; j<ncols; j++){
2860       i = riip[*(aj + ai[rip[k]] + j)];
2861       if (i >= k){ /* only take upper triangular entry */
2862         cols[ncols_upper] = i;
2863         ncols_upper++;
2864       }
2865     }
2866     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2867     nzk += nlnk;
2868 
2869     /* update lnk by computing fill-in for each pivot row to be merged in */
2870     prow = jl[k]; /* 1st pivot row */
2871 
2872     while (prow < k){
2873       nextprow = jl[prow];
2874       /* merge prow into k-th row */
2875       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2876       jmax = ui[prow+1];
2877       ncols = jmax-jmin;
2878       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2879       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2880       nzk += nlnk;
2881 
2882       /* update il and jl for prow */
2883       if (jmin < jmax){
2884         il[prow] = jmin;
2885         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2886       }
2887       prow = nextprow;
2888     }
2889 
2890     /* if free space is not available, make more free space */
2891     if (current_space->local_remaining<nzk) {
2892       i  = am - k + 1; /* num of unfactored rows */
2893       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2894       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2895       reallocs++;
2896     }
2897 
2898     /* copy data into free space, then initialize lnk */
2899     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2900 
2901     /* add the k-th row into il and jl */
2902     if (nzk > 1){
2903       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2904       jl[k] = jl[i]; jl[i] = k;
2905       il[k] = ui[k] + 1;
2906     }
2907     ui_ptr[k] = current_space->array;
2908     current_space->array           += nzk;
2909     current_space->local_used      += nzk;
2910     current_space->local_remaining -= nzk;
2911 
2912     ui[k+1] = ui[k] + nzk;
2913   }
2914 
2915 #if defined(PETSC_USE_INFO)
2916   if (ai[am] != 0) {
2917     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
2918     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2919     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2920     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2921   } else {
2922      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2923   }
2924 #endif
2925 
2926   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2927   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2928   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2929 
2930   /* destroy list of free space and other temporary array(s) */
2931   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2932   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2933   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2934 
2935   /* put together the new matrix in MATSEQSBAIJ format */
2936 
2937   b = (Mat_SeqSBAIJ*)fact->data;
2938   b->singlemalloc = PETSC_FALSE;
2939   b->free_a       = PETSC_TRUE;
2940   b->free_ij      = PETSC_TRUE;
2941   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2942   b->j    = uj;
2943   b->i    = ui;
2944   b->diag = udiag;
2945   b->free_diag = PETSC_TRUE;
2946   b->ilen = 0;
2947   b->imax = 0;
2948   b->row  = perm;
2949   b->col  = perm;
2950   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2951   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2952   b->icol = iperm;
2953   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2954   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2955   ierr    = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2956   b->maxnz = b->nz = ui[am];
2957 
2958   fact->info.factor_mallocs    = reallocs;
2959   fact->info.fill_ratio_given  = fill;
2960   if (ai[am] != 0) {
2961     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2962   } else {
2963     fact->info.fill_ratio_needed = 0.0;
2964   }
2965   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2966   PetscFunctionReturn(0);
2967 }
2968 
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2971 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2972 {
2973   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2974   Mat_SeqSBAIJ       *b;
2975   PetscErrorCode     ierr;
2976   PetscTruth         perm_identity;
2977   PetscReal          fill = info->fill;
2978   const PetscInt     *rip,*riip;
2979   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2980   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2981   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2982   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2983   PetscBT            lnkbt;
2984   IS                 iperm;
2985 
2986   PetscFunctionBegin;
2987   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2988   /* check whether perm is the identity mapping */
2989   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2990   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2991   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2992   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2993 
2994   /* initialization */
2995   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2996   ui[0] = 0;
2997 
2998   /* jl: linked list for storing indices of the pivot rows
2999      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3000   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
3001   for (i=0; i<am; i++){
3002     jl[i] = am; il[i] = 0;
3003   }
3004 
3005   /* create and initialize a linked list for storing column indices of the active row k */
3006   nlnk = am + 1;
3007   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3008 
3009   /* initial FreeSpace size is fill*(ai[am]+1) */
3010   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3011   current_space = free_space;
3012 
3013   for (k=0; k<am; k++){  /* for each active row k */
3014     /* initialize lnk by the column indices of row rip[k] of A */
3015     nzk   = 0;
3016     ncols = ai[rip[k]+1] - ai[rip[k]];
3017     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3018     ncols_upper = 0;
3019     for (j=0; j<ncols; j++){
3020       i = riip[*(aj + ai[rip[k]] + j)];
3021       if (i >= k){ /* only take upper triangular entry */
3022         cols[ncols_upper] = i;
3023         ncols_upper++;
3024       }
3025     }
3026     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3027     nzk += nlnk;
3028 
3029     /* update lnk by computing fill-in for each pivot row to be merged in */
3030     prow = jl[k]; /* 1st pivot row */
3031 
3032     while (prow < k){
3033       nextprow = jl[prow];
3034       /* merge prow into k-th row */
3035       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3036       jmax = ui[prow+1];
3037       ncols = jmax-jmin;
3038       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3039       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3040       nzk += nlnk;
3041 
3042       /* update il and jl for prow */
3043       if (jmin < jmax){
3044         il[prow] = jmin;
3045         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3046       }
3047       prow = nextprow;
3048     }
3049 
3050     /* if free space is not available, make more free space */
3051     if (current_space->local_remaining<nzk) {
3052       i = am - k + 1; /* num of unfactored rows */
3053       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3054       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3055       reallocs++;
3056     }
3057 
3058     /* copy data into free space, then initialize lnk */
3059     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3060 
3061     /* add the k-th row into il and jl */
3062     if (nzk-1 > 0){
3063       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3064       jl[k] = jl[i]; jl[i] = k;
3065       il[k] = ui[k] + 1;
3066     }
3067     ui_ptr[k] = current_space->array;
3068     current_space->array           += nzk;
3069     current_space->local_used      += nzk;
3070     current_space->local_remaining -= nzk;
3071 
3072     ui[k+1] = ui[k] + nzk;
3073   }
3074 
3075 #if defined(PETSC_USE_INFO)
3076   if (ai[am] != 0) {
3077     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3078     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3079     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3080     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3081   } else {
3082      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3083   }
3084 #endif
3085 
3086   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3087   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3088   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3089 
3090   /* destroy list of free space and other temporary array(s) */
3091   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3092   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3093   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3094 
3095   /* put together the new matrix in MATSEQSBAIJ format */
3096 
3097   b = (Mat_SeqSBAIJ*)fact->data;
3098   b->singlemalloc = PETSC_FALSE;
3099   b->free_a       = PETSC_TRUE;
3100   b->free_ij      = PETSC_TRUE;
3101   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3102   b->j    = uj;
3103   b->i    = ui;
3104   b->diag = 0;
3105   b->ilen = 0;
3106   b->imax = 0;
3107   b->row  = perm;
3108   b->col  = perm;
3109   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3110   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3111   b->icol = iperm;
3112   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3113   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3114   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3115   b->maxnz = b->nz = ui[am];
3116 
3117   fact->info.factor_mallocs    = reallocs;
3118   fact->info.fill_ratio_given  = fill;
3119   if (ai[am] != 0) {
3120     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3121   } else {
3122     fact->info.fill_ratio_needed = 0.0;
3123   }
3124   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3125   PetscFunctionReturn(0);
3126 }
3127 
3128 #undef __FUNCT__
3129 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3130 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3131 {
3132   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3133   PetscErrorCode    ierr;
3134   PetscInt          n = A->rmap->n;
3135   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3136   PetscScalar       *x,sum;
3137   const PetscScalar *b;
3138   const MatScalar   *aa = a->a,*v;
3139   PetscInt          i,nz;
3140 
3141   PetscFunctionBegin;
3142   if (!n) PetscFunctionReturn(0);
3143 
3144   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3145   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3146 
3147   /* forward solve the lower triangular */
3148   x[0] = b[0];
3149   v    = aa;
3150   vi   = aj;
3151   for (i=1; i<n; i++) {
3152     nz  = ai[i+1] - ai[i];
3153     sum = b[i];
3154     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3155     v  += nz;
3156     vi += nz;
3157     x[i] = sum;
3158   }
3159 
3160   /* backward solve the upper triangular */
3161   for (i=n-1; i>=0; i--){
3162     v   = aa + adiag[i+1] + 1;
3163     vi  = aj + adiag[i+1] + 1;
3164     nz = adiag[i] - adiag[i+1]-1;
3165     sum = x[i];
3166     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3167     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3168   }
3169 
3170   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3171   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3172   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 #undef __FUNCT__
3177 #define __FUNCT__ "MatSolve_SeqAIJ"
3178 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3179 {
3180   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3181   IS                iscol = a->col,isrow = a->row;
3182   PetscErrorCode    ierr;
3183   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3184   const PetscInt    *rout,*cout,*r,*c;
3185   PetscScalar       *x,*tmp,sum;
3186   const PetscScalar *b;
3187   const MatScalar   *aa = a->a,*v;
3188 
3189   PetscFunctionBegin;
3190   if (!n) PetscFunctionReturn(0);
3191 
3192   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3193   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3194   tmp  = a->solve_work;
3195 
3196   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3197   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3198 
3199   /* forward solve the lower triangular */
3200   tmp[0] = b[r[0]];
3201   v      = aa;
3202   vi     = aj;
3203   for (i=1; i<n; i++) {
3204     nz  = ai[i+1] - ai[i];
3205     sum = b[r[i]];
3206     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3207     tmp[i] = sum;
3208     v += nz; vi += nz;
3209   }
3210 
3211   /* backward solve the upper triangular */
3212   for (i=n-1; i>=0; i--){
3213     v   = aa + adiag[i+1]+1;
3214     vi  = aj + adiag[i+1]+1;
3215     nz  = adiag[i]-adiag[i+1]-1;
3216     sum = tmp[i];
3217     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3218     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3219   }
3220 
3221   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3222   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3223   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3224   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3225   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 #undef __FUNCT__
3230 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3231 /*
3232     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3233 */
3234 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3235 {
3236   Mat                B = *fact;
3237   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3238   IS                 isicol;
3239   PetscErrorCode     ierr;
3240   const PetscInt     *r,*ic;
3241   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3242   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3243   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3244   PetscInt           nlnk,*lnk;
3245   PetscBT            lnkbt;
3246   PetscTruth         row_identity,icol_identity;
3247   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3248   const PetscInt     *ics;
3249   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3250   PetscReal          dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3251   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3252   PetscTruth         missing;
3253 
3254   PetscFunctionBegin;
3255 
3256   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3257   if (dtcol   == PETSC_DEFAULT) dtcol   = 0.01; /* XXX unused! */
3258   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3259 
3260   /* ------- symbolic factorization, can be reused ---------*/
3261   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3262   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3263   adiag=a->diag;
3264 
3265   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3266 
3267   /* bdiag is location of diagonal in factor */
3268   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3269   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3270 
3271   /* allocate row pointers bi */
3272   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3273 
3274   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3275   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3276   nnz_max  = ai[n]+2*n*dtcount+2;
3277 
3278   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3279   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3280 
3281   /* put together the new matrix */
3282   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3283   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3284   b    = (Mat_SeqAIJ*)B->data;
3285   b->free_a       = PETSC_TRUE;
3286   b->free_ij      = PETSC_TRUE;
3287   b->singlemalloc = PETSC_FALSE;
3288   b->a          = ba;
3289   b->j          = bj;
3290   b->i          = bi;
3291   b->diag       = bdiag;
3292   b->ilen       = 0;
3293   b->imax       = 0;
3294   b->row        = isrow;
3295   b->col        = iscol;
3296   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3297   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3298   b->icol       = isicol;
3299   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3300 
3301   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3302   b->maxnz = nnz_max;
3303 
3304   B->factortype            = MAT_FACTOR_ILUDT;
3305   B->info.factor_mallocs   = 0;
3306   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3307   CHKMEMQ;
3308   /* ------- end of symbolic factorization ---------*/
3309 
3310   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3311   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3312   ics  = ic;
3313 
3314   /* linked list for storing column indices of the active row */
3315   nlnk = n + 1;
3316   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3317 
3318   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3319   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3320   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3321   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3322   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3323 
3324   bi[0]    = 0;
3325   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3326   bdiag_rev[n] = bdiag[0];
3327   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3328   for (i=0; i<n; i++) {
3329     /* copy initial fill into linked list */
3330     nzi = 0; /* nonzeros for active row i */
3331     nzi = ai[r[i]+1] - ai[r[i]];
3332     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3333     nzi_al = adiag[r[i]] - ai[r[i]];
3334     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3335     ajtmp = aj + ai[r[i]];
3336     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3337 
3338     /* load in initial (unfactored row) */
3339     aatmp = a->a + ai[r[i]];
3340     for (j=0; j<nzi; j++) {
3341       rtmp[ics[*ajtmp++]] = *aatmp++;
3342     }
3343 
3344     /* add pivot rows into linked list */
3345     row = lnk[n];
3346     while (row < i ) {
3347       nzi_bl = bi[row+1] - bi[row] + 1;
3348       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3349       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3350       nzi  += nlnk;
3351       row   = lnk[row];
3352     }
3353 
3354     /* copy data from lnk into jtmp, then initialize lnk */
3355     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3356 
3357     /* numerical factorization */
3358     bjtmp = jtmp;
3359     row   = *bjtmp++; /* 1st pivot row */
3360     while  ( row < i ) {
3361       pc         = rtmp + row;
3362       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3363       multiplier = (*pc) * (*pv);
3364       *pc        = multiplier;
3365       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3366         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3367         pv         = ba + bdiag[row+1] + 1;
3368         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3369         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3370         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3371         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
3372       }
3373       row = *bjtmp++;
3374     }
3375 
3376     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3377     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3378     nzi_bl = 0; j = 0;
3379     while (jtmp[j] < i){ /* Note: jtmp is sorted */
3380       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3381       nzi_bl++; j++;
3382     }
3383     nzi_bu = nzi - nzi_bl -1;
3384     while (j < nzi){
3385       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3386       j++;
3387     }
3388 
3389     bjtmp = bj + bi[i];
3390     batmp = ba + bi[i];
3391     /* apply level dropping rule to L part */
3392     ncut = nzi_al + dtcount;
3393     if (ncut < nzi_bl){
3394       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3395       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3396     } else {
3397       ncut = nzi_bl;
3398     }
3399     for (j=0; j<ncut; j++){
3400       bjtmp[j] = jtmp[j];
3401       batmp[j] = vtmp[j];
3402       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3403     }
3404     bi[i+1] = bi[i] + ncut;
3405     nzi = ncut + 1;
3406 
3407     /* apply level dropping rule to U part */
3408     ncut = nzi_au + dtcount;
3409     if (ncut < nzi_bu){
3410       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3411       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3412     } else {
3413       ncut = nzi_bu;
3414     }
3415     nzi += ncut;
3416 
3417     /* mark bdiagonal */
3418     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3419     bdiag_rev[n-i-1] = bdiag[i+1];
3420     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3421     bjtmp = bj + bdiag[i];
3422     batmp = ba + bdiag[i];
3423     *bjtmp = i;
3424     *batmp = diag_tmp; /* rtmp[i]; */
3425     if (*batmp == 0.0) {
3426       *batmp = dt+shift;
3427       /* printf(" row %d add shift %g\n",i,shift); */
3428     }
3429     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3430     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3431 
3432     bjtmp = bj + bdiag[i+1]+1;
3433     batmp = ba + bdiag[i+1]+1;
3434     for (k=0; k<ncut; k++){
3435       bjtmp[k] = jtmp[nzi_bl+1+k];
3436       batmp[k] = vtmp[nzi_bl+1+k];
3437       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3438     }
3439     /* printf("\n"); */
3440 
3441     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3442     /*
3443     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3444     printf(" ----------------------------\n");
3445     */
3446   } /* for (i=0; i<n; i++) */
3447   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3448   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3449 
3450   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3451   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3452 
3453   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3454   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3455   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3456   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3457 
3458   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3459   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3460 
3461   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3462   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3463   if (row_identity && icol_identity) {
3464     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3465   } else {
3466     B->ops->solve = MatSolve_SeqAIJ;
3467   }
3468 
3469   B->ops->solveadd          = 0;
3470   B->ops->solvetranspose    = 0;
3471   B->ops->solvetransposeadd = 0;
3472   B->ops->matsolve          = 0;
3473   B->assembled              = PETSC_TRUE;
3474   B->preallocated           = PETSC_TRUE;
3475   PetscFunctionReturn(0);
3476 }
3477 
3478 /* a wraper of MatILUDTFactor_SeqAIJ() */
3479 #undef __FUNCT__
3480 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3481 /*
3482     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3483 */
3484 
3485 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3486 {
3487   PetscErrorCode     ierr;
3488 
3489   PetscFunctionBegin;
3490   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 /*
3495    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3496    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3497 */
3498 #undef __FUNCT__
3499 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3500 /*
3501     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3502 */
3503 
3504 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3505 {
3506   Mat            C=fact;
3507   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3508   IS             isrow = b->row,isicol = b->icol;
3509   PetscErrorCode ierr;
3510   const PetscInt *r,*ic,*ics;
3511   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3512   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3513   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3514   PetscReal      dt=info->dt,shift=info->shiftamount;
3515   PetscTruth     row_identity, col_identity;
3516 
3517   PetscFunctionBegin;
3518   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3519   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3520   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3521   ics  = ic;
3522 
3523   for (i=0; i<n; i++){
3524     /* initialize rtmp array */
3525     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3526     bjtmp = bj + bi[i];
3527     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3528     rtmp[i] = 0.0;
3529     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3530     bjtmp = bj + bdiag[i+1] + 1;
3531     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3532 
3533     /* load in initial unfactored row of A */
3534     /* printf("row %d\n",i); */
3535     nz    = ai[r[i]+1] - ai[r[i]];
3536     ajtmp = aj + ai[r[i]];
3537     v     = aa + ai[r[i]];
3538     for (j=0; j<nz; j++) {
3539       rtmp[ics[*ajtmp++]] = v[j];
3540       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3541     }
3542     /* printf("\n"); */
3543 
3544     /* numerical factorization */
3545     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3546     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3547     k = 0;
3548     while (k < nzl){
3549       row   = *bjtmp++;
3550       /* printf("  prow %d\n",row); */
3551       pc         = rtmp + row;
3552       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3553       multiplier = (*pc) * (*pv);
3554       *pc        = multiplier;
3555       if (PetscAbsScalar(multiplier) > dt){
3556         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3557         pv         = b->a + bdiag[row+1] + 1;
3558         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3559         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3560         /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */
3561       }
3562       k++;
3563     }
3564 
3565     /* finished row so stick it into b->a */
3566     /* L-part */
3567     pv = b->a + bi[i] ;
3568     pj = bj + bi[i] ;
3569     nzl = bi[i+1] - bi[i];
3570     for (j=0; j<nzl; j++) {
3571       pv[j] = rtmp[pj[j]];
3572       /* printf(" (%d,%g),",pj[j],pv[j]); */
3573     }
3574 
3575     /* diagonal: invert diagonal entries for simplier triangular solves */
3576     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3577     b->a[bdiag[i]] = 1.0/rtmp[i];
3578     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3579 
3580     /* U-part */
3581     pv = b->a + bdiag[i+1] + 1;
3582     pj = bj + bdiag[i+1] + 1;
3583     nzu = bdiag[i] - bdiag[i+1] - 1;
3584     for (j=0; j<nzu; j++) {
3585       pv[j] = rtmp[pj[j]];
3586       /* printf(" (%d,%g),",pj[j],pv[j]); */
3587     }
3588     /* printf("\n"); */
3589   }
3590 
3591   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3592   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3593   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3594 
3595   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3596   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3597   if (row_identity && col_identity) {
3598     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3599   } else {
3600     C->ops->solve   = MatSolve_SeqAIJ;
3601   }
3602   C->ops->solveadd           = 0;
3603   C->ops->solvetranspose     = 0;
3604   C->ops->solvetransposeadd  = 0;
3605   C->ops->matsolve           = 0;
3606   C->assembled    = PETSC_TRUE;
3607   C->preallocated = PETSC_TRUE;
3608   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3609   PetscFunctionReturn(0);
3610 }
3611