xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 794bee337f0d5af71ccfe93f8f7f88922f7fb150)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
10 /*
11       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12 
13       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14 */
15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16 {
17   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
18   PetscErrorCode    ierr;
19   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20   const PetscInt    *ai = a->i, *aj = a->j;
21   const PetscScalar *aa = a->a;
22   PetscBool         *done;
23   PetscReal         best,past = 0,future;
24 
25   PetscFunctionBegin;
26   /* pick initial row */
27   best = -1;
28   for (i=0; i<n; i++) {
29     future = 0.0;
30     for (j=ai[i]; j<ai[i+1]; j++) {
31       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best = past/future;
36       current = i;
37     }
38   }
39 
40   ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr);
41   ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
42   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0.0;
69         past   = 0.0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk]) past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future  += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current = newcurrent;
84     order[i+1] = current;
85   }
86   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr = PetscFree(done);CHKERRQ(ierr);
90   ierr = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 EXTERN_C_BEGIN
96 #undef __FUNCT__
97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
99 {
100   PetscFunctionBegin;
101   *flg = PETSC_TRUE;
102   PetscFunctionReturn(0);
103 }
104 EXTERN_C_END
105 
106 EXTERN_C_BEGIN
107 #undef __FUNCT__
108 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110 {
111   PetscInt           n = A->rmap->n;
112   PetscErrorCode     ierr;
113 
114   PetscFunctionBegin;
115   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
116   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
117   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
118     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
119     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
121   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
122     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
123     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
124     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
125     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
126   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
127   (*B)->factortype = ftype;
128   PetscFunctionReturn(0);
129 }
130 EXTERN_C_END
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
134 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
135 {
136   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
137   IS                 isicol;
138   PetscErrorCode     ierr;
139   const PetscInt     *r,*ic;
140   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
141   PetscInt           *bi,*bj,*ajtmp;
142   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
143   PetscReal          f;
144   PetscInt           nlnk,*lnk,k,**bi_ptr;
145   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
146   PetscBT            lnkbt;
147 
148   PetscFunctionBegin;
149   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
150   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
151   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
152   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
153 
154   /* get new row pointers */
155   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
156   bi[0] = 0;
157 
158   /* bdiag is location of diagonal in factor */
159   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
160   bdiag[0] = 0;
161 
162   /* linked list for storing column indices of the active row */
163   nlnk = n + 1;
164   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
165 
166   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
167 
168   /* initial FreeSpace size is f*(ai[n]+1) */
169   f = info->fill;
170   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
171   current_space = free_space;
172 
173   for (i=0; i<n; i++) {
174     /* copy previous fill into linked list */
175     nzi = 0;
176     nnz = ai[r[i]+1] - ai[r[i]];
177     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
178     ajtmp = aj + ai[r[i]];
179     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
180     nzi += nlnk;
181 
182     /* add pivot rows into linked list */
183     row = lnk[n];
184     while (row < i) {
185       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
186       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
187       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
188       nzi += nlnk;
189       row  = lnk[row];
190     }
191     bi[i+1] = bi[i] + nzi;
192     im[i]   = nzi;
193 
194     /* mark bdiag */
195     nzbd = 0;
196     nnz  = nzi;
197     k    = lnk[n];
198     while (nnz-- && k < i){
199       nzbd++;
200       k = lnk[k];
201     }
202     bdiag[i] = bi[i] + nzbd;
203 
204     /* if free space is not available, make more free space */
205     if (current_space->local_remaining<nzi) {
206       nnz = (n - i)*nzi; /* estimated and max additional space needed */
207       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
208       reallocs++;
209     }
210 
211     /* copy data into free space, then initialize lnk */
212     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
213     bi_ptr[i] = current_space->array;
214     current_space->array           += nzi;
215     current_space->local_used      += nzi;
216     current_space->local_remaining -= nzi;
217   }
218 #if defined(PETSC_USE_INFO)
219   if (ai[n] != 0) {
220     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
221     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
222     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
223     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
224     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
225   } else {
226     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
227   }
228 #endif
229 
230   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
231   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
232 
233   /* destroy list of free space and other temporary array(s) */
234   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
235   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
236   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
237   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
238 
239   /* put together the new matrix */
240   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
241   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
242   b    = (Mat_SeqAIJ*)(B)->data;
243   b->free_a       = PETSC_TRUE;
244   b->free_ij      = PETSC_TRUE;
245   b->singlemalloc = PETSC_FALSE;
246   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
247   b->j          = bj;
248   b->i          = bi;
249   b->diag       = bdiag;
250   b->ilen       = 0;
251   b->imax       = 0;
252   b->row        = isrow;
253   b->col        = iscol;
254   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
255   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
256   b->icol       = isicol;
257   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
258 
259   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
260   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
261   b->maxnz = b->nz = bi[n] ;
262 
263   (B)->factortype            = MAT_FACTOR_LU;
264   (B)->info.factor_mallocs   = reallocs;
265   (B)->info.fill_ratio_given = f;
266 
267   if (ai[n]) {
268     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
269   } else {
270     (B)->info.fill_ratio_needed = 0.0;
271   }
272   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
273   if (a->inode.size) {
274     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
275   }
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
281 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
282 {
283   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
284   IS                 isicol;
285   PetscErrorCode     ierr;
286   const PetscInt     *r,*ic;
287   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
288   PetscInt           *bi,*bj,*ajtmp;
289   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
290   PetscReal          f;
291   PetscInt           nlnk,*lnk,k,**bi_ptr;
292   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
293   PetscBT            lnkbt;
294 
295   PetscFunctionBegin;
296   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
297   /*
298   PetscBool          olddatastruct=PETSC_FALSE;
299   ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
300   if(olddatastruct){
301     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
302     PetscFunctionReturn(0);
303   }
304   */
305   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
306   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
307   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
308   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
309 
310   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
311   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
312   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
313   bi[0] = bdiag[0] = 0;
314 
315   /* linked list for storing column indices of the active row */
316   nlnk = n + 1;
317   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
318 
319   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
320 
321   /* initial FreeSpace size is f*(ai[n]+1) */
322   f = info->fill;
323   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
324   current_space = free_space;
325 
326   for (i=0; i<n; i++) {
327     /* copy previous fill into linked list */
328     nzi = 0;
329     nnz = ai[r[i]+1] - ai[r[i]];
330     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
331     ajtmp = aj + ai[r[i]];
332     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
333     nzi += nlnk;
334 
335     /* add pivot rows into linked list */
336     row = lnk[n];
337     while (row < i){
338       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
339       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
340       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
341       nzi  += nlnk;
342       row   = lnk[row];
343     }
344     bi[i+1] = bi[i] + nzi;
345     im[i]   = nzi;
346 
347     /* mark bdiag */
348     nzbd = 0;
349     nnz  = nzi;
350     k    = lnk[n];
351     while (nnz-- && k < i){
352       nzbd++;
353       k = lnk[k];
354     }
355     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
356 
357     /* if free space is not available, make more free space */
358     if (current_space->local_remaining<nzi) {
359       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
360       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
361       reallocs++;
362     }
363 
364     /* copy data into free space, then initialize lnk */
365     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
366     bi_ptr[i] = current_space->array;
367     current_space->array           += nzi;
368     current_space->local_used      += nzi;
369     current_space->local_remaining -= nzi;
370   }
371 
372   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
373   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
374 
375   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
376   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
377   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
378   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
379   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
380 
381   /* put together the new matrix */
382   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
383   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
384   b    = (Mat_SeqAIJ*)(B)->data;
385   b->free_a       = PETSC_TRUE;
386   b->free_ij      = PETSC_TRUE;
387   b->singlemalloc = PETSC_FALSE;
388   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
389   b->j          = bj;
390   b->i          = bi;
391   b->diag       = bdiag;
392   b->ilen       = 0;
393   b->imax       = 0;
394   b->row        = isrow;
395   b->col        = iscol;
396   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
397   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
398   b->icol       = isicol;
399   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
400 
401   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
402   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
403   b->maxnz = b->nz = bdiag[0]+1;
404   B->factortype            = MAT_FACTOR_LU;
405   B->info.factor_mallocs   = reallocs;
406   B->info.fill_ratio_given = f;
407 
408   if (ai[n]) {
409     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
410   } else {
411     B->info.fill_ratio_needed = 0.0;
412   }
413 #if defined(PETSC_USE_INFO)
414   if (ai[n] != 0) {
415     PetscReal af = B->info.fill_ratio_needed;
416     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
417     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
418     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
419     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
420   } else {
421     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
422   }
423 #endif
424   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
425   if (a->inode.size) {
426     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
427   }
428   PetscFunctionReturn(0);
429 }
430 
431 /*
432     Trouble in factorization, should we dump the original matrix?
433 */
434 #undef __FUNCT__
435 #define __FUNCT__ "MatFactorDumpMatrix"
436 PetscErrorCode MatFactorDumpMatrix(Mat A)
437 {
438   PetscErrorCode ierr;
439   PetscBool      flg = PETSC_FALSE;
440 
441   PetscFunctionBegin;
442   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
443   if (flg) {
444     PetscViewer viewer;
445     char        filename[PETSC_MAX_PATH_LEN];
446 
447     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
448     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
449     ierr = MatView(A,viewer);CHKERRQ(ierr);
450     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
451   }
452   PetscFunctionReturn(0);
453 }
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
457 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
458 {
459   Mat              C=B;
460   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
461   IS               isrow = b->row,isicol = b->icol;
462   PetscErrorCode   ierr;
463   const PetscInt   *r,*ic,*ics;
464   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
465   PetscInt         i,j,k,nz,nzL,row,*pj;
466   const PetscInt   *ajtmp,*bjtmp;
467   MatScalar        *rtmp,*pc,multiplier,*pv;
468   const  MatScalar *aa=a->a,*v;
469   PetscBool        row_identity,col_identity;
470   FactorShiftCtx   sctx;
471   const PetscInt   *ddiag;
472   PetscReal        rs;
473   MatScalar        d;
474 
475   PetscFunctionBegin;
476   /* MatPivotSetUp(): initialize shift context sctx */
477   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
478 
479   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
480     ddiag          = a->diag;
481     sctx.shift_top = info->zeropivot;
482     for (i=0; i<n; i++) {
483       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
484       d  = (aa)[ddiag[i]];
485       rs = -PetscAbsScalar(d) - PetscRealPart(d);
486       v  = aa+ai[i];
487       nz = ai[i+1] - ai[i];
488       for (j=0; j<nz; j++)
489 	rs += PetscAbsScalar(v[j]);
490       if (rs>sctx.shift_top) sctx.shift_top = rs;
491     }
492     sctx.shift_top   *= 1.1;
493     sctx.nshift_max   = 5;
494     sctx.shift_lo     = 0.;
495     sctx.shift_hi     = 1.;
496   }
497 
498   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
499   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
500   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
501   ics  = ic;
502 
503   do {
504     sctx.newshift = PETSC_FALSE;
505     for (i=0; i<n; i++){
506       /* zero rtmp */
507       /* L part */
508       nz    = bi[i+1] - bi[i];
509       bjtmp = bj + bi[i];
510       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
511 
512       /* U part */
513       nz = bdiag[i]-bdiag[i+1];
514       bjtmp = bj + bdiag[i+1]+1;
515       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
516 
517       /* load in initial (unfactored row) */
518       nz    = ai[r[i]+1] - ai[r[i]];
519       ajtmp = aj + ai[r[i]];
520       v     = aa + ai[r[i]];
521       for (j=0; j<nz; j++) {
522         rtmp[ics[ajtmp[j]]] = v[j];
523       }
524       /* ZeropivotApply() */
525       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
526 
527       /* elimination */
528       bjtmp = bj + bi[i];
529       row   = *bjtmp++;
530       nzL   = bi[i+1] - bi[i];
531       for(k=0; k < nzL;k++) {
532         pc = rtmp + row;
533         if (*pc != 0.0) {
534           pv         = b->a + bdiag[row];
535           multiplier = *pc * (*pv);
536           *pc        = multiplier;
537           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
538 	  pv = b->a + bdiag[row+1]+1;
539 	  nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
540           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
541           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
542         }
543         row = *bjtmp++;
544       }
545 
546       /* finished row so stick it into b->a */
547       rs = 0.0;
548       /* L part */
549       pv   = b->a + bi[i] ;
550       pj   = b->j + bi[i] ;
551       nz   = bi[i+1] - bi[i];
552       for (j=0; j<nz; j++) {
553         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
554       }
555 
556       /* U part */
557       pv = b->a + bdiag[i+1]+1;
558       pj = b->j + bdiag[i+1]+1;
559       nz = bdiag[i] - bdiag[i+1]-1;
560       for (j=0; j<nz; j++) {
561         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
562       }
563 
564       sctx.rs  = rs;
565       sctx.pv  = rtmp[i];
566       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
567       if(sctx.newshift) break; /* break for-loop */
568       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
569 
570       /* Mark diagonal and invert diagonal for simplier triangular solves */
571       pv  = b->a + bdiag[i];
572       *pv = 1.0/rtmp[i];
573 
574     } /* endof for (i=0; i<n; i++){ */
575 
576     /* MatPivotRefine() */
577     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
578       /*
579        * if no shift in this attempt & shifting & started shifting & can refine,
580        * then try lower shift
581        */
582       sctx.shift_hi       = sctx.shift_fraction;
583       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
584       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
585       sctx.newshift       = PETSC_TRUE;
586       sctx.nshift++;
587     }
588   } while (sctx.newshift);
589 
590   ierr = PetscFree(rtmp);CHKERRQ(ierr);
591   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
592   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
593 
594   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
595   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
596   if (row_identity && col_identity) {
597     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
598   } else {
599     C->ops->solve = MatSolve_SeqAIJ;
600   }
601   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
602   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
603   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
604   C->ops->matsolve           = MatMatSolve_SeqAIJ;
605   C->assembled    = PETSC_TRUE;
606   C->preallocated = PETSC_TRUE;
607   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
608 
609   /* MatShiftView(A,info,&sctx) */
610   if (sctx.nshift){
611     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
612       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
613     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
614       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
615     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
616       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
617     }
618   }
619   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
625 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
626 {
627   Mat             C=B;
628   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
629   IS              isrow = b->row,isicol = b->icol;
630   PetscErrorCode  ierr;
631   const PetscInt   *r,*ic,*ics;
632   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
633   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
634   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
635   MatScalar       *pv,*rtmp,*pc,multiplier,d;
636   const MatScalar *v,*aa=a->a;
637   PetscReal       rs=0.0;
638   FactorShiftCtx  sctx;
639   const PetscInt  *ddiag;
640   PetscBool       row_identity, col_identity;
641 
642   PetscFunctionBegin;
643   /* MatPivotSetUp(): initialize shift context sctx */
644   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
645 
646   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
647     ddiag          = a->diag;
648     sctx.shift_top = info->zeropivot;
649     for (i=0; i<n; i++) {
650       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
651       d  = (aa)[ddiag[i]];
652       rs = -PetscAbsScalar(d) - PetscRealPart(d);
653       v  = aa+ai[i];
654       nz = ai[i+1] - ai[i];
655       for (j=0; j<nz; j++)
656 	rs += PetscAbsScalar(v[j]);
657       if (rs>sctx.shift_top) sctx.shift_top = rs;
658     }
659     sctx.shift_top   *= 1.1;
660     sctx.nshift_max   = 5;
661     sctx.shift_lo     = 0.;
662     sctx.shift_hi     = 1.;
663   }
664 
665   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
666   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
667   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
668   ics  = ic;
669 
670   do {
671     sctx.newshift = PETSC_FALSE;
672     for (i=0; i<n; i++){
673       nz    = bi[i+1] - bi[i];
674       bjtmp = bj + bi[i];
675       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
676 
677       /* load in initial (unfactored row) */
678       nz    = ai[r[i]+1] - ai[r[i]];
679       ajtmp = aj + ai[r[i]];
680       v     = aa + ai[r[i]];
681       for (j=0; j<nz; j++) {
682         rtmp[ics[ajtmp[j]]] = v[j];
683       }
684       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
685 
686       row = *bjtmp++;
687       while  (row < i) {
688         pc = rtmp + row;
689         if (*pc != 0.0) {
690           pv         = b->a + diag_offset[row];
691           pj         = b->j + diag_offset[row] + 1;
692           multiplier = *pc / *pv++;
693           *pc        = multiplier;
694           nz         = bi[row+1] - diag_offset[row] - 1;
695           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
696           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
697         }
698         row = *bjtmp++;
699       }
700       /* finished row so stick it into b->a */
701       pv   = b->a + bi[i] ;
702       pj   = b->j + bi[i] ;
703       nz   = bi[i+1] - bi[i];
704       diag = diag_offset[i] - bi[i];
705       rs   = 0.0;
706       for (j=0; j<nz; j++) {
707         pv[j] = rtmp[pj[j]];
708         rs   += PetscAbsScalar(pv[j]);
709       }
710       rs   -= PetscAbsScalar(pv[diag]);
711 
712       sctx.rs  = rs;
713       sctx.pv  = pv[diag];
714       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
715       if (sctx.newshift) break;
716       pv[diag] = sctx.pv;
717     }
718 
719     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
720       /*
721        * if no shift in this attempt & shifting & started shifting & can refine,
722        * then try lower shift
723        */
724       sctx.shift_hi       = sctx.shift_fraction;
725       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
726       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
727       sctx.newshift       = PETSC_TRUE;
728       sctx.nshift++;
729     }
730   } while (sctx.newshift);
731 
732   /* invert diagonal entries for simplier triangular solves */
733   for (i=0; i<n; i++) {
734     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
735   }
736   ierr = PetscFree(rtmp);CHKERRQ(ierr);
737   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
738   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
739 
740   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
741   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
742   if (row_identity && col_identity) {
743     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
744   } else {
745     C->ops->solve   = MatSolve_SeqAIJ_inplace;
746   }
747   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
748   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
749   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
750   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
751   C->assembled    = PETSC_TRUE;
752   C->preallocated = PETSC_TRUE;
753   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
754   if (sctx.nshift){
755      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
756       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
757     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
758       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
759     }
760   }
761   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
762   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
763   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 /*
768    This routine implements inplace ILU(0) with row or/and column permutations.
769    Input:
770      A - original matrix
771    Output;
772      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
773          a->j (col index) is permuted by the inverse of colperm, then sorted
774          a->a reordered accordingly with a->j
775          a->diag (ptr to diagonal elements) is updated.
776 */
777 #undef __FUNCT__
778 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
779 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
780 {
781   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
782   IS             isrow = a->row,isicol = a->icol;
783   PetscErrorCode ierr;
784   const PetscInt *r,*ic,*ics;
785   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
786   PetscInt       *ajtmp,nz,row;
787   PetscInt       *diag = a->diag,nbdiag,*pj;
788   PetscScalar    *rtmp,*pc,multiplier,d;
789   MatScalar      *pv,*v;
790   PetscReal      rs;
791   FactorShiftCtx sctx;
792   const  MatScalar *aa=a->a,*vtmp;
793 
794   PetscFunctionBegin;
795   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
796 
797   /* MatPivotSetUp(): initialize shift context sctx */
798   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
799 
800   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
801     const PetscInt   *ddiag = a->diag;
802     sctx.shift_top = info->zeropivot;
803     for (i=0; i<n; i++) {
804       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
805       d  = (aa)[ddiag[i]];
806       rs = -PetscAbsScalar(d) - PetscRealPart(d);
807       vtmp  = aa+ai[i];
808       nz = ai[i+1] - ai[i];
809       for (j=0; j<nz; j++)
810 	rs += PetscAbsScalar(vtmp[j]);
811       if (rs>sctx.shift_top) sctx.shift_top = rs;
812     }
813     sctx.shift_top   *= 1.1;
814     sctx.nshift_max   = 5;
815     sctx.shift_lo     = 0.;
816     sctx.shift_hi     = 1.;
817   }
818 
819   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
820   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
821   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
822   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
823   ics = ic;
824 
825 #if defined(MV)
826   sctx.shift_top      = 0.;
827   sctx.nshift_max     = 0;
828   sctx.shift_lo       = 0.;
829   sctx.shift_hi       = 0.;
830   sctx.shift_fraction = 0.;
831 
832   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
833     sctx.shift_top = 0.;
834     for (i=0; i<n; i++) {
835       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
836       d  = (a->a)[diag[i]];
837       rs = -PetscAbsScalar(d) - PetscRealPart(d);
838       v  = a->a+ai[i];
839       nz = ai[i+1] - ai[i];
840       for (j=0; j<nz; j++)
841 	rs += PetscAbsScalar(v[j]);
842       if (rs>sctx.shift_top) sctx.shift_top = rs;
843     }
844     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
845     sctx.shift_top    *= 1.1;
846     sctx.nshift_max   = 5;
847     sctx.shift_lo     = 0.;
848     sctx.shift_hi     = 1.;
849   }
850 
851   sctx.shift_amount = 0.;
852   sctx.nshift       = 0;
853 #endif
854 
855   do {
856     sctx.newshift = PETSC_FALSE;
857     for (i=0; i<n; i++){
858       /* load in initial unfactored row */
859       nz    = ai[r[i]+1] - ai[r[i]];
860       ajtmp = aj + ai[r[i]];
861       v     = a->a + ai[r[i]];
862       /* sort permuted ajtmp and values v accordingly */
863       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
864       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
865 
866       diag[r[i]] = ai[r[i]];
867       for (j=0; j<nz; j++) {
868         rtmp[ajtmp[j]] = v[j];
869         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
870       }
871       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
872 
873       row = *ajtmp++;
874       while  (row < i) {
875         pc = rtmp + row;
876         if (*pc != 0.0) {
877           pv         = a->a + diag[r[row]];
878           pj         = aj + diag[r[row]] + 1;
879 
880           multiplier = *pc / *pv++;
881           *pc        = multiplier;
882           nz         = ai[r[row]+1] - diag[r[row]] - 1;
883           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
884           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
885         }
886         row = *ajtmp++;
887       }
888       /* finished row so overwrite it onto a->a */
889       pv   = a->a + ai[r[i]] ;
890       pj   = aj + ai[r[i]] ;
891       nz   = ai[r[i]+1] - ai[r[i]];
892       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
893 
894       rs   = 0.0;
895       for (j=0; j<nz; j++) {
896         pv[j] = rtmp[pj[j]];
897         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
898       }
899 
900       sctx.rs  = rs;
901       sctx.pv  = pv[nbdiag];
902       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
903       if (sctx.newshift) break;
904       pv[nbdiag] = sctx.pv;
905     }
906 
907     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
908       /*
909        * if no shift in this attempt & shifting & started shifting & can refine,
910        * then try lower shift
911        */
912       sctx.shift_hi        = sctx.shift_fraction;
913       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
914       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
915       sctx.newshift         = PETSC_TRUE;
916       sctx.nshift++;
917     }
918   } while (sctx.newshift);
919 
920   /* invert diagonal entries for simplier triangular solves */
921   for (i=0; i<n; i++) {
922     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
923   }
924 
925   ierr = PetscFree(rtmp);CHKERRQ(ierr);
926   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
927   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
928   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
929   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
930   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
931   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
932   A->assembled = PETSC_TRUE;
933   A->preallocated = PETSC_TRUE;
934   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
935   if (sctx.nshift){
936     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
937       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
938     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
939       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
940     }
941   }
942   PetscFunctionReturn(0);
943 }
944 
945 /* ----------------------------------------------------------- */
946 #undef __FUNCT__
947 #define __FUNCT__ "MatLUFactor_SeqAIJ"
948 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
949 {
950   PetscErrorCode ierr;
951   Mat            C;
952 
953   PetscFunctionBegin;
954   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
955   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
956   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
957   A->ops->solve            = C->ops->solve;
958   A->ops->solvetranspose   = C->ops->solvetranspose;
959   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
960   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 /* ----------------------------------------------------------- */
964 
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
968 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
969 {
970   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
971   IS                iscol = a->col,isrow = a->row;
972   PetscErrorCode    ierr;
973   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
974   PetscInt          nz;
975   const PetscInt    *rout,*cout,*r,*c;
976   PetscScalar       *x,*tmp,*tmps,sum;
977   const PetscScalar *b;
978   const MatScalar   *aa = a->a,*v;
979 
980   PetscFunctionBegin;
981   if (!n) PetscFunctionReturn(0);
982 
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
985   tmp  = a->solve_work;
986 
987   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
988   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
989 
990   /* forward solve the lower triangular */
991   tmp[0] = b[*r++];
992   tmps   = tmp;
993   for (i=1; i<n; i++) {
994     v   = aa + ai[i] ;
995     vi  = aj + ai[i] ;
996     nz  = a->diag[i] - ai[i];
997     sum = b[*r++];
998     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
999     tmp[i] = sum;
1000   }
1001 
1002   /* backward solve the upper triangular */
1003   for (i=n-1; i>=0; i--){
1004     v   = aa + a->diag[i] + 1;
1005     vi  = aj + a->diag[i] + 1;
1006     nz  = ai[i+1] - a->diag[i] - 1;
1007     sum = tmp[i];
1008     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1009     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1010   }
1011 
1012   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1013   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1014   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1015   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1016   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1022 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1023 {
1024   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1025   IS              iscol = a->col,isrow = a->row;
1026   PetscErrorCode  ierr;
1027   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1028   PetscInt        nz,neq;
1029   const PetscInt  *rout,*cout,*r,*c;
1030   PetscScalar     *x,*b,*tmp,*tmps,sum;
1031   const MatScalar *aa = a->a,*v;
1032   PetscBool       bisdense,xisdense;
1033 
1034   PetscFunctionBegin;
1035   if (!n) PetscFunctionReturn(0);
1036 
1037   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1038   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1039   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1040   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1041 
1042   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1043   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1044 
1045   tmp  = a->solve_work;
1046   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1047   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1048 
1049   for (neq=0; neq<B->cmap->n; neq++){
1050     /* forward solve the lower triangular */
1051     tmp[0] = b[r[0]];
1052     tmps   = tmp;
1053     for (i=1; i<n; i++) {
1054       v   = aa + ai[i] ;
1055       vi  = aj + ai[i] ;
1056       nz  = a->diag[i] - ai[i];
1057       sum = b[r[i]];
1058       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1059       tmp[i] = sum;
1060     }
1061     /* backward solve the upper triangular */
1062     for (i=n-1; i>=0; i--){
1063       v   = aa + a->diag[i] + 1;
1064       vi  = aj + a->diag[i] + 1;
1065       nz  = ai[i+1] - a->diag[i] - 1;
1066       sum = tmp[i];
1067       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1068       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1069     }
1070 
1071     b += n;
1072     x += n;
1073   }
1074   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1075   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1076   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1077   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1078   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1084 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1085 {
1086   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1087   IS              iscol = a->col,isrow = a->row;
1088   PetscErrorCode  ierr;
1089   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1090   PetscInt        nz,neq;
1091   const PetscInt  *rout,*cout,*r,*c;
1092   PetscScalar     *x,*b,*tmp,sum;
1093   const MatScalar *aa = a->a,*v;
1094   PetscBool       bisdense,xisdense;
1095 
1096   PetscFunctionBegin;
1097   if (!n) PetscFunctionReturn(0);
1098 
1099   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1100   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1101   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1102   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1103 
1104   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1105   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1106 
1107   tmp  = a->solve_work;
1108   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1109   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1110 
1111   for (neq=0; neq<B->cmap->n; neq++){
1112     /* forward solve the lower triangular */
1113     tmp[0] = b[r[0]];
1114     v      = aa;
1115     vi     = aj;
1116     for (i=1; i<n; i++) {
1117       nz  = ai[i+1] - ai[i];
1118       sum = b[r[i]];
1119       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1120       tmp[i] = sum;
1121       v += nz; vi += nz;
1122     }
1123 
1124     /* backward solve the upper triangular */
1125     for (i=n-1; i>=0; i--){
1126       v   = aa + adiag[i+1]+1;
1127       vi  = aj + adiag[i+1]+1;
1128       nz  = adiag[i]-adiag[i+1]-1;
1129       sum = tmp[i];
1130       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1131       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1132     }
1133 
1134     b += n;
1135     x += n;
1136   }
1137   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1138   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1139   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1140   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1141   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1147 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1148 {
1149   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1150   IS              iscol = a->col,isrow = a->row;
1151   PetscErrorCode  ierr;
1152   const PetscInt  *r,*c,*rout,*cout;
1153   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1154   PetscInt        nz,row;
1155   PetscScalar     *x,*b,*tmp,*tmps,sum;
1156   const MatScalar *aa = a->a,*v;
1157 
1158   PetscFunctionBegin;
1159   if (!n) PetscFunctionReturn(0);
1160 
1161   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1162   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1163   tmp  = a->solve_work;
1164 
1165   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1166   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1167 
1168   /* forward solve the lower triangular */
1169   tmp[0] = b[*r++];
1170   tmps   = tmp;
1171   for (row=1; row<n; row++) {
1172     i   = rout[row]; /* permuted row */
1173     v   = aa + ai[i] ;
1174     vi  = aj + ai[i] ;
1175     nz  = a->diag[i] - ai[i];
1176     sum = b[*r++];
1177     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1178     tmp[row] = sum;
1179   }
1180 
1181   /* backward solve the upper triangular */
1182   for (row=n-1; row>=0; row--){
1183     i   = rout[row]; /* permuted row */
1184     v   = aa + a->diag[i] + 1;
1185     vi  = aj + a->diag[i] + 1;
1186     nz  = ai[i+1] - a->diag[i] - 1;
1187     sum = tmp[row];
1188     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1189     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1190   }
1191 
1192   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1193   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1194   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1195   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1196   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 /* ----------------------------------------------------------- */
1201 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1204 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1205 {
1206   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1207   PetscErrorCode    ierr;
1208   PetscInt          n = A->rmap->n;
1209   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1210   PetscScalar       *x;
1211   const PetscScalar *b;
1212   const MatScalar   *aa = a->a;
1213 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1214   PetscInt          adiag_i,i,nz,ai_i;
1215   const PetscInt    *vi;
1216   const MatScalar   *v;
1217   PetscScalar       sum;
1218 #endif
1219 
1220   PetscFunctionBegin;
1221   if (!n) PetscFunctionReturn(0);
1222 
1223   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1224   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1225 
1226 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1227   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1228 #else
1229   /* forward solve the lower triangular */
1230   x[0] = b[0];
1231   for (i=1; i<n; i++) {
1232     ai_i = ai[i];
1233     v    = aa + ai_i;
1234     vi   = aj + ai_i;
1235     nz   = adiag[i] - ai_i;
1236     sum  = b[i];
1237     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1238     x[i] = sum;
1239   }
1240 
1241   /* backward solve the upper triangular */
1242   for (i=n-1; i>=0; i--){
1243     adiag_i = adiag[i];
1244     v       = aa + adiag_i + 1;
1245     vi      = aj + adiag_i + 1;
1246     nz      = ai[i+1] - adiag_i - 1;
1247     sum     = x[i];
1248     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1249     x[i]    = sum*aa[adiag_i];
1250   }
1251 #endif
1252   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1253   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1254   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1260 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1261 {
1262   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1263   IS                iscol = a->col,isrow = a->row;
1264   PetscErrorCode    ierr;
1265   PetscInt          i, n = A->rmap->n,j;
1266   PetscInt          nz;
1267   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1268   PetscScalar       *x,*tmp,sum;
1269   const PetscScalar *b;
1270   const MatScalar   *aa = a->a,*v;
1271 
1272   PetscFunctionBegin;
1273   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1274 
1275   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1276   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1277   tmp  = a->solve_work;
1278 
1279   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1280   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1281 
1282   /* forward solve the lower triangular */
1283   tmp[0] = b[*r++];
1284   for (i=1; i<n; i++) {
1285     v   = aa + ai[i] ;
1286     vi  = aj + ai[i] ;
1287     nz  = a->diag[i] - ai[i];
1288     sum = b[*r++];
1289     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1290     tmp[i] = sum;
1291   }
1292 
1293   /* backward solve the upper triangular */
1294   for (i=n-1; i>=0; i--){
1295     v   = aa + a->diag[i] + 1;
1296     vi  = aj + a->diag[i] + 1;
1297     nz  = ai[i+1] - a->diag[i] - 1;
1298     sum = tmp[i];
1299     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1300     tmp[i] = sum*aa[a->diag[i]];
1301     x[*c--] += tmp[i];
1302   }
1303 
1304   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1305   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1306   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1307   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1308   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1309 
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1315 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1316 {
1317   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1318   IS                iscol = a->col,isrow = a->row;
1319   PetscErrorCode    ierr;
1320   PetscInt          i, n = A->rmap->n,j;
1321   PetscInt          nz;
1322   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1323   PetscScalar       *x,*tmp,sum;
1324   const PetscScalar *b;
1325   const MatScalar   *aa = a->a,*v;
1326 
1327   PetscFunctionBegin;
1328   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1329 
1330   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1331   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1332   tmp  = a->solve_work;
1333 
1334   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1335   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1336 
1337   /* forward solve the lower triangular */
1338   tmp[0] = b[r[0]];
1339   v      = aa;
1340   vi     = aj;
1341   for (i=1; i<n; i++) {
1342     nz  = ai[i+1] - ai[i];
1343     sum = b[r[i]];
1344     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1345     tmp[i] = sum;
1346     v += nz; vi += nz;
1347   }
1348 
1349   /* backward solve the upper triangular */
1350   v  = aa + adiag[n-1];
1351   vi = aj + adiag[n-1];
1352   for (i=n-1; i>=0; i--){
1353     nz  = adiag[i] - adiag[i+1] - 1;
1354     sum = tmp[i];
1355     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1356     tmp[i] = sum*v[nz];
1357     x[c[i]] += tmp[i];
1358     v += nz+1; vi += nz+1;
1359   }
1360 
1361   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1362   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1363   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1364   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1365   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1366 
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1372 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1373 {
1374   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1375   IS                iscol = a->col,isrow = a->row;
1376   PetscErrorCode    ierr;
1377   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1378   PetscInt          i,n = A->rmap->n,j;
1379   PetscInt          nz;
1380   PetscScalar       *x,*tmp,s1;
1381   const MatScalar   *aa = a->a,*v;
1382   const PetscScalar *b;
1383 
1384   PetscFunctionBegin;
1385   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1386   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1387   tmp  = a->solve_work;
1388 
1389   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1390   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1391 
1392   /* copy the b into temp work space according to permutation */
1393   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1394 
1395   /* forward solve the U^T */
1396   for (i=0; i<n; i++) {
1397     v   = aa + diag[i] ;
1398     vi  = aj + diag[i] + 1;
1399     nz  = ai[i+1] - diag[i] - 1;
1400     s1  = tmp[i];
1401     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1402     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1403     tmp[i] = s1;
1404   }
1405 
1406   /* backward solve the L^T */
1407   for (i=n-1; i>=0; i--){
1408     v   = aa + diag[i] - 1 ;
1409     vi  = aj + diag[i] - 1 ;
1410     nz  = diag[i] - ai[i];
1411     s1  = tmp[i];
1412     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1413   }
1414 
1415   /* copy tmp into x according to permutation */
1416   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1417 
1418   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1419   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1420   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1421   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1422 
1423   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1429 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1430 {
1431   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1432   IS                iscol = a->col,isrow = a->row;
1433   PetscErrorCode    ierr;
1434   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1435   PetscInt          i,n = A->rmap->n,j;
1436   PetscInt          nz;
1437   PetscScalar       *x,*tmp,s1;
1438   const MatScalar   *aa = a->a,*v;
1439   const PetscScalar *b;
1440 
1441   PetscFunctionBegin;
1442   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1443   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1444   tmp  = a->solve_work;
1445 
1446   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1447   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1448 
1449   /* copy the b into temp work space according to permutation */
1450   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1451 
1452   /* forward solve the U^T */
1453   for (i=0; i<n; i++) {
1454     v   = aa + adiag[i+1] + 1;
1455     vi  = aj + adiag[i+1] + 1;
1456     nz  = adiag[i] - adiag[i+1] - 1;
1457     s1  = tmp[i];
1458     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1459     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1460     tmp[i] = s1;
1461   }
1462 
1463   /* backward solve the L^T */
1464   for (i=n-1; i>=0; i--){
1465     v   = aa + ai[i];
1466     vi  = aj + ai[i];
1467     nz  = ai[i+1] - ai[i];
1468     s1  = tmp[i];
1469     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1470   }
1471 
1472   /* copy tmp into x according to permutation */
1473   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1474 
1475   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1476   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1477   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1478   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1479 
1480   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 #undef __FUNCT__
1485 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1486 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1487 {
1488   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1489   IS                iscol = a->col,isrow = a->row;
1490   PetscErrorCode    ierr;
1491   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1492   PetscInt          i,n = A->rmap->n,j;
1493   PetscInt          nz;
1494   PetscScalar       *x,*tmp,s1;
1495   const MatScalar   *aa = a->a,*v;
1496   const PetscScalar *b;
1497 
1498   PetscFunctionBegin;
1499   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1500   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1501   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1502   tmp  = a->solve_work;
1503 
1504   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1505   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1506 
1507   /* copy the b into temp work space according to permutation */
1508   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1509 
1510   /* forward solve the U^T */
1511   for (i=0; i<n; i++) {
1512     v   = aa + diag[i] ;
1513     vi  = aj + diag[i] + 1;
1514     nz  = ai[i+1] - diag[i] - 1;
1515     s1  = tmp[i];
1516     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1517     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1518     tmp[i] = s1;
1519   }
1520 
1521   /* backward solve the L^T */
1522   for (i=n-1; i>=0; i--){
1523     v   = aa + diag[i] - 1 ;
1524     vi  = aj + diag[i] - 1 ;
1525     nz  = diag[i] - ai[i];
1526     s1  = tmp[i];
1527     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1528   }
1529 
1530   /* copy tmp into x according to permutation */
1531   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1532 
1533   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1534   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1535   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1536   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1537 
1538   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1544 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1545 {
1546   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1547   IS                iscol = a->col,isrow = a->row;
1548   PetscErrorCode    ierr;
1549   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1550   PetscInt          i,n = A->rmap->n,j;
1551   PetscInt          nz;
1552   PetscScalar       *x,*tmp,s1;
1553   const MatScalar   *aa = a->a,*v;
1554   const PetscScalar *b;
1555 
1556   PetscFunctionBegin;
1557   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1558   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1559   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1560   tmp  = a->solve_work;
1561 
1562   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1563   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1564 
1565   /* copy the b into temp work space according to permutation */
1566   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1567 
1568   /* forward solve the U^T */
1569   for (i=0; i<n; i++) {
1570     v   = aa + adiag[i+1] + 1;
1571     vi  = aj + adiag[i+1] + 1;
1572     nz  = adiag[i] - adiag[i+1] - 1;
1573     s1  = tmp[i];
1574     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1575     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1576     tmp[i] = s1;
1577   }
1578 
1579 
1580   /* backward solve the L^T */
1581   for (i=n-1; i>=0; i--){
1582     v   = aa + ai[i] ;
1583     vi  = aj + ai[i];
1584     nz  = ai[i+1] - ai[i];
1585     s1  = tmp[i];
1586     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1587   }
1588 
1589   /* copy tmp into x according to permutation */
1590   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1591 
1592   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1593   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1594   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1595   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1596 
1597   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 /* ----------------------------------------------------------------*/
1602 
1603 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool );
1604 
1605 /*
1606    ilu() under revised new data structure.
1607    Factored arrays bj and ba are stored as
1608      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1609 
1610    bi=fact->i is an array of size n+1, in which
1611    bi+
1612      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1613      bi[n]:  points to L(n-1,n-1)+1
1614 
1615   bdiag=fact->diag is an array of size n+1,in which
1616      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1617      bdiag[n]: points to entry of U(n-1,0)-1
1618 
1619    U(i,:) contains bdiag[i] as its last entry, i.e.,
1620     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1621 */
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1624 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1625 {
1626 
1627   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1628   PetscErrorCode     ierr;
1629   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1630   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1631   PetscBool          missing;
1632   IS                 isicol;
1633 
1634   PetscFunctionBegin;
1635   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1636   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1637   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1638   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1639 
1640   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1641   b    = (Mat_SeqAIJ*)(fact)->data;
1642 
1643   /* allocate matrix arrays for new data structure */
1644   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1645   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1646   b->singlemalloc = PETSC_TRUE;
1647   if (!b->diag){
1648     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1649     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1650   }
1651   bdiag = b->diag;
1652 
1653   if (n > 0) {
1654     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1655   }
1656 
1657   /* set bi and bj with new data structure */
1658   bi = b->i;
1659   bj = b->j;
1660 
1661   /* L part */
1662   bi[0] = 0;
1663   for (i=0; i<n; i++){
1664     nz = adiag[i] - ai[i];
1665     bi[i+1] = bi[i] + nz;
1666     aj = a->j + ai[i];
1667     for (j=0; j<nz; j++){
1668       /*   *bj = aj[j]; bj++; */
1669       bj[k++] = aj[j];
1670     }
1671   }
1672 
1673   /* U part */
1674   bdiag[n] = bi[n]-1;
1675   for (i=n-1; i>=0; i--){
1676     nz = ai[i+1] - adiag[i] - 1;
1677     aj = a->j + adiag[i] + 1;
1678     for (j=0; j<nz; j++){
1679       /*      *bj = aj[j]; bj++; */
1680       bj[k++] = aj[j];
1681     }
1682     /* diag[i] */
1683     /*    *bj = i; bj++; */
1684     bj[k++] = i;
1685     bdiag[i] = bdiag[i+1] + nz + 1;
1686   }
1687 
1688   fact->factortype             = MAT_FACTOR_ILU;
1689   fact->info.factor_mallocs    = 0;
1690   fact->info.fill_ratio_given  = info->fill;
1691   fact->info.fill_ratio_needed = 1.0;
1692   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1693 
1694   b       = (Mat_SeqAIJ*)(fact)->data;
1695   b->row  = isrow;
1696   b->col  = iscol;
1697   b->icol = isicol;
1698   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1699   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1700   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1706 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1707 {
1708   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1709   IS                 isicol;
1710   PetscErrorCode     ierr;
1711   const PetscInt     *r,*ic;
1712   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1713   PetscInt           *bi,*cols,nnz,*cols_lvl;
1714   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1715   PetscInt           i,levels,diagonal_fill;
1716   PetscBool          col_identity,row_identity;
1717   PetscReal          f;
1718   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1719   PetscBT            lnkbt;
1720   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1721   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1722   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1723 
1724   PetscFunctionBegin;
1725   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1726   /*
1727   PetscBool          olddatastruct=PETSC_FALSE;
1728   ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1729   if(olddatastruct){
1730     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1731     PetscFunctionReturn(0);
1732   }
1733   */
1734 
1735   levels = (PetscInt)info->levels;
1736   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1737   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1738 
1739   if (!levels && row_identity && col_identity) {
1740     /* special case: ilu(0) with natural ordering */
1741     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1742     if (a->inode.size) {
1743       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1744     }
1745     PetscFunctionReturn(0);
1746   }
1747 
1748   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1749   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1750   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1751   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1752 
1753   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1754   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1755   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1756   bi[0] = bdiag[0] = 0;
1757 
1758   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1759 
1760   /* create a linked list for storing column indices of the active row */
1761   nlnk = n + 1;
1762   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1763 
1764   /* initial FreeSpace size is f*(ai[n]+1) */
1765   f             = info->fill;
1766   diagonal_fill = (PetscInt)info->diagonal_fill;
1767   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1768   current_space = free_space;
1769   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1770   current_space_lvl = free_space_lvl;
1771 
1772   for (i=0; i<n; i++) {
1773     nzi = 0;
1774     /* copy current row into linked list */
1775     nnz  = ai[r[i]+1] - ai[r[i]];
1776     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1777     cols = aj + ai[r[i]];
1778     lnk[i] = -1; /* marker to indicate if diagonal exists */
1779     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1780     nzi += nlnk;
1781 
1782     /* make sure diagonal entry is included */
1783     if (diagonal_fill && lnk[i] == -1) {
1784       fm = n;
1785       while (lnk[fm] < i) fm = lnk[fm];
1786       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1787       lnk[fm]    = i;
1788       lnk_lvl[i] = 0;
1789       nzi++; dcount++;
1790     }
1791 
1792     /* add pivot rows into the active row */
1793     nzbd = 0;
1794     prow = lnk[n];
1795     while (prow < i) {
1796       nnz      = bdiag[prow];
1797       cols     = bj_ptr[prow] + nnz + 1;
1798       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1799       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1800       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1801       nzi += nlnk;
1802       prow = lnk[prow];
1803       nzbd++;
1804     }
1805     bdiag[i] = nzbd;
1806     bi[i+1]  = bi[i] + nzi;
1807 
1808     /* if free space is not available, make more free space */
1809     if (current_space->local_remaining<nzi) {
1810       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1811       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1812       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1813       reallocs++;
1814     }
1815 
1816     /* copy data into free_space and free_space_lvl, then initialize lnk */
1817     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1818     bj_ptr[i]    = current_space->array;
1819     bjlvl_ptr[i] = current_space_lvl->array;
1820 
1821     /* make sure the active row i has diagonal entry */
1822     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1823 
1824     current_space->array           += nzi;
1825     current_space->local_used      += nzi;
1826     current_space->local_remaining -= nzi;
1827     current_space_lvl->array           += nzi;
1828     current_space_lvl->local_used      += nzi;
1829     current_space_lvl->local_remaining -= nzi;
1830   }
1831 
1832   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1833   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1834 
1835   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1836   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1837   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1838 
1839   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1840   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1841   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1842 
1843 #if defined(PETSC_USE_INFO)
1844   {
1845     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1846     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1847     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1848     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1849     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1850     if (diagonal_fill) {
1851       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1852     }
1853   }
1854 #endif
1855 
1856   /* put together the new matrix */
1857   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1858   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1859   b = (Mat_SeqAIJ*)(fact)->data;
1860   b->free_a       = PETSC_TRUE;
1861   b->free_ij      = PETSC_TRUE;
1862   b->singlemalloc = PETSC_FALSE;
1863   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1864   b->j          = bj;
1865   b->i          = bi;
1866   b->diag       = bdiag;
1867   b->ilen       = 0;
1868   b->imax       = 0;
1869   b->row        = isrow;
1870   b->col        = iscol;
1871   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1872   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1873   b->icol       = isicol;
1874   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1875   /* In b structure:  Free imax, ilen, old a, old j.
1876      Allocate bdiag, solve_work, new a, new j */
1877   ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1878   b->maxnz = b->nz = bdiag[0]+1;
1879   (fact)->info.factor_mallocs    = reallocs;
1880   (fact)->info.fill_ratio_given  = f;
1881   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1882   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1883   if (a->inode.size) {
1884     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1891 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1892 {
1893   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1894   IS                 isicol;
1895   PetscErrorCode     ierr;
1896   const PetscInt     *r,*ic;
1897   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1898   PetscInt           *bi,*cols,nnz,*cols_lvl;
1899   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1900   PetscInt           i,levels,diagonal_fill;
1901   PetscBool          col_identity,row_identity;
1902   PetscReal          f;
1903   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1904   PetscBT            lnkbt;
1905   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1906   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1907   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1908   PetscBool          missing;
1909 
1910   PetscFunctionBegin;
1911   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1912   f             = info->fill;
1913   levels        = (PetscInt)info->levels;
1914   diagonal_fill = (PetscInt)info->diagonal_fill;
1915   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1916 
1917   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1918   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1919   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1920     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1921     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1922     if (a->inode.size) {
1923       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1924     }
1925     fact->factortype = MAT_FACTOR_ILU;
1926     (fact)->info.factor_mallocs    = 0;
1927     (fact)->info.fill_ratio_given  = info->fill;
1928     (fact)->info.fill_ratio_needed = 1.0;
1929     b               = (Mat_SeqAIJ*)(fact)->data;
1930     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1931     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1932     b->row              = isrow;
1933     b->col              = iscol;
1934     b->icol             = isicol;
1935     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1936     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1937     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1938     PetscFunctionReturn(0);
1939   }
1940 
1941   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1942   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1943 
1944   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1945   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1946   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1947   bi[0] = bdiag[0] = 0;
1948 
1949   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1950 
1951   /* create a linked list for storing column indices of the active row */
1952   nlnk = n + 1;
1953   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1954 
1955   /* initial FreeSpace size is f*(ai[n]+1) */
1956   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1957   current_space = free_space;
1958   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1959   current_space_lvl = free_space_lvl;
1960 
1961   for (i=0; i<n; i++) {
1962     nzi = 0;
1963     /* copy current row into linked list */
1964     nnz  = ai[r[i]+1] - ai[r[i]];
1965     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1966     cols = aj + ai[r[i]];
1967     lnk[i] = -1; /* marker to indicate if diagonal exists */
1968     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1969     nzi += nlnk;
1970 
1971     /* make sure diagonal entry is included */
1972     if (diagonal_fill && lnk[i] == -1) {
1973       fm = n;
1974       while (lnk[fm] < i) fm = lnk[fm];
1975       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1976       lnk[fm]    = i;
1977       lnk_lvl[i] = 0;
1978       nzi++; dcount++;
1979     }
1980 
1981     /* add pivot rows into the active row */
1982     nzbd = 0;
1983     prow = lnk[n];
1984     while (prow < i) {
1985       nnz      = bdiag[prow];
1986       cols     = bj_ptr[prow] + nnz + 1;
1987       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1988       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1989       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1990       nzi += nlnk;
1991       prow = lnk[prow];
1992       nzbd++;
1993     }
1994     bdiag[i] = nzbd;
1995     bi[i+1]  = bi[i] + nzi;
1996 
1997     /* if free space is not available, make more free space */
1998     if (current_space->local_remaining<nzi) {
1999       nnz = nzi*(n - i); /* estimated and max additional space needed */
2000       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2001       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2002       reallocs++;
2003     }
2004 
2005     /* copy data into free_space and free_space_lvl, then initialize lnk */
2006     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2007     bj_ptr[i]    = current_space->array;
2008     bjlvl_ptr[i] = current_space_lvl->array;
2009 
2010     /* make sure the active row i has diagonal entry */
2011     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2012 
2013     current_space->array           += nzi;
2014     current_space->local_used      += nzi;
2015     current_space->local_remaining -= nzi;
2016     current_space_lvl->array           += nzi;
2017     current_space_lvl->local_used      += nzi;
2018     current_space_lvl->local_remaining -= nzi;
2019   }
2020 
2021   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2022   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2023 
2024   /* destroy list of free space and other temporary arrays */
2025   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2026   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2027   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2028   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2029   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2030 
2031 #if defined(PETSC_USE_INFO)
2032   {
2033     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2034     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2035     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2036     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2037     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2038     if (diagonal_fill) {
2039       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2040     }
2041   }
2042 #endif
2043 
2044   /* put together the new matrix */
2045   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2046   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2047   b = (Mat_SeqAIJ*)(fact)->data;
2048   b->free_a       = PETSC_TRUE;
2049   b->free_ij      = PETSC_TRUE;
2050   b->singlemalloc = PETSC_FALSE;
2051   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2052   b->j          = bj;
2053   b->i          = bi;
2054   for (i=0; i<n; i++) bdiag[i] += bi[i];
2055   b->diag       = bdiag;
2056   b->ilen       = 0;
2057   b->imax       = 0;
2058   b->row        = isrow;
2059   b->col        = iscol;
2060   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2061   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2062   b->icol       = isicol;
2063   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2064   /* In b structure:  Free imax, ilen, old a, old j.
2065      Allocate bdiag, solve_work, new a, new j */
2066   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2067   b->maxnz             = b->nz = bi[n] ;
2068   (fact)->info.factor_mallocs    = reallocs;
2069   (fact)->info.fill_ratio_given  = f;
2070   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2071   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2072   if (a->inode.size) {
2073     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2074   }
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2080 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2081 {
2082   Mat            C = B;
2083   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2084   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2085   IS             ip=b->row,iip = b->icol;
2086   PetscErrorCode ierr;
2087   const PetscInt *rip,*riip;
2088   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2089   PetscInt       *ai=a->i,*aj=a->j;
2090   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2091   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2092   PetscBool      perm_identity;
2093   FactorShiftCtx sctx;
2094   PetscReal      rs;
2095   MatScalar      d,*v;
2096 
2097   PetscFunctionBegin;
2098   /* MatPivotSetUp(): initialize shift context sctx */
2099   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2100 
2101   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2102     sctx.shift_top = info->zeropivot;
2103     for (i=0; i<mbs; i++) {
2104       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2105       d  = (aa)[a->diag[i]];
2106       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2107       v  = aa+ai[i];
2108       nz = ai[i+1] - ai[i];
2109       for (j=0; j<nz; j++)
2110 	rs += PetscAbsScalar(v[j]);
2111       if (rs>sctx.shift_top) sctx.shift_top = rs;
2112     }
2113     sctx.shift_top   *= 1.1;
2114     sctx.nshift_max   = 5;
2115     sctx.shift_lo     = 0.;
2116     sctx.shift_hi     = 1.;
2117   }
2118 
2119   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2120   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2121 
2122   /* allocate working arrays
2123      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2124      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2125   */
2126   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2127 
2128   do {
2129     sctx.newshift = PETSC_FALSE;
2130 
2131     for (i=0; i<mbs; i++) c2r[i] = mbs;
2132     il[0] = 0;
2133 
2134     for (k = 0; k<mbs; k++){
2135       /* zero rtmp */
2136       nz = bi[k+1] - bi[k];
2137       bjtmp = bj + bi[k];
2138       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2139 
2140       /* load in initial unfactored row */
2141       bval = ba + bi[k];
2142       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2143       for (j = jmin; j < jmax; j++){
2144         col = riip[aj[j]];
2145         if (col >= k){ /* only take upper triangular entry */
2146           rtmp[col] = aa[j];
2147           *bval++   = 0.0; /* for in-place factorization */
2148         }
2149       }
2150       /* shift the diagonal of the matrix: ZeropivotApply() */
2151       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2152 
2153       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2154       dk = rtmp[k];
2155       i  = c2r[k]; /* first row to be added to k_th row  */
2156 
2157       while (i < k){
2158         nexti = c2r[i]; /* next row to be added to k_th row */
2159 
2160         /* compute multiplier, update diag(k) and U(i,k) */
2161         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2162         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2163         dk   += uikdi*ba[ili]; /* update diag[k] */
2164         ba[ili] = uikdi; /* -U(i,k) */
2165 
2166         /* add multiple of row i to k-th row */
2167         jmin = ili + 1; jmax = bi[i+1];
2168         if (jmin < jmax){
2169           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2170           /* update il and c2r for row i */
2171           il[i] = jmin;
2172           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2173         }
2174         i = nexti;
2175       }
2176 
2177       /* copy data into U(k,:) */
2178       rs   = 0.0;
2179       jmin = bi[k]; jmax = bi[k+1]-1;
2180       if (jmin < jmax) {
2181         for (j=jmin; j<jmax; j++){
2182           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2183         }
2184         /* add the k-th row into il and c2r */
2185         il[k] = jmin;
2186         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2187       }
2188 
2189       /* MatPivotCheck() */
2190       sctx.rs  = rs;
2191       sctx.pv  = dk;
2192       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2193       if(sctx.newshift) break;
2194       dk = sctx.pv;
2195 
2196       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2197     }
2198   } while (sctx.newshift);
2199 
2200   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2201   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2202   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2203 
2204   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2205   if (perm_identity){
2206     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2207     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2208     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2209     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2210   } else {
2211     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2212     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2213     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2214     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2215   }
2216 
2217   C->assembled    = PETSC_TRUE;
2218   C->preallocated = PETSC_TRUE;
2219   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2220 
2221   /* MatPivotView() */
2222   if (sctx.nshift){
2223     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2224       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2225     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2226       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2227     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2228       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2229     }
2230   }
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 #undef __FUNCT__
2235 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2236 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2237 {
2238   Mat            C = B;
2239   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2240   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2241   IS             ip=b->row,iip = b->icol;
2242   PetscErrorCode ierr;
2243   const PetscInt *rip,*riip;
2244   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2245   PetscInt       *ai=a->i,*aj=a->j;
2246   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2247   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2248   PetscBool      perm_identity;
2249   FactorShiftCtx sctx;
2250   PetscReal      rs;
2251   MatScalar      d,*v;
2252 
2253   PetscFunctionBegin;
2254   /* MatPivotSetUp(): initialize shift context sctx */
2255   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2256 
2257   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2258     sctx.shift_top = info->zeropivot;
2259     for (i=0; i<mbs; i++) {
2260       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2261       d  = (aa)[a->diag[i]];
2262       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2263       v  = aa+ai[i];
2264       nz = ai[i+1] - ai[i];
2265       for (j=0; j<nz; j++)
2266 	rs += PetscAbsScalar(v[j]);
2267       if (rs>sctx.shift_top) sctx.shift_top = rs;
2268     }
2269     sctx.shift_top   *= 1.1;
2270     sctx.nshift_max   = 5;
2271     sctx.shift_lo     = 0.;
2272     sctx.shift_hi     = 1.;
2273   }
2274 
2275   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2276   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2277 
2278   /* initialization */
2279   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2280 
2281   do {
2282     sctx.newshift = PETSC_FALSE;
2283 
2284     for (i=0; i<mbs; i++) jl[i] = mbs;
2285     il[0] = 0;
2286 
2287     for (k = 0; k<mbs; k++){
2288       /* zero rtmp */
2289       nz = bi[k+1] - bi[k];
2290       bjtmp = bj + bi[k];
2291       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2292 
2293       bval = ba + bi[k];
2294       /* initialize k-th row by the perm[k]-th row of A */
2295       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2296       for (j = jmin; j < jmax; j++){
2297         col = riip[aj[j]];
2298         if (col >= k){ /* only take upper triangular entry */
2299           rtmp[col] = aa[j];
2300           *bval++  = 0.0; /* for in-place factorization */
2301         }
2302       }
2303       /* shift the diagonal of the matrix */
2304       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2305 
2306       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2307       dk = rtmp[k];
2308       i = jl[k]; /* first row to be added to k_th row  */
2309 
2310       while (i < k){
2311         nexti = jl[i]; /* next row to be added to k_th row */
2312 
2313         /* compute multiplier, update diag(k) and U(i,k) */
2314         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2315         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2316         dk += uikdi*ba[ili];
2317         ba[ili] = uikdi; /* -U(i,k) */
2318 
2319         /* add multiple of row i to k-th row */
2320         jmin = ili + 1; jmax = bi[i+1];
2321         if (jmin < jmax){
2322           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2323           /* update il and jl for row i */
2324           il[i] = jmin;
2325           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2326         }
2327         i = nexti;
2328       }
2329 
2330       /* shift the diagonals when zero pivot is detected */
2331       /* compute rs=sum of abs(off-diagonal) */
2332       rs   = 0.0;
2333       jmin = bi[k]+1;
2334       nz   = bi[k+1] - jmin;
2335       bcol = bj + jmin;
2336       for (j=0; j<nz; j++) {
2337         rs += PetscAbsScalar(rtmp[bcol[j]]);
2338       }
2339 
2340       sctx.rs = rs;
2341       sctx.pv = dk;
2342       ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2343       if (sctx.newshift) break;
2344       dk = sctx.pv;
2345 
2346       /* copy data into U(k,:) */
2347       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2348       jmin = bi[k]+1; jmax = bi[k+1];
2349       if (jmin < jmax) {
2350         for (j=jmin; j<jmax; j++){
2351           col = bj[j]; ba[j] = rtmp[col];
2352         }
2353         /* add the k-th row into il and jl */
2354         il[k] = jmin;
2355         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2356       }
2357     }
2358   } while (sctx.newshift);
2359 
2360   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2361   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2362   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2363 
2364   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2365   if (perm_identity){
2366     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2367     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2370   } else {
2371     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2372     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2373     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2374     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2375   }
2376 
2377   C->assembled    = PETSC_TRUE;
2378   C->preallocated = PETSC_TRUE;
2379   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2380   if (sctx.nshift){
2381     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2382       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2383     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2384       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2385     }
2386   }
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 /*
2391    icc() under revised new data structure.
2392    Factored arrays bj and ba are stored as
2393      U(0,:),...,U(i,:),U(n-1,:)
2394 
2395    ui=fact->i is an array of size n+1, in which
2396    ui+
2397      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2398      ui[n]:  points to U(n-1,n-1)+1
2399 
2400   udiag=fact->diag is an array of size n,in which
2401      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2402 
2403    U(i,:) contains udiag[i] as its last entry, i.e.,
2404     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2405 */
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2409 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2410 {
2411   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2412   Mat_SeqSBAIJ       *b;
2413   PetscErrorCode     ierr;
2414   PetscBool          perm_identity,missing;
2415   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2416   const PetscInt     *rip,*riip;
2417   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2418   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2419   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2420   PetscReal          fill=info->fill,levels=info->levels;
2421   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2422   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2423   PetscBT            lnkbt;
2424   IS                 iperm;
2425 
2426   PetscFunctionBegin;
2427   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2428   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2429   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2430   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2431   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2432 
2433   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2434   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2435   ui[0] = 0;
2436 
2437   /* ICC(0) without matrix ordering: simply rearrange column indices */
2438   if (!levels && perm_identity) {
2439     for (i=0; i<am; i++) {
2440       ncols    = ai[i+1] - a->diag[i];
2441       ui[i+1]  = ui[i] + ncols;
2442       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2443     }
2444     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2445     cols = uj;
2446     for (i=0; i<am; i++) {
2447       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2448       ncols = ai[i+1] - a->diag[i] -1;
2449       for (j=0; j<ncols; j++) *cols++ = aj[j];
2450       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2451     }
2452   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2453     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2454     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2455 
2456     /* initialization */
2457     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2458 
2459     /* jl: linked list for storing indices of the pivot rows
2460        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2461     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2462     for (i=0; i<am; i++){
2463       jl[i] = am; il[i] = 0;
2464     }
2465 
2466     /* create and initialize a linked list for storing column indices of the active row k */
2467     nlnk = am + 1;
2468     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2469 
2470     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2471     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2472     current_space = free_space;
2473     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2474     current_space_lvl = free_space_lvl;
2475 
2476     for (k=0; k<am; k++){  /* for each active row k */
2477       /* initialize lnk by the column indices of row rip[k] of A */
2478       nzk   = 0;
2479       ncols = ai[rip[k]+1] - ai[rip[k]];
2480       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2481       ncols_upper = 0;
2482       for (j=0; j<ncols; j++){
2483         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2484         if (riip[i] >= k){ /* only take upper triangular entry */
2485           ajtmp[ncols_upper] = i;
2486           ncols_upper++;
2487         }
2488       }
2489       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2490       nzk += nlnk;
2491 
2492       /* update lnk by computing fill-in for each pivot row to be merged in */
2493       prow = jl[k]; /* 1st pivot row */
2494 
2495       while (prow < k){
2496         nextprow = jl[prow];
2497 
2498         /* merge prow into k-th row */
2499         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2500         jmax = ui[prow+1];
2501         ncols = jmax-jmin;
2502         i     = jmin - ui[prow];
2503         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2504         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2505         j     = *(uj - 1);
2506         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2507         nzk += nlnk;
2508 
2509         /* update il and jl for prow */
2510         if (jmin < jmax){
2511           il[prow] = jmin;
2512           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2513         }
2514         prow = nextprow;
2515       }
2516 
2517       /* if free space is not available, make more free space */
2518       if (current_space->local_remaining<nzk) {
2519         i  = am - k + 1; /* num of unfactored rows */
2520         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2521         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2522         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2523         reallocs++;
2524       }
2525 
2526       /* copy data into free_space and free_space_lvl, then initialize lnk */
2527       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2528       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2529 
2530       /* add the k-th row into il and jl */
2531       if (nzk > 1){
2532         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2533         jl[k] = jl[i]; jl[i] = k;
2534         il[k] = ui[k] + 1;
2535       }
2536       uj_ptr[k]     = current_space->array;
2537       uj_lvl_ptr[k] = current_space_lvl->array;
2538 
2539       current_space->array           += nzk;
2540       current_space->local_used      += nzk;
2541       current_space->local_remaining -= nzk;
2542 
2543       current_space_lvl->array           += nzk;
2544       current_space_lvl->local_used      += nzk;
2545       current_space_lvl->local_remaining -= nzk;
2546 
2547       ui[k+1] = ui[k] + nzk;
2548     }
2549 
2550     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2551     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2552     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2553     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2554 
2555     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2556     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2557     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2558     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2559     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2560 
2561   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2562 
2563   /* put together the new matrix in MATSEQSBAIJ format */
2564   b    = (Mat_SeqSBAIJ*)(fact)->data;
2565   b->singlemalloc = PETSC_FALSE;
2566   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2567   b->j    = uj;
2568   b->i    = ui;
2569   b->diag = udiag;
2570   b->free_diag = PETSC_TRUE;
2571   b->ilen = 0;
2572   b->imax = 0;
2573   b->row  = perm;
2574   b->col  = perm;
2575   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2576   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2577   b->icol = iperm;
2578   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2579   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2580   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2581   b->maxnz   = b->nz = ui[am];
2582   b->free_a  = PETSC_TRUE;
2583   b->free_ij = PETSC_TRUE;
2584 
2585   fact->info.factor_mallocs   = reallocs;
2586   fact->info.fill_ratio_given = fill;
2587   if (ai[am] != 0) {
2588     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2589     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2590   } else {
2591     fact->info.fill_ratio_needed = 0.0;
2592   }
2593 #if defined(PETSC_USE_INFO)
2594     if (ai[am] != 0) {
2595       PetscReal af = fact->info.fill_ratio_needed;
2596       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2597       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2598       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2599     } else {
2600       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2601     }
2602 #endif
2603   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 #undef __FUNCT__
2608 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2609 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2610 {
2611   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2612   Mat_SeqSBAIJ       *b;
2613   PetscErrorCode     ierr;
2614   PetscBool          perm_identity,missing;
2615   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2616   const PetscInt     *rip,*riip;
2617   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2618   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2619   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2620   PetscReal          fill=info->fill,levels=info->levels;
2621   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2622   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2623   PetscBT            lnkbt;
2624   IS                 iperm;
2625 
2626   PetscFunctionBegin;
2627   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2628   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2629   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2630   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2631   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2632 
2633   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2634   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2635   ui[0] = 0;
2636 
2637   /* ICC(0) without matrix ordering: simply copies fill pattern */
2638   if (!levels && perm_identity) {
2639 
2640     for (i=0; i<am; i++) {
2641       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2642       udiag[i] = ui[i];
2643     }
2644     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2645     cols = uj;
2646     for (i=0; i<am; i++) {
2647       aj    = a->j + a->diag[i];
2648       ncols = ui[i+1] - ui[i];
2649       for (j=0; j<ncols; j++) *cols++ = *aj++;
2650     }
2651   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2652     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2653     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2654 
2655     /* initialization */
2656     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2657 
2658     /* jl: linked list for storing indices of the pivot rows
2659        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2660     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2661     for (i=0; i<am; i++){
2662       jl[i] = am; il[i] = 0;
2663     }
2664 
2665     /* create and initialize a linked list for storing column indices of the active row k */
2666     nlnk = am + 1;
2667     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2668 
2669     /* initial FreeSpace size is fill*(ai[am]+1) */
2670     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2671     current_space = free_space;
2672     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2673     current_space_lvl = free_space_lvl;
2674 
2675     for (k=0; k<am; k++){  /* for each active row k */
2676       /* initialize lnk by the column indices of row rip[k] of A */
2677       nzk   = 0;
2678       ncols = ai[rip[k]+1] - ai[rip[k]];
2679       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2680       ncols_upper = 0;
2681       for (j=0; j<ncols; j++){
2682         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2683         if (riip[i] >= k){ /* only take upper triangular entry */
2684           ajtmp[ncols_upper] = i;
2685           ncols_upper++;
2686         }
2687       }
2688       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2689       nzk += nlnk;
2690 
2691       /* update lnk by computing fill-in for each pivot row to be merged in */
2692       prow = jl[k]; /* 1st pivot row */
2693 
2694       while (prow < k){
2695         nextprow = jl[prow];
2696 
2697         /* merge prow into k-th row */
2698         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2699         jmax = ui[prow+1];
2700         ncols = jmax-jmin;
2701         i     = jmin - ui[prow];
2702         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2703         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2704         j     = *(uj - 1);
2705         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2706         nzk += nlnk;
2707 
2708         /* update il and jl for prow */
2709         if (jmin < jmax){
2710           il[prow] = jmin;
2711           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2712         }
2713         prow = nextprow;
2714       }
2715 
2716       /* if free space is not available, make more free space */
2717       if (current_space->local_remaining<nzk) {
2718         i = am - k + 1; /* num of unfactored rows */
2719         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2720         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2721         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2722         reallocs++;
2723       }
2724 
2725       /* copy data into free_space and free_space_lvl, then initialize lnk */
2726       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2727       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2728 
2729       /* add the k-th row into il and jl */
2730       if (nzk > 1){
2731         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2732         jl[k] = jl[i]; jl[i] = k;
2733         il[k] = ui[k] + 1;
2734       }
2735       uj_ptr[k]     = current_space->array;
2736       uj_lvl_ptr[k] = current_space_lvl->array;
2737 
2738       current_space->array           += nzk;
2739       current_space->local_used      += nzk;
2740       current_space->local_remaining -= nzk;
2741 
2742       current_space_lvl->array           += nzk;
2743       current_space_lvl->local_used      += nzk;
2744       current_space_lvl->local_remaining -= nzk;
2745 
2746       ui[k+1] = ui[k] + nzk;
2747     }
2748 
2749 #if defined(PETSC_USE_INFO)
2750     if (ai[am] != 0) {
2751       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2752       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2753       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2754       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2755     } else {
2756       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2757     }
2758 #endif
2759 
2760     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2761     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2762     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2763     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2764 
2765     /* destroy list of free space and other temporary array(s) */
2766     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2767     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2768     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2769     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2770 
2771   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2772 
2773   /* put together the new matrix in MATSEQSBAIJ format */
2774 
2775   b    = (Mat_SeqSBAIJ*)fact->data;
2776   b->singlemalloc = PETSC_FALSE;
2777   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2778   b->j    = uj;
2779   b->i    = ui;
2780   b->diag = udiag;
2781   b->free_diag = PETSC_TRUE;
2782   b->ilen = 0;
2783   b->imax = 0;
2784   b->row  = perm;
2785   b->col  = perm;
2786   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2787   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2788   b->icol = iperm;
2789   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2790   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2791   ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2792   b->maxnz   = b->nz = ui[am];
2793   b->free_a  = PETSC_TRUE;
2794   b->free_ij = PETSC_TRUE;
2795 
2796   fact->info.factor_mallocs    = reallocs;
2797   fact->info.fill_ratio_given  = fill;
2798   if (ai[am] != 0) {
2799     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2800   } else {
2801     fact->info.fill_ratio_needed = 0.0;
2802   }
2803   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 #undef __FUNCT__
2808 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2809 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2810 {
2811   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2812   Mat_SeqSBAIJ       *b;
2813   PetscErrorCode     ierr;
2814   PetscBool          perm_identity;
2815   PetscReal          fill = info->fill;
2816   const PetscInt     *rip,*riip;
2817   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2818   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2819   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2820   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2821   PetscBT            lnkbt;
2822   IS                 iperm;
2823 
2824   PetscFunctionBegin;
2825   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2826   /* check whether perm is the identity mapping */
2827   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2828   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2829   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2830   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2831 
2832   /* initialization */
2833   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2834   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2835   ui[0] = 0;
2836 
2837   /* jl: linked list for storing indices of the pivot rows
2838      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2839   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2840   for (i=0; i<am; i++){
2841     jl[i] = am; il[i] = 0;
2842   }
2843 
2844   /* create and initialize a linked list for storing column indices of the active row k */
2845   nlnk = am + 1;
2846   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2847 
2848   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2849   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2850   current_space = free_space;
2851 
2852   for (k=0; k<am; k++){  /* for each active row k */
2853     /* initialize lnk by the column indices of row rip[k] of A */
2854     nzk   = 0;
2855     ncols = ai[rip[k]+1] - ai[rip[k]];
2856     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2857     ncols_upper = 0;
2858     for (j=0; j<ncols; j++){
2859       i = riip[*(aj + ai[rip[k]] + j)];
2860       if (i >= k){ /* only take upper triangular entry */
2861         cols[ncols_upper] = i;
2862         ncols_upper++;
2863       }
2864     }
2865     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2866     nzk += nlnk;
2867 
2868     /* update lnk by computing fill-in for each pivot row to be merged in */
2869     prow = jl[k]; /* 1st pivot row */
2870 
2871     while (prow < k){
2872       nextprow = jl[prow];
2873       /* merge prow into k-th row */
2874       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2875       jmax = ui[prow+1];
2876       ncols = jmax-jmin;
2877       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2878       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2879       nzk += nlnk;
2880 
2881       /* update il and jl for prow */
2882       if (jmin < jmax){
2883         il[prow] = jmin;
2884         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2885       }
2886       prow = nextprow;
2887     }
2888 
2889     /* if free space is not available, make more free space */
2890     if (current_space->local_remaining<nzk) {
2891       i  = am - k + 1; /* num of unfactored rows */
2892       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2893       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2894       reallocs++;
2895     }
2896 
2897     /* copy data into free space, then initialize lnk */
2898     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2899 
2900     /* add the k-th row into il and jl */
2901     if (nzk > 1){
2902       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2903       jl[k] = jl[i]; jl[i] = k;
2904       il[k] = ui[k] + 1;
2905     }
2906     ui_ptr[k] = current_space->array;
2907     current_space->array           += nzk;
2908     current_space->local_used      += nzk;
2909     current_space->local_remaining -= nzk;
2910 
2911     ui[k+1] = ui[k] + nzk;
2912   }
2913 
2914   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2915   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2916   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2917 
2918   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2919   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2920   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2921   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2922 
2923   /* put together the new matrix in MATSEQSBAIJ format */
2924 
2925   b = (Mat_SeqSBAIJ*)fact->data;
2926   b->singlemalloc = PETSC_FALSE;
2927   b->free_a       = PETSC_TRUE;
2928   b->free_ij      = PETSC_TRUE;
2929   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2930   b->j    = uj;
2931   b->i    = ui;
2932   b->diag = udiag;
2933   b->free_diag = PETSC_TRUE;
2934   b->ilen = 0;
2935   b->imax = 0;
2936   b->row  = perm;
2937   b->col  = perm;
2938   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2939   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2940   b->icol = iperm;
2941   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2942   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2943   ierr    = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2944   b->maxnz = b->nz = ui[am];
2945 
2946   fact->info.factor_mallocs    = reallocs;
2947   fact->info.fill_ratio_given  = fill;
2948   if (ai[am] != 0) {
2949     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2950     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2951   } else {
2952     fact->info.fill_ratio_needed = 0.0;
2953   }
2954 #if defined(PETSC_USE_INFO)
2955   if (ai[am] != 0) {
2956     PetscReal af = fact->info.fill_ratio_needed;
2957     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2958     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2959     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2960   } else {
2961      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2962   }
2963 #endif
2964   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2965   PetscFunctionReturn(0);
2966 }
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2970 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2971 {
2972   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2973   Mat_SeqSBAIJ       *b;
2974   PetscErrorCode     ierr;
2975   PetscBool          perm_identity;
2976   PetscReal          fill = info->fill;
2977   const PetscInt     *rip,*riip;
2978   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2979   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2980   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2981   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2982   PetscBT            lnkbt;
2983   IS                 iperm;
2984 
2985   PetscFunctionBegin;
2986   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2987   /* check whether perm is the identity mapping */
2988   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2989   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2990   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2991   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2992 
2993   /* initialization */
2994   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2995   ui[0] = 0;
2996 
2997   /* jl: linked list for storing indices of the pivot rows
2998      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2999   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
3000   for (i=0; i<am; i++){
3001     jl[i] = am; il[i] = 0;
3002   }
3003 
3004   /* create and initialize a linked list for storing column indices of the active row k */
3005   nlnk = am + 1;
3006   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3007 
3008   /* initial FreeSpace size is fill*(ai[am]+1) */
3009   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3010   current_space = free_space;
3011 
3012   for (k=0; k<am; k++){  /* for each active row k */
3013     /* initialize lnk by the column indices of row rip[k] of A */
3014     nzk   = 0;
3015     ncols = ai[rip[k]+1] - ai[rip[k]];
3016     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3017     ncols_upper = 0;
3018     for (j=0; j<ncols; j++){
3019       i = riip[*(aj + ai[rip[k]] + j)];
3020       if (i >= k){ /* only take upper triangular entry */
3021         cols[ncols_upper] = i;
3022         ncols_upper++;
3023       }
3024     }
3025     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3026     nzk += nlnk;
3027 
3028     /* update lnk by computing fill-in for each pivot row to be merged in */
3029     prow = jl[k]; /* 1st pivot row */
3030 
3031     while (prow < k){
3032       nextprow = jl[prow];
3033       /* merge prow into k-th row */
3034       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3035       jmax = ui[prow+1];
3036       ncols = jmax-jmin;
3037       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3038       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3039       nzk += nlnk;
3040 
3041       /* update il and jl for prow */
3042       if (jmin < jmax){
3043         il[prow] = jmin;
3044         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3045       }
3046       prow = nextprow;
3047     }
3048 
3049     /* if free space is not available, make more free space */
3050     if (current_space->local_remaining<nzk) {
3051       i = am - k + 1; /* num of unfactored rows */
3052       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3053       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3054       reallocs++;
3055     }
3056 
3057     /* copy data into free space, then initialize lnk */
3058     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3059 
3060     /* add the k-th row into il and jl */
3061     if (nzk-1 > 0){
3062       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3063       jl[k] = jl[i]; jl[i] = k;
3064       il[k] = ui[k] + 1;
3065     }
3066     ui_ptr[k] = current_space->array;
3067     current_space->array           += nzk;
3068     current_space->local_used      += nzk;
3069     current_space->local_remaining -= nzk;
3070 
3071     ui[k+1] = ui[k] + nzk;
3072   }
3073 
3074 #if defined(PETSC_USE_INFO)
3075   if (ai[am] != 0) {
3076     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3077     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3078     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3079     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3080   } else {
3081      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3082   }
3083 #endif
3084 
3085   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3086   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3087   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3088 
3089   /* destroy list of free space and other temporary array(s) */
3090   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3091   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3092   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3093 
3094   /* put together the new matrix in MATSEQSBAIJ format */
3095 
3096   b = (Mat_SeqSBAIJ*)fact->data;
3097   b->singlemalloc = PETSC_FALSE;
3098   b->free_a       = PETSC_TRUE;
3099   b->free_ij      = PETSC_TRUE;
3100   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3101   b->j    = uj;
3102   b->i    = ui;
3103   b->diag = 0;
3104   b->ilen = 0;
3105   b->imax = 0;
3106   b->row  = perm;
3107   b->col  = perm;
3108   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3109   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3110   b->icol = iperm;
3111   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3112   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3113   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3114   b->maxnz = b->nz = ui[am];
3115 
3116   fact->info.factor_mallocs    = reallocs;
3117   fact->info.fill_ratio_given  = fill;
3118   if (ai[am] != 0) {
3119     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3120   } else {
3121     fact->info.fill_ratio_needed = 0.0;
3122   }
3123   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3124   PetscFunctionReturn(0);
3125 }
3126 
3127 #undef __FUNCT__
3128 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3129 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3130 {
3131   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3132   PetscErrorCode    ierr;
3133   PetscInt          n = A->rmap->n;
3134   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3135   PetscScalar       *x,sum;
3136   const PetscScalar *b;
3137   const MatScalar   *aa = a->a,*v;
3138   PetscInt          i,nz;
3139 
3140   PetscFunctionBegin;
3141   if (!n) PetscFunctionReturn(0);
3142 
3143   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3144   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3145 
3146   /* forward solve the lower triangular */
3147   x[0] = b[0];
3148   v    = aa;
3149   vi   = aj;
3150   for (i=1; i<n; i++) {
3151     nz  = ai[i+1] - ai[i];
3152     sum = b[i];
3153     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3154     v  += nz;
3155     vi += nz;
3156     x[i] = sum;
3157   }
3158 
3159   /* backward solve the upper triangular */
3160   for (i=n-1; i>=0; i--){
3161     v   = aa + adiag[i+1] + 1;
3162     vi  = aj + adiag[i+1] + 1;
3163     nz = adiag[i] - adiag[i+1]-1;
3164     sum = x[i];
3165     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3166     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3167   }
3168 
3169   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3170   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3171   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3172   PetscFunctionReturn(0);
3173 }
3174 
3175 #undef __FUNCT__
3176 #define __FUNCT__ "MatSolve_SeqAIJ"
3177 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3178 {
3179   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3180   IS                iscol = a->col,isrow = a->row;
3181   PetscErrorCode    ierr;
3182   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3183   const PetscInt    *rout,*cout,*r,*c;
3184   PetscScalar       *x,*tmp,sum;
3185   const PetscScalar *b;
3186   const MatScalar   *aa = a->a,*v;
3187 
3188   PetscFunctionBegin;
3189   if (!n) PetscFunctionReturn(0);
3190 
3191   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3192   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3193   tmp  = a->solve_work;
3194 
3195   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3196   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3197 
3198   /* forward solve the lower triangular */
3199   tmp[0] = b[r[0]];
3200   v      = aa;
3201   vi     = aj;
3202   for (i=1; i<n; i++) {
3203     nz  = ai[i+1] - ai[i];
3204     sum = b[r[i]];
3205     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3206     tmp[i] = sum;
3207     v += nz; vi += nz;
3208   }
3209 
3210   /* backward solve the upper triangular */
3211   for (i=n-1; i>=0; i--){
3212     v   = aa + adiag[i+1]+1;
3213     vi  = aj + adiag[i+1]+1;
3214     nz  = adiag[i]-adiag[i+1]-1;
3215     sum = tmp[i];
3216     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3217     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3218   }
3219 
3220   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3221   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3222   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3223   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3224   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3225   PetscFunctionReturn(0);
3226 }
3227 
3228 #undef __FUNCT__
3229 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3230 /*
3231     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3232 */
3233 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3234 {
3235   Mat                B = *fact;
3236   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3237   IS                 isicol;
3238   PetscErrorCode     ierr;
3239   const PetscInt     *r,*ic;
3240   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3241   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3242   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3243   PetscInt           nlnk,*lnk;
3244   PetscBT            lnkbt;
3245   PetscBool          row_identity,icol_identity;
3246   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3247   const PetscInt     *ics;
3248   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3249   PetscReal          dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3250   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3251   PetscBool          missing;
3252 
3253   PetscFunctionBegin;
3254 
3255   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3256   if (dtcol   == PETSC_DEFAULT) dtcol   = 0.01; /* XXX unused! */
3257   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3258 
3259   /* ------- symbolic factorization, can be reused ---------*/
3260   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3261   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3262   adiag=a->diag;
3263 
3264   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3265 
3266   /* bdiag is location of diagonal in factor */
3267   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3268   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3269 
3270   /* allocate row pointers bi */
3271   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3272 
3273   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3274   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3275   nnz_max  = ai[n]+2*n*dtcount+2;
3276 
3277   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3278   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3279 
3280   /* put together the new matrix */
3281   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3282   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3283   b    = (Mat_SeqAIJ*)B->data;
3284   b->free_a       = PETSC_TRUE;
3285   b->free_ij      = PETSC_TRUE;
3286   b->singlemalloc = PETSC_FALSE;
3287   b->a          = ba;
3288   b->j          = bj;
3289   b->i          = bi;
3290   b->diag       = bdiag;
3291   b->ilen       = 0;
3292   b->imax       = 0;
3293   b->row        = isrow;
3294   b->col        = iscol;
3295   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3296   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3297   b->icol       = isicol;
3298   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3299 
3300   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3301   b->maxnz = nnz_max;
3302 
3303   B->factortype            = MAT_FACTOR_ILUDT;
3304   B->info.factor_mallocs   = 0;
3305   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3306   CHKMEMQ;
3307   /* ------- end of symbolic factorization ---------*/
3308 
3309   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3310   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3311   ics  = ic;
3312 
3313   /* linked list for storing column indices of the active row */
3314   nlnk = n + 1;
3315   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3316 
3317   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3318   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3319   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3320   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3321   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3322 
3323   bi[0]    = 0;
3324   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3325   bdiag_rev[n] = bdiag[0];
3326   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3327   for (i=0; i<n; i++) {
3328     /* copy initial fill into linked list */
3329     nzi = 0; /* nonzeros for active row i */
3330     nzi = ai[r[i]+1] - ai[r[i]];
3331     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3332     nzi_al = adiag[r[i]] - ai[r[i]];
3333     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3334     ajtmp = aj + ai[r[i]];
3335     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3336 
3337     /* load in initial (unfactored row) */
3338     aatmp = a->a + ai[r[i]];
3339     for (j=0; j<nzi; j++) {
3340       rtmp[ics[*ajtmp++]] = *aatmp++;
3341     }
3342 
3343     /* add pivot rows into linked list */
3344     row = lnk[n];
3345     while (row < i ) {
3346       nzi_bl = bi[row+1] - bi[row] + 1;
3347       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3348       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3349       nzi  += nlnk;
3350       row   = lnk[row];
3351     }
3352 
3353     /* copy data from lnk into jtmp, then initialize lnk */
3354     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3355 
3356     /* numerical factorization */
3357     bjtmp = jtmp;
3358     row   = *bjtmp++; /* 1st pivot row */
3359     while  ( row < i ) {
3360       pc         = rtmp + row;
3361       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3362       multiplier = (*pc) * (*pv);
3363       *pc        = multiplier;
3364       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3365         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3366         pv         = ba + bdiag[row+1] + 1;
3367         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3368         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3369         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3370         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
3371       }
3372       row = *bjtmp++;
3373     }
3374 
3375     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3376     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3377     nzi_bl = 0; j = 0;
3378     while (jtmp[j] < i){ /* Note: jtmp is sorted */
3379       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3380       nzi_bl++; j++;
3381     }
3382     nzi_bu = nzi - nzi_bl -1;
3383     while (j < nzi){
3384       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3385       j++;
3386     }
3387 
3388     bjtmp = bj + bi[i];
3389     batmp = ba + bi[i];
3390     /* apply level dropping rule to L part */
3391     ncut = nzi_al + dtcount;
3392     if (ncut < nzi_bl){
3393       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3394       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3395     } else {
3396       ncut = nzi_bl;
3397     }
3398     for (j=0; j<ncut; j++){
3399       bjtmp[j] = jtmp[j];
3400       batmp[j] = vtmp[j];
3401       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3402     }
3403     bi[i+1] = bi[i] + ncut;
3404     nzi = ncut + 1;
3405 
3406     /* apply level dropping rule to U part */
3407     ncut = nzi_au + dtcount;
3408     if (ncut < nzi_bu){
3409       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3410       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3411     } else {
3412       ncut = nzi_bu;
3413     }
3414     nzi += ncut;
3415 
3416     /* mark bdiagonal */
3417     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3418     bdiag_rev[n-i-1] = bdiag[i+1];
3419     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3420     bjtmp = bj + bdiag[i];
3421     batmp = ba + bdiag[i];
3422     *bjtmp = i;
3423     *batmp = diag_tmp; /* rtmp[i]; */
3424     if (*batmp == 0.0) {
3425       *batmp = dt+shift;
3426       /* printf(" row %d add shift %g\n",i,shift); */
3427     }
3428     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3429     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3430 
3431     bjtmp = bj + bdiag[i+1]+1;
3432     batmp = ba + bdiag[i+1]+1;
3433     for (k=0; k<ncut; k++){
3434       bjtmp[k] = jtmp[nzi_bl+1+k];
3435       batmp[k] = vtmp[nzi_bl+1+k];
3436       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3437     }
3438     /* printf("\n"); */
3439 
3440     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3441     /*
3442     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3443     printf(" ----------------------------\n");
3444     */
3445   } /* for (i=0; i<n; i++) */
3446   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3447   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3448 
3449   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3450   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3451 
3452   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3453   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3454   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3455   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3456 
3457   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3458   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3459 
3460   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3461   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3462   if (row_identity && icol_identity) {
3463     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3464   } else {
3465     B->ops->solve = MatSolve_SeqAIJ;
3466   }
3467 
3468   B->ops->solveadd          = 0;
3469   B->ops->solvetranspose    = 0;
3470   B->ops->solvetransposeadd = 0;
3471   B->ops->matsolve          = 0;
3472   B->assembled              = PETSC_TRUE;
3473   B->preallocated           = PETSC_TRUE;
3474   PetscFunctionReturn(0);
3475 }
3476 
3477 /* a wraper of MatILUDTFactor_SeqAIJ() */
3478 #undef __FUNCT__
3479 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3480 /*
3481     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3482 */
3483 
3484 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3485 {
3486   PetscErrorCode     ierr;
3487 
3488   PetscFunctionBegin;
3489   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3490   PetscFunctionReturn(0);
3491 }
3492 
3493 /*
3494    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3495    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3496 */
3497 #undef __FUNCT__
3498 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3499 /*
3500     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3501 */
3502 
3503 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3504 {
3505   Mat            C=fact;
3506   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3507   IS             isrow = b->row,isicol = b->icol;
3508   PetscErrorCode ierr;
3509   const PetscInt *r,*ic,*ics;
3510   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3511   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3512   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3513   PetscReal      dt=info->dt,shift=info->shiftamount;
3514   PetscBool      row_identity, col_identity;
3515 
3516   PetscFunctionBegin;
3517   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3518   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3519   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3520   ics  = ic;
3521 
3522   for (i=0; i<n; i++){
3523     /* initialize rtmp array */
3524     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3525     bjtmp = bj + bi[i];
3526     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3527     rtmp[i] = 0.0;
3528     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3529     bjtmp = bj + bdiag[i+1] + 1;
3530     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3531 
3532     /* load in initial unfactored row of A */
3533     /* printf("row %d\n",i); */
3534     nz    = ai[r[i]+1] - ai[r[i]];
3535     ajtmp = aj + ai[r[i]];
3536     v     = aa + ai[r[i]];
3537     for (j=0; j<nz; j++) {
3538       rtmp[ics[*ajtmp++]] = v[j];
3539       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3540     }
3541     /* printf("\n"); */
3542 
3543     /* numerical factorization */
3544     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3545     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3546     k = 0;
3547     while (k < nzl){
3548       row   = *bjtmp++;
3549       /* printf("  prow %d\n",row); */
3550       pc         = rtmp + row;
3551       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3552       multiplier = (*pc) * (*pv);
3553       *pc        = multiplier;
3554       if (PetscAbsScalar(multiplier) > dt){
3555         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3556         pv         = b->a + bdiag[row+1] + 1;
3557         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3558         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3559         /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */
3560       }
3561       k++;
3562     }
3563 
3564     /* finished row so stick it into b->a */
3565     /* L-part */
3566     pv = b->a + bi[i] ;
3567     pj = bj + bi[i] ;
3568     nzl = bi[i+1] - bi[i];
3569     for (j=0; j<nzl; j++) {
3570       pv[j] = rtmp[pj[j]];
3571       /* printf(" (%d,%g),",pj[j],pv[j]); */
3572     }
3573 
3574     /* diagonal: invert diagonal entries for simplier triangular solves */
3575     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3576     b->a[bdiag[i]] = 1.0/rtmp[i];
3577     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3578 
3579     /* U-part */
3580     pv = b->a + bdiag[i+1] + 1;
3581     pj = bj + bdiag[i+1] + 1;
3582     nzu = bdiag[i] - bdiag[i+1] - 1;
3583     for (j=0; j<nzu; j++) {
3584       pv[j] = rtmp[pj[j]];
3585       /* printf(" (%d,%g),",pj[j],pv[j]); */
3586     }
3587     /* printf("\n"); */
3588   }
3589 
3590   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3591   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3592   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3593 
3594   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3595   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3596   if (row_identity && col_identity) {
3597     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3598   } else {
3599     C->ops->solve   = MatSolve_SeqAIJ;
3600   }
3601   C->ops->solveadd           = 0;
3602   C->ops->solvetranspose     = 0;
3603   C->ops->solvetransposeadd  = 0;
3604   C->ops->matsolve           = 0;
3605   C->assembled    = PETSC_TRUE;
3606   C->preallocated = PETSC_TRUE;
3607   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3608   PetscFunctionReturn(0);
3609 }
3610