xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 813a1f2b15e089a41994ae49ec03bce476400765)
1 #define PETSCMAT_DLL
2 
3 
4 #include "../src/mat/impls/aij/seq/aij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 #include "petscbt.h"
7 #include "../src/mat/utils/freespace.h"
8 
9 EXTERN_C_BEGIN
10 #undef __FUNCT__
11 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
12 /*
13       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
14 */
15 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16 {
17   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
18   PetscErrorCode    ierr;
19   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20   const PetscInt    *ai = a->i, *aj = a->j;
21   const PetscScalar *aa = a->a;
22   PetscTruth        *done;
23   PetscReal         best,past = 0,future;
24 
25   PetscFunctionBegin;
26   /* pick initial row */
27   best = -1;
28   for (i=0; i<n; i++) {
29     future = 0;
30     for (j=ai[i]; j<ai[i+1]; j++) {
31       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best = past/future;
36       current = i;
37     }
38   }
39 
40   ierr = PetscMalloc(n*sizeof(PetscTruth),&done);CHKERRQ(ierr);
41   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
42   ierr = PetscMemzero(done,n*sizeof(PetscTruth));CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0;
69         past   = 0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk]) past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future  += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current = newcurrent;
84     order[i+1] = current;
85   }
86   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr = PetscFree(done);CHKERRQ(ierr);
90   ierr = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 EXTERN_C_BEGIN
96 #undef __FUNCT__
97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
99 {
100   PetscFunctionBegin;
101   *flg = PETSC_TRUE;
102   PetscFunctionReturn(0);
103 }
104 EXTERN_C_END
105 
106 EXTERN_C_BEGIN
107 #undef __FUNCT__
108 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110 {
111   PetscInt           n = A->rmap->n;
112   PetscErrorCode     ierr;
113 
114   PetscFunctionBegin;
115   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
116   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
117   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
118     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
119     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
121     (*B)->ops->iludtfactor       = MatILUDTFactor_SeqAIJ;
122   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
123     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
124     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
125     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
126     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
127   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
128   (*B)->factor = ftype;
129   PetscFunctionReturn(0);
130 }
131 EXTERN_C_END
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
135 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
136 {
137   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
138   IS                 isicol;
139   PetscErrorCode     ierr;
140   const PetscInt     *r,*ic;
141   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
142   PetscInt           *bi,*bj,*ajtmp;
143   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
144   PetscReal          f;
145   PetscInt           nlnk,*lnk,k,**bi_ptr;
146   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
147   PetscBT            lnkbt;
148   PetscTruth         newdatastruct=PETSC_FALSE;
149 
150   PetscFunctionBegin;
151   ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr);
152   if(newdatastruct){
153     ierr = MatLUFactorSymbolic_SeqAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr);
154     PetscFunctionReturn(0);
155   }
156 
157   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
158   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
159   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
160   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
161 
162   /* get new row pointers */
163   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
164   bi[0] = 0;
165 
166   /* bdiag is location of diagonal in factor */
167   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
168   bdiag[0] = 0;
169 
170   /* linked list for storing column indices of the active row */
171   nlnk = n + 1;
172   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
173 
174   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
175 
176   /* initial FreeSpace size is f*(ai[n]+1) */
177   f = info->fill;
178   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
179   current_space = free_space;
180 
181   for (i=0; i<n; i++) {
182     /* copy previous fill into linked list */
183     nzi = 0;
184     nnz = ai[r[i]+1] - ai[r[i]];
185     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
186     ajtmp = aj + ai[r[i]];
187     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
188     nzi += nlnk;
189 
190     /* add pivot rows into linked list */
191     row = lnk[n];
192     while (row < i) {
193       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
194       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
195       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
196       nzi += nlnk;
197       row  = lnk[row];
198     }
199     bi[i+1] = bi[i] + nzi;
200     im[i]   = nzi;
201 
202     /* mark bdiag */
203     nzbd = 0;
204     nnz  = nzi;
205     k    = lnk[n];
206     while (nnz-- && k < i){
207       nzbd++;
208       k = lnk[k];
209     }
210     bdiag[i] = bi[i] + nzbd;
211 
212     /* if free space is not available, make more free space */
213     if (current_space->local_remaining<nzi) {
214       nnz = (n - i)*nzi; /* estimated and max additional space needed */
215       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
216       reallocs++;
217     }
218 
219     /* copy data into free space, then initialize lnk */
220     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
221     bi_ptr[i] = current_space->array;
222     current_space->array           += nzi;
223     current_space->local_used      += nzi;
224     current_space->local_remaining -= nzi;
225   }
226 #if defined(PETSC_USE_INFO)
227   if (ai[n] != 0) {
228     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
229     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
230     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
231     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
232     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
233   } else {
234     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
235   }
236 #endif
237 
238   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
239   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
240 
241   /* destroy list of free space and other temporary array(s) */
242   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
243   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
244   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
245   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
246 
247   /* put together the new matrix */
248   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
249   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
250   b    = (Mat_SeqAIJ*)(B)->data;
251   b->free_a       = PETSC_TRUE;
252   b->free_ij      = PETSC_TRUE;
253   b->singlemalloc = PETSC_FALSE;
254   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
255   b->j          = bj;
256   b->i          = bi;
257   b->diag       = bdiag;
258   b->ilen       = 0;
259   b->imax       = 0;
260   b->row        = isrow;
261   b->col        = iscol;
262   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
263   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
264   b->icol       = isicol;
265   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
266 
267   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
268   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
269   b->maxnz = b->nz = bi[n] ;
270 
271   (B)->factor                = MAT_FACTOR_LU;
272   (B)->info.factor_mallocs   = reallocs;
273   (B)->info.fill_ratio_given = f;
274 
275   if (ai[n]) {
276     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
277   } else {
278     (B)->info.fill_ratio_needed = 0.0;
279   }
280   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
281   (B)->ops->solve            = MatSolve_SeqAIJ;
282   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
283   /* switch to inodes if appropriate */
284   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
285   PetscFunctionReturn(0);
286 }
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_newdatastruct"
290 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
291 {
292   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
293   IS                 isicol;
294   PetscErrorCode     ierr;
295   const PetscInt     *r,*ic;
296   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
297   PetscInt           *bi,*bj,*ajtmp;
298   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
299   PetscReal          f;
300   PetscInt           nlnk,*lnk,k,**bi_ptr;
301   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
302   PetscBT            lnkbt;
303 
304   PetscFunctionBegin;
305   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
306   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
307   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
308   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
309 
310   /* get new row pointers */
311   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
312   bi[0] = 0;
313 
314   /* bdiag is location of diagonal in factor */
315   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
316   bdiag[0] = 0;
317 
318   /* linked list for storing column indices of the active row */
319   nlnk = n + 1;
320   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
321 
322   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
323 
324   /* initial FreeSpace size is f*(ai[n]+1) */
325   f = info->fill;
326   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
327   current_space = free_space;
328 
329   for (i=0; i<n; i++) {
330     /* copy previous fill into linked list */
331     nzi = 0;
332     nnz = ai[r[i]+1] - ai[r[i]];
333     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
334     ajtmp = aj + ai[r[i]];
335     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
336     nzi += nlnk;
337 
338     /* add pivot rows into linked list */
339     row = lnk[n];
340     while (row < i){
341       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
342       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
343       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
344       nzi  += nlnk;
345       row   = lnk[row];
346     }
347     bi[i+1] = bi[i] + nzi;
348     im[i]   = nzi;
349 
350     /* mark bdiag */
351     nzbd = 0;
352     nnz  = nzi;
353     k    = lnk[n];
354     while (nnz-- && k < i){
355       nzbd++;
356       k = lnk[k];
357     }
358     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
359 
360     /* if free space is not available, make more free space */
361     if (current_space->local_remaining<nzi) {
362       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
363       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
364       reallocs++;
365     }
366 
367     /* copy data into free space, then initialize lnk */
368     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
369     bi_ptr[i] = current_space->array;
370     current_space->array           += nzi;
371     current_space->local_used      += nzi;
372     current_space->local_remaining -= nzi;
373   }
374 #if defined(PETSC_USE_INFO)
375   if (ai[n] != 0) {
376     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
377     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
378     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
379     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
380     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
381   } else {
382     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
383   }
384 #endif
385 
386   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
387   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
388 
389   /* destroy list of free space and other temporary array(s) */
390   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
391   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
392   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
393   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
394 
395   /* put together the new matrix */
396   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
397   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
398   b    = (Mat_SeqAIJ*)(B)->data;
399   b->free_a       = PETSC_TRUE;
400   b->free_ij      = PETSC_TRUE;
401   b->singlemalloc = PETSC_FALSE;
402   ierr          = PetscMalloc((bi[2*n+1])*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
403   b->j          = bj;
404   b->i          = bi;
405   b->diag       = bdiag;
406   b->ilen       = 0;
407   b->imax       = 0;
408   b->row        = isrow;
409   b->col        = iscol;
410   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
411   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
412   b->icol       = isicol;
413   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
414 
415   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
416   ierr = PetscLogObjectMemory(B,bi[2*n+1]*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
417   b->maxnz = b->nz = bi[2*n+1] ;
418 
419   (B)->factor                = MAT_FACTOR_LU;
420   (B)->info.factor_mallocs   = reallocs;
421   (B)->info.fill_ratio_given = f;
422 
423   if (ai[n]) {
424     (B)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]);
425   } else {
426     (B)->info.fill_ratio_needed = 0.0;
427   }
428   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_newdatastruct;
429   PetscFunctionReturn(0);
430 }
431 
432 /*
433     Trouble in factorization, should we dump the original matrix?
434 */
435 #undef __FUNCT__
436 #define __FUNCT__ "MatFactorDumpMatrix"
437 PetscErrorCode MatFactorDumpMatrix(Mat A)
438 {
439   PetscErrorCode ierr;
440   PetscTruth     flg = PETSC_FALSE;
441 
442   PetscFunctionBegin;
443   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
444   if (flg) {
445     PetscViewer viewer;
446     char        filename[PETSC_MAX_PATH_LEN];
447 
448     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
449     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
450     ierr = MatView(A,viewer);CHKERRQ(ierr);
451     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
452   }
453   PetscFunctionReturn(0);
454 }
455 
456 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
457 
458 /* ----------------------------------------------------------- */
459 extern PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
460 extern PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat,Vec,Vec);
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_newdatastruct"
464 PetscErrorCode MatLUFactorNumeric_SeqAIJ_newdatastruct(Mat B,Mat A,const MatFactorInfo *info)
465 {
466   Mat            C=B;
467   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
468   IS             isrow = b->row,isicol = b->icol;
469   PetscErrorCode ierr;
470   const PetscInt *r,*ic,*ics;
471   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
472   PetscInt       *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj;
473   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
474   PetscTruth     row_identity,col_identity;
475 
476   LUShift_Ctx    sctx;
477   PetscInt       *ddiag,newshift;
478   PetscReal      rs;
479   MatScalar      d;
480 
481   PetscFunctionBegin;
482   /* ZeropivotSetUp(): initialize shift context sctx */
483   sctx.nshift         = 0;
484   sctx.nshift_max     = 0;
485   sctx.shift_top      = 0.0;
486   sctx.shift_lo       = 0.0;
487   sctx.shift_hi       = 0.0;
488   sctx.shift_fraction = 0.0;
489   sctx.shift_amount   = 0.0;
490 
491   /* if both shift schemes are chosen by user, only use info->shiftpd */
492   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
493     ddiag          = a->diag;
494     sctx.shift_top = info->zeropivot;
495     for (i=0; i<n; i++) {
496       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
497       d  = (aa)[ddiag[i]];
498       rs = -PetscAbsScalar(d) - PetscRealPart(d);
499       v  = aa+ai[i];
500       nz = ai[i+1] - ai[i];
501       for (j=0; j<nz; j++)
502 	rs += PetscAbsScalar(v[j]);
503       if (rs>sctx.shift_top) sctx.shift_top = rs;
504     }
505     sctx.shift_top   *= 1.1;
506     sctx.nshift_max   = 5;
507     sctx.shift_lo     = 0.;
508     sctx.shift_hi     = 1.;
509   }
510 
511   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
512   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
513   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
514   ics  = ic;
515 
516   do {
517     sctx.lushift = PETSC_FALSE;
518     for (i=0; i<n; i++){
519       /* zero rtmp */
520       /* L part */
521       nz    = bi[i+1] - bi[i];
522       bjtmp = bj + bi[i];
523       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
524 
525       /* U part */
526       nz = bi[2*n-i+1] - bi[2*n-i];
527       bjtmp = bj + bi[2*n-i];
528       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
529 
530       /* load in initial (unfactored row) */
531       nz    = ai[r[i]+1] - ai[r[i]];
532       ajtmp = aj + ai[r[i]];
533       v     = aa + ai[r[i]];
534       for (j=0; j<nz; j++) {
535         rtmp[ics[ajtmp[j]]] = v[j];
536       }
537       /* ZeropivotApply() */
538       rtmp[ics[r[i]]] += sctx.shift_amount;  /* shift the diagonal of the matrix */
539 
540       /* elimination */
541       bjtmp = bj + bi[i];
542       row   = *bjtmp++;
543       nzL   = bi[i+1] - bi[i];
544       k   = 0;
545       while  (k < nzL) {
546         pc = rtmp + row;
547         if (*pc != 0.0) {
548           pv         = b->a + bdiag[row];
549           multiplier = *pc * (*pv);
550           *pc        = multiplier;
551           pj         = b->j + bi[2*n-row]; /* begining of U(row,:) */
552           pv         = b->a + bi[2*n-row];
553           nz         = bi[2*n-row+1] - bi[2*n-row] - 1; /* num of entries in U(row,:), excluding diag */
554           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
555           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
556         }
557         row = *bjtmp++; k++;
558       }
559 
560       /* finished row so stick it into b->a */
561       rs = 0.0;
562       /* L part */
563       pv   = b->a + bi[i] ;
564       pj   = b->j + bi[i] ;
565       nz   = bi[i+1] - bi[i];
566       for (j=0; j<nz; j++) {
567         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
568       }
569 
570       /* U part */
571       pv = b->a + bi[2*n-i];
572       pj = b->j + bi[2*n-i];
573       nz = bi[2*n-i+1] - bi[2*n-i] - 1;
574       for (j=0; j<nz; j++) {
575         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
576       }
577 
578       /* ZeropivotCheck() */
579       sctx.rs  = rs;
580       sctx.pv  = rtmp[i];
581       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
582       if (newshift == 1) break;
583 
584       /* Mark diagonal and invert diagonal for simplier triangular solves */
585       pv  = b->a + bdiag[i];
586       *pv = 1.0/rtmp[i];
587 
588     } /* endof for (i=0; i<n; i++){ */
589 
590     /* ZeropivotRefine() */
591     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
592       /*
593        * if no shift in this attempt & shifting & started shifting & can refine,
594        * then try lower shift
595        */
596       sctx.shift_hi       = sctx.shift_fraction;
597       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
598       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
599       sctx.lushift        = PETSC_TRUE;
600       sctx.nshift++;
601     }
602   } while (sctx.lushift);
603 
604   ierr = PetscFree(rtmp);CHKERRQ(ierr);
605   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
606   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
607 
608   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
609   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
610   if (row_identity && col_identity) {
611     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct;
612   } else {
613     C->ops->solve = MatSolve_SeqAIJ_newdatastruct;
614   }
615 
616   C->ops->solveadd           = 0;
617   C->ops->solvetranspose     = 0;
618   C->ops->solvetransposeadd  = 0;
619   C->ops->matsolve           = 0;
620   C->assembled    = PETSC_TRUE;
621   C->preallocated = PETSC_TRUE;
622   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
623 
624   /* ZeropivotView() */
625   if (sctx.nshift){
626     if (info->shiftpd) {
627       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
628     } else if (info->shiftnz) {
629       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /* ----------------------------------------------------------- */
636 extern PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct_v2(Mat,Vec,Vec);
637 /* extern PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat,Vec,Vec); */
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_newdatastruct_v2"
641 PetscErrorCode MatLUFactorNumeric_SeqAIJ_newdatastruct_v2(Mat B,Mat A,const MatFactorInfo *info)
642 {
643   Mat            C=B;
644   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
645   IS             isrow = b->row,isicol = b->icol;
646   PetscErrorCode ierr;
647   const PetscInt *r,*ic,*ics;
648   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
649   PetscInt       *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj;
650   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
651   PetscTruth     row_identity,col_identity;
652 
653   LUShift_Ctx    sctx;
654   PetscInt       *ddiag,newshift;
655   PetscReal      rs;
656   MatScalar      d;
657 
658   PetscFunctionBegin;
659   /* ZeropivotSetUp(): initialize shift context sctx */
660   sctx.nshift         = 0;
661   sctx.nshift_max     = 0;
662   sctx.shift_top      = 0.0;
663   sctx.shift_lo       = 0.0;
664   sctx.shift_hi       = 0.0;
665   sctx.shift_fraction = 0.0;
666   sctx.shift_amount   = 0.0;
667 
668   /* if both shift schemes are chosen by user, only use info->shiftpd */
669   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
670     ddiag          = a->diag;
671     sctx.shift_top = info->zeropivot;
672     for (i=0; i<n; i++) {
673       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
674       d  = (aa)[ddiag[i]];
675       rs = -PetscAbsScalar(d) - PetscRealPart(d);
676       v  = aa+ai[i];
677       nz = ai[i+1] - ai[i];
678       for (j=0; j<nz; j++)
679 	rs += PetscAbsScalar(v[j]);
680       if (rs>sctx.shift_top) sctx.shift_top = rs;
681     }
682     sctx.shift_top   *= 1.1;
683     sctx.nshift_max   = 5;
684     sctx.shift_lo     = 0.;
685     sctx.shift_hi     = 1.;
686   }
687 
688   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
689   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
690   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
691   ics  = ic;
692 
693   do {
694     sctx.lushift = PETSC_FALSE;
695     for (i=0; i<n; i++){
696       /* zero rtmp */
697       /* L part */
698       nz    = bi[i+1] - bi[i];
699       bjtmp = bj + bi[i];
700       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
701 
702       /* U part */
703       nz = bi[2*n-i+1] - bi[2*n-i];
704       bjtmp = bj + bi[2*n-i];
705       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
706 
707       /* load in initial (unfactored row) */
708       nz    = ai[r[i]+1] - ai[r[i]];
709       ajtmp = aj + ai[r[i]];
710       v     = aa + ai[r[i]];
711       for (j=0; j<nz; j++) {
712         rtmp[ics[ajtmp[j]]] = v[j];
713       }
714       /* ZeropivotApply() */
715       rtmp[ics[r[i]]] += sctx.shift_amount;  /* shift the diagonal of the matrix */
716 
717       /* elimination */
718       bjtmp = bj + bi[i];
719       row   = *bjtmp++;
720       nzL   = bi[i+1] - bi[i];
721       k   = 0;
722       while  (k < nzL) {
723         pc = rtmp + row;
724         if (*pc != 0.0) {
725           pv         = b->a + bdiag[row];
726           multiplier = *pc * (*pv);
727           *pc        = multiplier;
728           pj         = b->j + bi[2*n-row]; /* begining of U(row,:) */
729           pv         = b->a + bi[2*n-row];
730           nz         = bi[2*n-row+1] - bi[2*n-row] - 1; /* num of entries in U(row,:), excluding diag */
731           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
732           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
733         }
734         row = *bjtmp++; k++;
735       }
736 
737       /* finished row so stick it into b->a */
738       rs = 0.0;
739       /* L part */
740       pv   = b->a + bi[i] ;
741       pj   = b->j + bi[i] ;
742       nz   = bi[i+1] - bi[i];
743       for (j=0; j<nz; j++) {
744         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
745       }
746 
747       /* U part */
748       pv = b->a + bi[2*n-i];
749       pj = b->j + bi[2*n-i];
750       nz = bi[2*n-i+1] - bi[2*n-i] - 1;
751       for (j=0; j<nz; j++) {
752         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
753       }
754 
755       /* ZeropivotCheck() */
756       sctx.rs  = rs;
757       sctx.pv  = rtmp[i];
758       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
759       if (newshift == 1) break;
760 
761       /* Mark diagonal and invert diagonal for simplier triangular solves */
762       pv  = b->a + bdiag[i];
763       *pv = 1.0/rtmp[i];
764 
765     } /* endof for (i=0; i<n; i++){ */
766 
767     /* ZeropivotRefine() */
768     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
769       /*
770        * if no shift in this attempt & shifting & started shifting & can refine,
771        * then try lower shift
772        */
773       sctx.shift_hi       = sctx.shift_fraction;
774       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
775       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
776       sctx.lushift        = PETSC_TRUE;
777       sctx.nshift++;
778     }
779   } while (sctx.lushift);
780 
781   ierr = PetscFree(rtmp);CHKERRQ(ierr);
782   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
783   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
784 
785   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
786   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
787   if (row_identity && col_identity) {
788     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct;
789   } else {
790     C->ops->solve = MatSolve_SeqAIJ_newdatastruct;
791   }
792 
793   C->ops->solveadd           = 0;
794   C->ops->solvetranspose     = 0;
795   C->ops->solvetransposeadd  = 0;
796   C->ops->matsolve           = 0;
797   C->assembled    = PETSC_TRUE;
798   C->preallocated = PETSC_TRUE;
799   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
800 
801   /* ZeropivotView() */
802   if (sctx.nshift){
803     if (info->shiftpd) {
804       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
805     } else if (info->shiftnz) {
806       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
807     }
808   }
809   PetscFunctionReturn(0);
810 }
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
814 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
815 {
816   Mat             C=B;
817   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
818   IS              isrow = b->row,isicol = b->icol;
819   PetscErrorCode  ierr;
820   const PetscInt   *r,*ic,*ics;
821   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
822   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
823   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
824   MatScalar       *pv,*rtmp,*pc,multiplier,d;
825   const MatScalar *v,*aa=a->a;
826   PetscReal       rs=0.0;
827   LUShift_Ctx     sctx;
828   PetscInt        newshift,*ddiag;
829 
830   PetscFunctionBegin;
831   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
832   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
833   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
834   ics  = ic;
835 
836   /* initialize shift context sctx */
837   sctx.nshift         = 0;
838   sctx.nshift_max     = 0;
839   sctx.shift_top      = 0.0;
840   sctx.shift_lo       = 0.0;
841   sctx.shift_hi       = 0.0;
842   sctx.shift_fraction = 0.0;
843   sctx.shift_amount   = 0.0;
844 
845   /* if both shift schemes are chosen by user, only use info->shiftpd */
846   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
847     ddiag          = a->diag;
848     sctx.shift_top = info->zeropivot;
849     for (i=0; i<n; i++) {
850       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
851       d  = (aa)[ddiag[i]];
852       rs = -PetscAbsScalar(d) - PetscRealPart(d);
853       v  = aa+ai[i];
854       nz = ai[i+1] - ai[i];
855       for (j=0; j<nz; j++)
856 	rs += PetscAbsScalar(v[j]);
857       if (rs>sctx.shift_top) sctx.shift_top = rs;
858     }
859     sctx.shift_top   *= 1.1;
860     sctx.nshift_max   = 5;
861     sctx.shift_lo     = 0.;
862     sctx.shift_hi     = 1.;
863   }
864 
865   do {
866     sctx.lushift = PETSC_FALSE;
867     for (i=0; i<n; i++){
868       nz    = bi[i+1] - bi[i];
869       bjtmp = bj + bi[i];
870       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
871 
872       /* load in initial (unfactored row) */
873       nz    = ai[r[i]+1] - ai[r[i]];
874       ajtmp = aj + ai[r[i]];
875       v     = aa + ai[r[i]];
876       for (j=0; j<nz; j++) {
877         rtmp[ics[ajtmp[j]]] = v[j];
878       }
879       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
880       /* if (sctx.shift_amount > 0.0) printf("row %d, shift %g\n",i,sctx.shift_amount); */
881 
882       row = *bjtmp++;
883       while  (row < i) {
884         pc = rtmp + row;
885         if (*pc != 0.0) {
886           pv         = b->a + diag_offset[row];
887           pj         = b->j + diag_offset[row] + 1;
888           multiplier = *pc / *pv++;
889           *pc        = multiplier;
890           nz         = bi[row+1] - diag_offset[row] - 1;
891           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
892           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
893         }
894         row = *bjtmp++;
895       }
896       /* finished row so stick it into b->a */
897       pv   = b->a + bi[i] ;
898       pj   = b->j + bi[i] ;
899       nz   = bi[i+1] - bi[i];
900       diag = diag_offset[i] - bi[i];
901       rs   = 0.0;
902       for (j=0; j<nz; j++) {
903         pv[j] = rtmp[pj[j]];
904         rs   += PetscAbsScalar(pv[j]);
905       }
906       rs   -= PetscAbsScalar(pv[diag]);
907 
908       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
909       sctx.rs  = rs;
910       sctx.pv  = pv[diag];
911       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
912       if (newshift == 1) break;
913     }
914 
915     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
916       /*
917        * if no shift in this attempt & shifting & started shifting & can refine,
918        * then try lower shift
919        */
920       sctx.shift_hi       = sctx.shift_fraction;
921       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
922       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
923       sctx.lushift        = PETSC_TRUE;
924       sctx.nshift++;
925     }
926   } while (sctx.lushift);
927 
928   /* invert diagonal entries for simplier triangular solves */
929   for (i=0; i<n; i++) {
930     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
931   }
932   ierr = PetscFree(rtmp);CHKERRQ(ierr);
933   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
934   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
935   if (b->inode.use) {
936     C->ops->solve   = MatSolve_Inode;
937   } else {
938     PetscTruth row_identity, col_identity;
939     ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
940     ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
941     if (row_identity && col_identity) {
942       C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
943     } else {
944       C->ops->solve   = MatSolve_SeqAIJ;
945     }
946   }
947   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
948   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
949   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
950   C->ops->matsolve           = MatMatSolve_SeqAIJ;
951   C->assembled    = PETSC_TRUE;
952   C->preallocated = PETSC_TRUE;
953   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
954   if (sctx.nshift){
955      if (info->shiftpd) {
956       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
957     } else if (info->shiftnz) {
958       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
959     }
960   }
961   PetscFunctionReturn(0);
962 }
963 
964 /*
965    This routine implements inplace ILU(0) with row or/and column permutations.
966    Input:
967      A - original matrix
968    Output;
969      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
970          a->j (col index) is permuted by the inverse of colperm, then sorted
971          a->a reordered accordingly with a->j
972          a->diag (ptr to diagonal elements) is updated.
973 */
974 #undef __FUNCT__
975 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
976 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
977 {
978   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
979   IS             isrow = a->row,isicol = a->icol;
980   PetscErrorCode ierr;
981   const PetscInt *r,*ic,*ics;
982   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
983   PetscInt       *ajtmp,nz,row;
984   PetscInt       *diag = a->diag,nbdiag,*pj;
985   PetscScalar    *rtmp,*pc,multiplier,d;
986   MatScalar      *v,*pv;
987   PetscReal      rs;
988   LUShift_Ctx    sctx;
989   PetscInt       newshift;
990 
991   PetscFunctionBegin;
992   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
993   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
994   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
995   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
996   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
997   ics = ic;
998 
999   sctx.shift_top      = 0;
1000   sctx.nshift_max     = 0;
1001   sctx.shift_lo       = 0;
1002   sctx.shift_hi       = 0;
1003   sctx.shift_fraction = 0;
1004 
1005   /* if both shift schemes are chosen by user, only use info->shiftpd */
1006   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
1007     sctx.shift_top = 0;
1008     for (i=0; i<n; i++) {
1009       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1010       d  = (a->a)[diag[i]];
1011       rs = -PetscAbsScalar(d) - PetscRealPart(d);
1012       v  = a->a+ai[i];
1013       nz = ai[i+1] - ai[i];
1014       for (j=0; j<nz; j++)
1015 	rs += PetscAbsScalar(v[j]);
1016       if (rs>sctx.shift_top) sctx.shift_top = rs;
1017     }
1018     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
1019     sctx.shift_top    *= 1.1;
1020     sctx.nshift_max   = 5;
1021     sctx.shift_lo     = 0.;
1022     sctx.shift_hi     = 1.;
1023   }
1024 
1025   sctx.shift_amount = 0;
1026   sctx.nshift       = 0;
1027   do {
1028     sctx.lushift = PETSC_FALSE;
1029     for (i=0; i<n; i++){
1030       /* load in initial unfactored row */
1031       nz    = ai[r[i]+1] - ai[r[i]];
1032       ajtmp = aj + ai[r[i]];
1033       v     = a->a + ai[r[i]];
1034       /* sort permuted ajtmp and values v accordingly */
1035       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
1036       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
1037 
1038       diag[r[i]] = ai[r[i]];
1039       for (j=0; j<nz; j++) {
1040         rtmp[ajtmp[j]] = v[j];
1041         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
1042       }
1043       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
1044 
1045       row = *ajtmp++;
1046       while  (row < i) {
1047         pc = rtmp + row;
1048         if (*pc != 0.0) {
1049           pv         = a->a + diag[r[row]];
1050           pj         = aj + diag[r[row]] + 1;
1051 
1052           multiplier = *pc / *pv++;
1053           *pc        = multiplier;
1054           nz         = ai[r[row]+1] - diag[r[row]] - 1;
1055           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
1056           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
1057         }
1058         row = *ajtmp++;
1059       }
1060       /* finished row so overwrite it onto a->a */
1061       pv   = a->a + ai[r[i]] ;
1062       pj   = aj + ai[r[i]] ;
1063       nz   = ai[r[i]+1] - ai[r[i]];
1064       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
1065 
1066       rs   = 0.0;
1067       for (j=0; j<nz; j++) {
1068         pv[j] = rtmp[pj[j]];
1069         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
1070       }
1071 
1072       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
1073       sctx.rs  = rs;
1074       sctx.pv  = pv[nbdiag];
1075       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
1076       if (newshift == 1) break;
1077     }
1078 
1079     if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
1080       /*
1081        * if no shift in this attempt & shifting & started shifting & can refine,
1082        * then try lower shift
1083        */
1084       sctx.shift_hi        = sctx.shift_fraction;
1085       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
1086       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
1087       sctx.lushift         = PETSC_TRUE;
1088       sctx.nshift++;
1089     }
1090   } while (sctx.lushift);
1091 
1092   /* invert diagonal entries for simplier triangular solves */
1093   for (i=0; i<n; i++) {
1094     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
1095   }
1096 
1097   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1098   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1099   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1100   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
1101   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
1102   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
1103   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
1104   A->assembled = PETSC_TRUE;
1105   A->preallocated = PETSC_TRUE;
1106   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
1107   if (sctx.nshift){
1108     if (info->shiftpd) {
1109       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
1110     } else if (info->shiftnz) {
1111       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1112     }
1113   }
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 /* ----------------------------------------------------------- */
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatLUFactor_SeqAIJ"
1120 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
1121 {
1122   PetscErrorCode ierr;
1123   Mat            C;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
1127   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
1128   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
1129   A->ops->solve            = C->ops->solve;
1130   A->ops->solvetranspose   = C->ops->solvetranspose;
1131   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1132   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 /* ----------------------------------------------------------- */
1136 
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatSolve_SeqAIJ"
1140 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
1141 {
1142   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1143   IS                iscol = a->col,isrow = a->row;
1144   PetscErrorCode    ierr;
1145   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1146   PetscInt          nz;
1147   const PetscInt    *rout,*cout,*r,*c;
1148   PetscScalar       *x,*tmp,*tmps,sum;
1149   const PetscScalar *b;
1150   const MatScalar   *aa = a->a,*v;
1151 
1152   PetscFunctionBegin;
1153   if (!n) PetscFunctionReturn(0);
1154 
1155   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1156   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1157   tmp  = a->solve_work;
1158 
1159   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1160   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1161 
1162   /* forward solve the lower triangular */
1163   tmp[0] = b[*r++];
1164   tmps   = tmp;
1165   for (i=1; i<n; i++) {
1166     v   = aa + ai[i] ;
1167     vi  = aj + ai[i] ;
1168     nz  = a->diag[i] - ai[i];
1169     sum = b[*r++];
1170     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1171     tmp[i] = sum;
1172   }
1173 
1174   /* backward solve the upper triangular */
1175   for (i=n-1; i>=0; i--){
1176     v   = aa + a->diag[i] + 1;
1177     vi  = aj + a->diag[i] + 1;
1178     nz  = ai[i+1] - a->diag[i] - 1;
1179     sum = tmp[i];
1180     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1181     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1182   }
1183 
1184   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1185   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1186   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1187   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1188   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1194 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1195 {
1196   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1197   IS              iscol = a->col,isrow = a->row;
1198   PetscErrorCode  ierr;
1199   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1200   PetscInt        nz,neq;
1201   const PetscInt  *rout,*cout,*r,*c;
1202   PetscScalar     *x,*b,*tmp,*tmps,sum;
1203   const MatScalar *aa = a->a,*v;
1204   PetscTruth      bisdense,xisdense;
1205 
1206   PetscFunctionBegin;
1207   if (!n) PetscFunctionReturn(0);
1208 
1209   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1210   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1211   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1212   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1213 
1214   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1215   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1216 
1217   tmp  = a->solve_work;
1218   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1219   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1220 
1221   for (neq=0; neq<B->cmap->n; neq++){
1222     /* forward solve the lower triangular */
1223     tmp[0] = b[r[0]];
1224     tmps   = tmp;
1225     for (i=1; i<n; i++) {
1226       v   = aa + ai[i] ;
1227       vi  = aj + ai[i] ;
1228       nz  = a->diag[i] - ai[i];
1229       sum = b[r[i]];
1230       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1231       tmp[i] = sum;
1232     }
1233     /* backward solve the upper triangular */
1234     for (i=n-1; i>=0; i--){
1235       v   = aa + a->diag[i] + 1;
1236       vi  = aj + a->diag[i] + 1;
1237       nz  = ai[i+1] - a->diag[i] - 1;
1238       sum = tmp[i];
1239       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1240       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1241     }
1242 
1243     b += n;
1244     x += n;
1245   }
1246   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1247   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1248   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1249   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1250   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1256 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1257 {
1258   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1259   IS              iscol = a->col,isrow = a->row;
1260   PetscErrorCode  ierr;
1261   const PetscInt  *r,*c,*rout,*cout;
1262   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1263   PetscInt        nz,row;
1264   PetscScalar     *x,*b,*tmp,*tmps,sum;
1265   const MatScalar *aa = a->a,*v;
1266 
1267   PetscFunctionBegin;
1268   if (!n) PetscFunctionReturn(0);
1269 
1270   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1271   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1272   tmp  = a->solve_work;
1273 
1274   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1275   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1276 
1277   /* forward solve the lower triangular */
1278   tmp[0] = b[*r++];
1279   tmps   = tmp;
1280   for (row=1; row<n; row++) {
1281     i   = rout[row]; /* permuted row */
1282     v   = aa + ai[i] ;
1283     vi  = aj + ai[i] ;
1284     nz  = a->diag[i] - ai[i];
1285     sum = b[*r++];
1286     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1287     tmp[row] = sum;
1288   }
1289 
1290   /* backward solve the upper triangular */
1291   for (row=n-1; row>=0; row--){
1292     i   = rout[row]; /* permuted row */
1293     v   = aa + a->diag[i] + 1;
1294     vi  = aj + a->diag[i] + 1;
1295     nz  = ai[i+1] - a->diag[i] - 1;
1296     sum = tmp[row];
1297     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1298     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1299   }
1300 
1301   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1302   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1303   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1304   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1305   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 /* ----------------------------------------------------------- */
1310 #include "../src/mat/impls/aij/seq/ftn-kernels/fsolve.h"
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
1313 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
1314 {
1315   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1316   PetscErrorCode    ierr;
1317   PetscInt          n = A->rmap->n;
1318   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1319   PetscScalar       *x;
1320   const PetscScalar *b;
1321   const MatScalar   *aa = a->a;
1322 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1323   PetscInt          adiag_i,i,nz,ai_i;
1324   const PetscInt    *vi;
1325   const MatScalar   *v;
1326   PetscScalar       sum;
1327 #endif
1328 
1329   PetscFunctionBegin;
1330   if (!n) PetscFunctionReturn(0);
1331 
1332   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1333   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1334 
1335 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1336   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1337 #else
1338   /* forward solve the lower triangular */
1339   x[0] = b[0];
1340   for (i=1; i<n; i++) {
1341     ai_i = ai[i];
1342     v    = aa + ai_i;
1343     vi   = aj + ai_i;
1344     nz   = adiag[i] - ai_i;
1345     sum  = b[i];
1346     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1347     x[i] = sum;
1348   }
1349 
1350   /* backward solve the upper triangular */
1351   for (i=n-1; i>=0; i--){
1352     adiag_i = adiag[i];
1353     v       = aa + adiag_i + 1;
1354     vi      = aj + adiag_i + 1;
1355     nz      = ai[i+1] - adiag_i - 1;
1356     sum     = x[i];
1357     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1358     x[i]    = sum*aa[adiag_i];
1359   }
1360 #endif
1361   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1362   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1363   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1369 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1370 {
1371   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1372   IS                iscol = a->col,isrow = a->row;
1373   PetscErrorCode    ierr;
1374   PetscInt          i, n = A->rmap->n,j;
1375   PetscInt          nz;
1376   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1377   PetscScalar       *x,*tmp,sum;
1378   const PetscScalar *b;
1379   const MatScalar   *aa = a->a,*v;
1380 
1381   PetscFunctionBegin;
1382   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1383 
1384   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1385   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1386   tmp  = a->solve_work;
1387 
1388   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1389   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1390 
1391   /* forward solve the lower triangular */
1392   tmp[0] = b[*r++];
1393   for (i=1; i<n; i++) {
1394     v   = aa + ai[i] ;
1395     vi  = aj + ai[i] ;
1396     nz  = a->diag[i] - ai[i];
1397     sum = b[*r++];
1398     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1399     tmp[i] = sum;
1400   }
1401 
1402   /* backward solve the upper triangular */
1403   for (i=n-1; i>=0; i--){
1404     v   = aa + a->diag[i] + 1;
1405     vi  = aj + a->diag[i] + 1;
1406     nz  = ai[i+1] - a->diag[i] - 1;
1407     sum = tmp[i];
1408     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1409     tmp[i] = sum*aa[a->diag[i]];
1410     x[*c--] += tmp[i];
1411   }
1412 
1413   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1414   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1415   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1416   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1417   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1418 
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1424 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1425 {
1426   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1427   IS                iscol = a->col,isrow = a->row;
1428   PetscErrorCode    ierr;
1429   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1430   PetscInt          i,n = A->rmap->n,j;
1431   PetscInt          nz;
1432   PetscScalar       *x,*tmp,s1;
1433   const MatScalar   *aa = a->a,*v;
1434   const PetscScalar *b;
1435 
1436   PetscFunctionBegin;
1437   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1438   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1439   tmp  = a->solve_work;
1440 
1441   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1442   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1443 
1444   /* copy the b into temp work space according to permutation */
1445   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1446 
1447   /* forward solve the U^T */
1448   for (i=0; i<n; i++) {
1449     v   = aa + diag[i] ;
1450     vi  = aj + diag[i] + 1;
1451     nz  = ai[i+1] - diag[i] - 1;
1452     s1  = tmp[i];
1453     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1454     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1455     tmp[i] = s1;
1456   }
1457 
1458   /* backward solve the L^T */
1459   for (i=n-1; i>=0; i--){
1460     v   = aa + diag[i] - 1 ;
1461     vi  = aj + diag[i] - 1 ;
1462     nz  = diag[i] - ai[i];
1463     s1  = tmp[i];
1464     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1465   }
1466 
1467   /* copy tmp into x according to permutation */
1468   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1469 
1470   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1471   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1472   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1473   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1474 
1475   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1481 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1482 {
1483   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1484   IS                iscol = a->col,isrow = a->row;
1485   PetscErrorCode    ierr;
1486   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1487   PetscInt          i,n = A->rmap->n,j;
1488   PetscInt          nz;
1489   PetscScalar       *x,*tmp,s1;
1490   const MatScalar   *aa = a->a,*v;
1491   const PetscScalar *b;
1492 
1493   PetscFunctionBegin;
1494   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1495   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1496   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1497   tmp  = a->solve_work;
1498 
1499   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1500   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1501 
1502   /* copy the b into temp work space according to permutation */
1503   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1504 
1505   /* forward solve the U^T */
1506   for (i=0; i<n; i++) {
1507     v   = aa + diag[i] ;
1508     vi  = aj + diag[i] + 1;
1509     nz  = ai[i+1] - diag[i] - 1;
1510     s1  = tmp[i];
1511     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1512     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1513     tmp[i] = s1;
1514   }
1515 
1516   /* backward solve the L^T */
1517   for (i=n-1; i>=0; i--){
1518     v   = aa + diag[i] - 1 ;
1519     vi  = aj + diag[i] - 1 ;
1520     nz  = diag[i] - ai[i];
1521     s1  = tmp[i];
1522     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1523   }
1524 
1525   /* copy tmp into x according to permutation */
1526   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1527 
1528   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1529   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1530   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1531   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1532 
1533   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 /* ----------------------------------------------------------------*/
1538 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1539 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscTruth);
1540 
1541 /*
1542    ilu(0) with natural ordering under new data structure.
1543    Factored arrays bj and ba are stored as
1544      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1545 
1546    bi=fact->i is an array of size 2n+2, in which
1547    bi+
1548      bi[i]      ->  1st entry of L(i,:),i=0,...,i-1
1549      bi[n]      ->  points to L(n-1,:)+1
1550      bi[n+1]    ->  1st entry of U(n-1,:)
1551      bi[2n-i]   ->  1st entry of U(i,:)
1552      bi[2n-i+1] ->  end of U(i,:)+1, the 1st entry of U(i-1,:)
1553      bi[2n]     ->  1st entry of U(0,:)
1554      bi[2n+1]   ->  points to U(0,:)+1
1555 
1556    U(i,:) contains diag[i] as its last entry, i.e.,
1557     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1558 */
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct"
1561 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1562 {
1563 
1564   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1565   PetscErrorCode     ierr;
1566   PetscInt           n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1567   PetscInt           i,j,nz,*bi,*bj,*bdiag;
1568 
1569   PetscFunctionBegin;
1570   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1571   b    = (Mat_SeqAIJ*)(fact)->data;
1572 
1573   /* allocate matrix arrays for new data structure */
1574   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,2*n+2,PetscInt,&b->i);CHKERRQ(ierr);
1575   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(2*n+2)*sizeof(PetscInt));CHKERRQ(ierr);
1576   b->singlemalloc = PETSC_TRUE;
1577   if (!b->diag){
1578     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1579   }
1580   bdiag = b->diag;
1581 
1582   if (n > 0) {
1583     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1584   }
1585 
1586   /* set bi and bj with new data structure */
1587   bi = b->i;
1588   bj = b->j;
1589 
1590   /* L part */
1591   bi[0] = 0;
1592   for (i=0; i<n; i++){
1593     nz = adiag[i] - ai[i];
1594     bi[i+1] = bi[i] + nz;
1595     aj = a->j + ai[i];
1596     for (j=0; j<nz; j++){
1597       *bj = aj[j]; bj++;
1598     }
1599   }
1600 
1601   /* U part */
1602   bi[n+1] = bi[n];
1603   for (i=n-1; i>=0; i--){
1604     nz = ai[i+1] - adiag[i] - 1;
1605     bi[2*n-i+1] = bi[2*n-i] + nz + 1;
1606     aj = a->j + adiag[i] + 1;
1607     for (j=0; j<nz; j++){
1608       *bj = aj[j]; bj++;
1609     }
1610     /* diag[i] */
1611     *bj = i; bj++;
1612     bdiag[i] = bi[2*n-i+1]-1;
1613   }
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_newdatastruct"
1619 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_newdatastruct(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1620 {
1621   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1622   IS                 isicol;
1623   PetscErrorCode     ierr;
1624   const PetscInt     *r,*ic;
1625   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1626   PetscInt           *bi,*cols,nnz,*cols_lvl;
1627   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1628   PetscInt           i,levels,diagonal_fill;
1629   PetscTruth         col_identity,row_identity;
1630   PetscReal          f;
1631   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1632   PetscBT            lnkbt;
1633   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1634   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1635   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1636   PetscTruth         missing;
1637 
1638   PetscFunctionBegin;
1639   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1640   f             = info->fill;
1641   levels        = (PetscInt)info->levels;
1642   diagonal_fill = (PetscInt)info->diagonal_fill;
1643   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1644 
1645   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1646   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1647 
1648   if (!levels && row_identity && col_identity) {
1649     /* special case: ilu(0) with natural ordering */
1650     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1651     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_newdatastruct;
1652 
1653     fact->factor = MAT_FACTOR_ILU;
1654     (fact)->info.factor_mallocs    = 0;
1655     (fact)->info.fill_ratio_given  = info->fill;
1656     (fact)->info.fill_ratio_needed = 1.0;
1657     b               = (Mat_SeqAIJ*)(fact)->data;
1658     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1659     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1660     b->row              = isrow;
1661     b->col              = iscol;
1662     b->icol             = isicol;
1663     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1664     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1665     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1666     /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */
1667     PetscFunctionReturn(0);
1668   }
1669 
1670   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1671   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1672 
1673   /* get new row pointers */
1674   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1675   bi[0] = 0;
1676   /* bdiag is location of diagonal in factor */
1677   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1678   bdiag[0]  = 0;
1679 
1680   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1681   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1682 
1683   /* create a linked list for storing column indices of the active row */
1684   nlnk = n + 1;
1685   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1686 
1687   /* initial FreeSpace size is f*(ai[n]+1) */
1688   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1689   current_space = free_space;
1690   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1691   current_space_lvl = free_space_lvl;
1692 
1693   for (i=0; i<n; i++) {
1694     nzi = 0;
1695     /* copy current row into linked list */
1696     nnz  = ai[r[i]+1] - ai[r[i]];
1697     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1698     cols = aj + ai[r[i]];
1699     lnk[i] = -1; /* marker to indicate if diagonal exists */
1700     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1701     nzi += nlnk;
1702 
1703     /* make sure diagonal entry is included */
1704     if (diagonal_fill && lnk[i] == -1) {
1705       fm = n;
1706       while (lnk[fm] < i) fm = lnk[fm];
1707       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1708       lnk[fm]    = i;
1709       lnk_lvl[i] = 0;
1710       nzi++; dcount++;
1711     }
1712 
1713     /* add pivot rows into the active row */
1714     nzbd = 0;
1715     prow = lnk[n];
1716     while (prow < i) {
1717       nnz      = bdiag[prow];
1718       cols     = bj_ptr[prow] + nnz + 1;
1719       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1720       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1721       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1722       nzi += nlnk;
1723       prow = lnk[prow];
1724       nzbd++;
1725     }
1726     bdiag[i] = nzbd;
1727     bi[i+1]  = bi[i] + nzi;
1728 
1729     /* if free space is not available, make more free space */
1730     if (current_space->local_remaining<nzi) {
1731       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1732       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1733       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1734       reallocs++;
1735     }
1736 
1737     /* copy data into free_space and free_space_lvl, then initialize lnk */
1738     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1739     bj_ptr[i]    = current_space->array;
1740     bjlvl_ptr[i] = current_space_lvl->array;
1741 
1742     /* make sure the active row i has diagonal entry */
1743     if (*(bj_ptr[i]+bdiag[i]) != i) {
1744       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1745     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1746     }
1747 
1748     current_space->array           += nzi;
1749     current_space->local_used      += nzi;
1750     current_space->local_remaining -= nzi;
1751     current_space_lvl->array           += nzi;
1752     current_space_lvl->local_used      += nzi;
1753     current_space_lvl->local_remaining -= nzi;
1754   }
1755 
1756   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1757   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1758 
1759   /* destroy list of free space and other temporary arrays */
1760   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1761 
1762   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1763   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1764 
1765   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1766   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1767   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1768 
1769 #if defined(PETSC_USE_INFO)
1770   {
1771     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1772     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1773     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1774     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1775     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1776     if (diagonal_fill) {
1777       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1778     }
1779   }
1780 #endif
1781 
1782   /* put together the new matrix */
1783   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1784   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1785   b = (Mat_SeqAIJ*)(fact)->data;
1786   b->free_a       = PETSC_TRUE;
1787   b->free_ij      = PETSC_TRUE;
1788   b->singlemalloc = PETSC_FALSE;
1789   ierr = PetscMalloc( (bi[2*n+1] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1790   b->j          = bj;
1791   b->i          = bi;
1792   b->diag       = bdiag;
1793   b->ilen       = 0;
1794   b->imax       = 0;
1795   b->row        = isrow;
1796   b->col        = iscol;
1797   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1798   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1799   b->icol       = isicol;
1800   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1801   /* In b structure:  Free imax, ilen, old a, old j.
1802      Allocate bdiag, solve_work, new a, new j */
1803   ierr = PetscLogObjectMemory(fact,bi[2*n+1] * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1804   b->maxnz = b->nz = bi[2*n+1] ;
1805   (fact)->info.factor_mallocs    = reallocs;
1806   (fact)->info.fill_ratio_given  = f;
1807   (fact)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]);
1808   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_newdatastruct;
1809   /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 
1814 /*
1815    ilu(0) with natural ordering under revised new data structure.
1816    Factored arrays bj and ba are stored as
1817      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1818 
1819    bi=fact->i is an array of size n+1, in which
1820    bi+
1821      bi[i]      ->  1st entry of L(i,:),i=0,...,i-1
1822      bi[n]      ->  points to L(n-1,:)+1
1823      bi[n+1]    ->  1st entry of U(n-1,:)
1824   bdiag=fact->diag is an array of size n+1,in which
1825      bdiag[0]   -> points to diagonal of U(n-1,:)
1826      bdiag[n-1] -> points to diagonal of U(0,:)
1827 
1828    U(i,:) contains bdiag[i] as its last entry, i.e.,
1829     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1830 */
1831 #undef __FUNCT__
1832 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct_v2"
1833 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct_v2(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1834 {
1835 
1836   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1837   PetscErrorCode     ierr;
1838   PetscInt           n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1839   PetscInt           i,j,nz,*bi,*bj,*bdiag,bi_temp;
1840 
1841   PetscFunctionBegin;
1842   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1843   b    = (Mat_SeqAIJ*)(fact)->data;
1844 
1845   /* allocate matrix arrays for new data structure */
1846   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1847   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1848   b->singlemalloc = PETSC_TRUE;
1849   if (!b->diag){
1850     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1851     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1852   }
1853   bdiag = b->diag;
1854 
1855   if (n > 0) {
1856     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1857   }
1858 
1859   /* set bi and bj with new data structure */
1860   bi = b->i;
1861   bj = b->j;
1862 
1863   /* L part */
1864   bi[0] = 0;
1865   for (i=0; i<n; i++){
1866     nz = adiag[i] - ai[i];
1867     bi[i+1] = bi[i] + nz;
1868     aj = a->j + ai[i];
1869     for (j=0; j<nz; j++){
1870       *bj = aj[j]; bj++;
1871     }
1872   }
1873 
1874   /* U part */
1875   /* bi[n+1] = bi[n]; */
1876   bi_temp = bi[n];
1877   for (i=n-1; i>=0; i--){
1878     nz = ai[i+1] - adiag[i] - 1;
1879     /*  bi[2*n-i+1] = bi[2*n-i] + nz + 1; */
1880     bi_temp = bi_temp + nz + 1;
1881     aj = a->j + adiag[i] + 1;
1882     for (j=0; j<nz; j++){
1883       *bj = aj[j]; bj++;
1884     }
1885     /* diag[i] */
1886     *bj = i; bj++;
1887     /*   bdiag[i] = bi[2*n-i+1]-1; */
1888     bdiag[i] = bi_temp - 1;
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_newdatastruct_v2"
1895 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_newdatastruct_v2(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1896 {
1897   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1898   IS                 isicol;
1899   PetscErrorCode     ierr;
1900   const PetscInt     *r,*ic;
1901   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1902   PetscInt           *bi,*cols,nnz,*cols_lvl;
1903   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1904   PetscInt           i,levels,diagonal_fill;
1905   PetscTruth         col_identity,row_identity;
1906   PetscReal          f;
1907   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1908   PetscBT            lnkbt;
1909   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1910   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1911   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1912   PetscTruth         missing;
1913 
1914   PetscFunctionBegin;
1915   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1916   f             = info->fill;
1917   levels        = (PetscInt)info->levels;
1918   diagonal_fill = (PetscInt)info->diagonal_fill;
1919   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1920 
1921   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1922   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1923 
1924   if (!levels && row_identity && col_identity) {
1925     /* special case: ilu(0) with natural ordering */
1926     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct_v2(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1927     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_newdatastruct_v2;
1928 
1929     fact->factor = MAT_FACTOR_ILU;
1930     (fact)->info.factor_mallocs    = 0;
1931     (fact)->info.fill_ratio_given  = info->fill;
1932     (fact)->info.fill_ratio_needed = 1.0;
1933     b               = (Mat_SeqAIJ*)(fact)->data;
1934     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1935     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1936     b->row              = isrow;
1937     b->col              = iscol;
1938     b->icol             = isicol;
1939     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1940     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1941     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1942     /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */
1943     PetscFunctionReturn(0);
1944   }
1945 
1946   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1947   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1948 
1949   /* get new row pointers */
1950   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1951   bi[0] = 0;
1952   /* bdiag is location of diagonal in factor */
1953   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1954   bdiag[0]  = 0;
1955 
1956   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1957   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1958 
1959   /* create a linked list for storing column indices of the active row */
1960   nlnk = n + 1;
1961   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1962 
1963   /* initial FreeSpace size is f*(ai[n]+1) */
1964   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1965   current_space = free_space;
1966   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1967   current_space_lvl = free_space_lvl;
1968 
1969   for (i=0; i<n; i++) {
1970     nzi = 0;
1971     /* copy current row into linked list */
1972     nnz  = ai[r[i]+1] - ai[r[i]];
1973     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1974     cols = aj + ai[r[i]];
1975     lnk[i] = -1; /* marker to indicate if diagonal exists */
1976     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1977     nzi += nlnk;
1978 
1979     /* make sure diagonal entry is included */
1980     if (diagonal_fill && lnk[i] == -1) {
1981       fm = n;
1982       while (lnk[fm] < i) fm = lnk[fm];
1983       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1984       lnk[fm]    = i;
1985       lnk_lvl[i] = 0;
1986       nzi++; dcount++;
1987     }
1988 
1989     /* add pivot rows into the active row */
1990     nzbd = 0;
1991     prow = lnk[n];
1992     while (prow < i) {
1993       nnz      = bdiag[prow];
1994       cols     = bj_ptr[prow] + nnz + 1;
1995       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1996       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1997       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1998       nzi += nlnk;
1999       prow = lnk[prow];
2000       nzbd++;
2001     }
2002     bdiag[i] = nzbd;
2003     bi[i+1]  = bi[i] + nzi;
2004 
2005     /* if free space is not available, make more free space */
2006     if (current_space->local_remaining<nzi) {
2007       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
2008       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2009       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2010       reallocs++;
2011     }
2012 
2013     /* copy data into free_space and free_space_lvl, then initialize lnk */
2014     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2015     bj_ptr[i]    = current_space->array;
2016     bjlvl_ptr[i] = current_space_lvl->array;
2017 
2018     /* make sure the active row i has diagonal entry */
2019     if (*(bj_ptr[i]+bdiag[i]) != i) {
2020       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
2021     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2022     }
2023 
2024     current_space->array           += nzi;
2025     current_space->local_used      += nzi;
2026     current_space->local_remaining -= nzi;
2027     current_space_lvl->array           += nzi;
2028     current_space_lvl->local_used      += nzi;
2029     current_space_lvl->local_remaining -= nzi;
2030   }
2031 
2032   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2033   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2034 
2035   /* destroy list of free space and other temporary arrays */
2036   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2037 
2038   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
2039   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
2040 
2041   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2042   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2043   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
2044 
2045 #if defined(PETSC_USE_INFO)
2046   {
2047     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2048     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2049     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2050     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2051     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2052     if (diagonal_fill) {
2053       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2054     }
2055   }
2056 #endif
2057 
2058   /* put together the new matrix */
2059   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2060   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2061   b = (Mat_SeqAIJ*)(fact)->data;
2062   b->free_a       = PETSC_TRUE;
2063   b->free_ij      = PETSC_TRUE;
2064   b->singlemalloc = PETSC_FALSE;
2065   ierr = PetscMalloc( (bi[2*n+1] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2066   b->j          = bj;
2067   b->i          = bi;
2068   b->diag       = bdiag;
2069   b->ilen       = 0;
2070   b->imax       = 0;
2071   b->row        = isrow;
2072   b->col        = iscol;
2073   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2074   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2075   b->icol       = isicol;
2076   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2077   /* In b structure:  Free imax, ilen, old a, old j.
2078      Allocate bdiag, solve_work, new a, new j */
2079   ierr = PetscLogObjectMemory(fact,bi[2*n+1] * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2080   b->maxnz = b->nz = bi[2*n+1] ;
2081   (fact)->info.factor_mallocs    = reallocs;
2082   (fact)->info.fill_ratio_given  = f;
2083   (fact)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]);
2084   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_newdatastruct;
2085   /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
2091 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
2092 {
2093   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
2094   IS                 isicol;
2095   PetscErrorCode     ierr;
2096   const PetscInt     *r,*ic;
2097   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
2098   PetscInt           *bi,*cols,nnz,*cols_lvl;
2099   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
2100   PetscInt           i,levels,diagonal_fill;
2101   PetscTruth         col_identity,row_identity;
2102   PetscReal          f;
2103   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
2104   PetscBT            lnkbt;
2105   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
2106   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2107   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2108   PetscTruth         missing;
2109   PetscTruth         newdatastruct=PETSC_FALSE,newdatastruct_v2=PETSC_FALSE;
2110 
2111   PetscFunctionBegin;
2112   ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr);
2113   if (newdatastruct){
2114     ierr = MatILUFactorSymbolic_SeqAIJ_newdatastruct(fact,A,isrow,iscol,info);CHKERRQ(ierr);
2115     PetscFunctionReturn(0);
2116   }
2117   ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_new_v2",&newdatastruct_v2,PETSC_NULL);CHKERRQ(ierr);
2118   if(newdatastruct_v2){
2119     ierr = MatILUFactorSymbolic_SeqAIJ_newdatastruct_v2(fact,A,isrow,iscol,info);CHKERRQ(ierr);
2120     PetscFunctionReturn(0);
2121   }
2122 
2123   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2124   f             = info->fill;
2125   levels        = (PetscInt)info->levels;
2126   diagonal_fill = (PetscInt)info->diagonal_fill;
2127   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
2128 
2129   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
2130   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
2131   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
2132     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
2133     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
2134 
2135     fact->factor = MAT_FACTOR_ILU;
2136     (fact)->info.factor_mallocs    = 0;
2137     (fact)->info.fill_ratio_given  = info->fill;
2138     (fact)->info.fill_ratio_needed = 1.0;
2139     b               = (Mat_SeqAIJ*)(fact)->data;
2140     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2141     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2142     b->row              = isrow;
2143     b->col              = iscol;
2144     b->icol             = isicol;
2145     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2146     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2147     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2148     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
2149     PetscFunctionReturn(0);
2150   }
2151 
2152   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2153   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
2154 
2155   /* get new row pointers */
2156   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
2157   bi[0] = 0;
2158   /* bdiag is location of diagonal in factor */
2159   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
2160   bdiag[0]  = 0;
2161 
2162   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
2163   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
2164 
2165   /* create a linked list for storing column indices of the active row */
2166   nlnk = n + 1;
2167   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2168 
2169   /* initial FreeSpace size is f*(ai[n]+1) */
2170   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
2171   current_space = free_space;
2172   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
2173   current_space_lvl = free_space_lvl;
2174 
2175   for (i=0; i<n; i++) {
2176     nzi = 0;
2177     /* copy current row into linked list */
2178     nnz  = ai[r[i]+1] - ai[r[i]];
2179     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
2180     cols = aj + ai[r[i]];
2181     lnk[i] = -1; /* marker to indicate if diagonal exists */
2182     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2183     nzi += nlnk;
2184 
2185     /* make sure diagonal entry is included */
2186     if (diagonal_fill && lnk[i] == -1) {
2187       fm = n;
2188       while (lnk[fm] < i) fm = lnk[fm];
2189       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
2190       lnk[fm]    = i;
2191       lnk_lvl[i] = 0;
2192       nzi++; dcount++;
2193     }
2194 
2195     /* add pivot rows into the active row */
2196     nzbd = 0;
2197     prow = lnk[n];
2198     while (prow < i) {
2199       nnz      = bdiag[prow];
2200       cols     = bj_ptr[prow] + nnz + 1;
2201       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
2202       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
2203       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
2204       nzi += nlnk;
2205       prow = lnk[prow];
2206       nzbd++;
2207     }
2208     bdiag[i] = nzbd;
2209     bi[i+1]  = bi[i] + nzi;
2210 
2211     /* if free space is not available, make more free space */
2212     if (current_space->local_remaining<nzi) {
2213       nnz = nzi*(n - i); /* estimated and max additional space needed */
2214       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2215       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2216       reallocs++;
2217     }
2218 
2219     /* copy data into free_space and free_space_lvl, then initialize lnk */
2220     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2221     bj_ptr[i]    = current_space->array;
2222     bjlvl_ptr[i] = current_space_lvl->array;
2223 
2224     /* make sure the active row i has diagonal entry */
2225     if (*(bj_ptr[i]+bdiag[i]) != i) {
2226       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
2227     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2228     }
2229 
2230     current_space->array           += nzi;
2231     current_space->local_used      += nzi;
2232     current_space->local_remaining -= nzi;
2233     current_space_lvl->array           += nzi;
2234     current_space_lvl->local_used      += nzi;
2235     current_space_lvl->local_remaining -= nzi;
2236   }
2237 
2238   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2239   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2240 
2241   /* destroy list of free space and other temporary arrays */
2242   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2243   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2244   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2245   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2246   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
2247 
2248 #if defined(PETSC_USE_INFO)
2249   {
2250     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2251     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2252     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2253     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2254     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2255     if (diagonal_fill) {
2256       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2257     }
2258   }
2259 #endif
2260 
2261   /* put together the new matrix */
2262   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2263   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2264   b = (Mat_SeqAIJ*)(fact)->data;
2265   b->free_a       = PETSC_TRUE;
2266   b->free_ij      = PETSC_TRUE;
2267   b->singlemalloc = PETSC_FALSE;
2268   ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2269   b->j          = bj;
2270   b->i          = bi;
2271   for (i=0; i<n; i++) bdiag[i] += bi[i];
2272   b->diag       = bdiag;
2273   b->ilen       = 0;
2274   b->imax       = 0;
2275   b->row        = isrow;
2276   b->col        = iscol;
2277   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2278   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2279   b->icol       = isicol;
2280   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2281   /* In b structure:  Free imax, ilen, old a, old j.
2282      Allocate bdiag, solve_work, new a, new j */
2283   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2284   b->maxnz             = b->nz = bi[n] ;
2285   (fact)->info.factor_mallocs    = reallocs;
2286   (fact)->info.fill_ratio_given  = f;
2287   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2288   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
2289   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_newdatastruct"
2295 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_newdatastruct(Mat B,Mat A,const MatFactorInfo *info)
2296 {
2297   Mat            C = B;
2298   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2299   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2300   IS             ip=b->row,iip = b->icol;
2301   PetscErrorCode ierr;
2302   const PetscInt *rip,*riip;
2303   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2304   PetscInt       *ai=a->i,*aj=a->j;
2305   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2306   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2307   PetscReal      zeropivot,shiftnz;
2308   PetscReal      shiftpd;
2309   PetscTruth     perm_identity;
2310 
2311   PetscFunctionBegin;
2312 
2313   shiftnz   = info->shiftnz;
2314   shiftpd   = info->shiftpd;
2315   zeropivot = info->zeropivot;
2316 
2317   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2318   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2319 
2320   /* allocate working arrays
2321      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2322      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2323   */
2324   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
2325   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
2326   c2r  = il + mbs;
2327   rtmp = (MatScalar*)(c2r + mbs);
2328 
2329   for (i=0; i<mbs; i++) {
2330     c2r[i] = mbs;
2331   }
2332   il[0] = 0;
2333 
2334   for (k = 0; k<mbs; k++){
2335     /* zero rtmp */
2336     nz = bi[k+1] - bi[k];
2337     bjtmp = bj + bi[k];
2338     for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2339 
2340     /* load in initial unfactored row */
2341     bval = ba + bi[k];
2342     jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2343     for (j = jmin; j < jmax; j++){
2344       col = riip[aj[j]];
2345       if (col >= k){ /* only take upper triangular entry */
2346         rtmp[col] = aa[j];
2347         *bval++   = 0.0; /* for in-place factorization */
2348       }
2349     }
2350     /* shift the diagonal of the matrix */
2351 
2352     /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2353     dk = rtmp[k];
2354     i  = c2r[k]; /* first row to be added to k_th row  */
2355 
2356     while (i < k){
2357       nexti = c2r[i]; /* next row to be added to k_th row */
2358 
2359       /* compute multiplier, update diag(k) and U(i,k) */
2360       ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2361       uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2362       dk   += uikdi*ba[ili]; /* update diag[k] */
2363       ba[ili] = uikdi; /* -U(i,k) */
2364 
2365       /* add multiple of row i to k-th row */
2366       jmin = ili + 1; jmax = bi[i+1];
2367       if (jmin < jmax){
2368         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2369         /* update il and c2r for row i */
2370         il[i] = jmin;
2371         j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2372       }
2373       i = nexti;
2374     }
2375 
2376     /* copy data into U(k,:) */
2377     ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2378     jmin = bi[k]; jmax = bi[k+1]-1;
2379     if (jmin < jmax) {
2380       for (j=jmin; j<jmax; j++){
2381         col = bj[j]; ba[j] = rtmp[col];
2382       }
2383       /* add the k-th row into il and c2r */
2384       il[k] = jmin;
2385       i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2386     }
2387   }
2388 
2389   ierr = PetscFree(il);CHKERRQ(ierr);
2390   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2391   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2392 
2393   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2394   if (perm_identity){
2395     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_newdatastruct;
2396     (B)->ops->solvetranspose  = 0;
2397     (B)->ops->forwardsolve    = 0;
2398     (B)->ops->backwardsolve   = 0;
2399   } else {
2400     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_newdatastruct;
2401     (B)->ops->solvetranspose  = 0;
2402     (B)->ops->forwardsolve    = 0;
2403     (B)->ops->backwardsolve   = 0;
2404   }
2405 
2406   C->assembled    = PETSC_TRUE;
2407   C->preallocated = PETSC_TRUE;
2408   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2414 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2415 {
2416   Mat            C = B;
2417   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2418   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2419   IS             ip=b->row,iip = b->icol;
2420   PetscErrorCode ierr;
2421   const PetscInt *rip,*riip;
2422   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
2423   PetscInt       *ai=a->i,*aj=a->j;
2424   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2425   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2426   PetscReal      zeropivot,rs,shiftnz;
2427   PetscReal      shiftpd;
2428   ChShift_Ctx    sctx;
2429   PetscInt       newshift;
2430   PetscTruth     perm_identity;
2431 
2432   PetscFunctionBegin;
2433   shiftnz   = info->shiftnz;
2434   shiftpd   = info->shiftpd;
2435   zeropivot = info->zeropivot;
2436 
2437   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2438   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2439 
2440   /* initialization */
2441   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
2442   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
2443   jl   = il + mbs;
2444   rtmp = (MatScalar*)(jl + mbs);
2445 
2446   sctx.shift_amount = 0;
2447   sctx.nshift       = 0;
2448   do {
2449     sctx.chshift = PETSC_FALSE;
2450     for (i=0; i<mbs; i++) {
2451       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
2452     }
2453 
2454     for (k = 0; k<mbs; k++){
2455       bval = ba + bi[k];
2456       /* initialize k-th row by the perm[k]-th row of A */
2457       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2458       for (j = jmin; j < jmax; j++){
2459         col = riip[aj[j]];
2460         if (col >= k){ /* only take upper triangular entry */
2461           rtmp[col] = aa[j];
2462           *bval++  = 0.0; /* for in-place factorization */
2463         }
2464       }
2465       /* shift the diagonal of the matrix */
2466       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2467 
2468       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2469       dk = rtmp[k];
2470       i = jl[k]; /* first row to be added to k_th row  */
2471 
2472       while (i < k){
2473         nexti = jl[i]; /* next row to be added to k_th row */
2474 
2475         /* compute multiplier, update diag(k) and U(i,k) */
2476         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2477         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2478         dk += uikdi*ba[ili];
2479         ba[ili] = uikdi; /* -U(i,k) */
2480 
2481         /* add multiple of row i to k-th row */
2482         jmin = ili + 1; jmax = bi[i+1];
2483         if (jmin < jmax){
2484           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2485           /* update il and jl for row i */
2486           il[i] = jmin;
2487           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2488         }
2489         i = nexti;
2490       }
2491 
2492       /* shift the diagonals when zero pivot is detected */
2493       /* compute rs=sum of abs(off-diagonal) */
2494       rs   = 0.0;
2495       jmin = bi[k]+1;
2496       nz   = bi[k+1] - jmin;
2497       bcol = bj + jmin;
2498       for (j=0; j<nz; j++) {
2499         rs += PetscAbsScalar(rtmp[bcol[j]]);
2500       }
2501 
2502       sctx.rs = rs;
2503       sctx.pv = dk;
2504       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
2505 
2506       if (newshift == 1) {
2507         if (!sctx.shift_amount) {
2508           sctx.shift_amount = 1e-5;
2509         }
2510         break;
2511       }
2512 
2513       /* copy data into U(k,:) */
2514       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2515       jmin = bi[k]+1; jmax = bi[k+1];
2516       if (jmin < jmax) {
2517         for (j=jmin; j<jmax; j++){
2518           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
2519         }
2520         /* add the k-th row into il and jl */
2521         il[k] = jmin;
2522         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2523       }
2524     }
2525   } while (sctx.chshift);
2526   ierr = PetscFree(il);CHKERRQ(ierr);
2527 
2528   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2529   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2530 
2531   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2532   if (perm_identity){
2533     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2534     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2535     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2536     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2537   } else {
2538     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
2539     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2540     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2541     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2542   }
2543 
2544   C->assembled    = PETSC_TRUE;
2545   C->preallocated = PETSC_TRUE;
2546   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2547   if (sctx.nshift){
2548     if (shiftnz) {
2549       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2550     } else if (shiftpd) {
2551       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2552     }
2553   }
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_newdatastruct"
2559 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_newdatastruct(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2560 {
2561   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2562   Mat_SeqSBAIJ       *b;
2563   PetscErrorCode     ierr;
2564   PetscTruth         perm_identity,missing;
2565   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2566   const PetscInt     *rip,*riip;
2567   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2568   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2569   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2570   PetscReal          fill=info->fill,levels=info->levels;
2571   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2572   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2573   PetscBT            lnkbt;
2574   IS                 iperm;
2575 
2576   PetscFunctionBegin;
2577   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2578   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2579   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2580   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2581   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2582 
2583   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2584   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2585   ui[0] = 0;
2586 
2587   /* ICC(0) without matrix ordering: simply copies fill pattern */
2588   if (!levels && perm_identity) {
2589 
2590     for (i=0; i<am; i++) {
2591       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2592       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2593     }
2594     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2595     cols = uj;
2596     for (i=0; i<am; i++) {
2597       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2598       ncols = ui[i+1] - ui[i] - 1;
2599       for (j=0; j<ncols; j++) *cols++ = aj[j];
2600       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2601     }
2602   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2603 
2604     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2605     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2606 
2607     /* initialization */
2608     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2609 
2610     /* jl: linked list for storing indices of the pivot rows
2611        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2612     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
2613     il         = jl + am;
2614     uj_ptr     = (PetscInt**)(il + am);
2615     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
2616     for (i=0; i<am; i++){
2617       jl[i] = am; il[i] = 0;
2618     }
2619 
2620     /* create and initialize a linked list for storing column indices of the active row k */
2621     nlnk = am + 1;
2622     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2623 
2624     /* initial FreeSpace size is fill*(ai[am]+1) */
2625     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2626     current_space = free_space;
2627     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2628     current_space_lvl = free_space_lvl;
2629 
2630     for (k=0; k<am; k++){  /* for each active row k */
2631       /* initialize lnk by the column indices of row rip[k] of A */
2632       nzk   = 0;
2633       ncols = ai[rip[k]+1] - ai[rip[k]];
2634       if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2635       ncols_upper = 0;
2636       for (j=0; j<ncols; j++){
2637         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2638         if (riip[i] >= k){ /* only take upper triangular entry */
2639           ajtmp[ncols_upper] = i;
2640           ncols_upper++;
2641         }
2642       }
2643       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2644       nzk += nlnk;
2645 
2646       /* update lnk by computing fill-in for each pivot row to be merged in */
2647       prow = jl[k]; /* 1st pivot row */
2648 
2649       while (prow < k){
2650         nextprow = jl[prow];
2651 
2652         /* merge prow into k-th row */
2653         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2654         jmax = ui[prow+1];
2655         ncols = jmax-jmin;
2656         i     = jmin - ui[prow];
2657         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2658         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2659         j     = *(uj - 1);
2660         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2661         nzk += nlnk;
2662 
2663         /* update il and jl for prow */
2664         if (jmin < jmax){
2665           il[prow] = jmin;
2666           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2667         }
2668         prow = nextprow;
2669       }
2670 
2671       /* if free space is not available, make more free space */
2672       if (current_space->local_remaining<nzk) {
2673         i = am - k + 1; /* num of unfactored rows */
2674         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2675         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2676         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2677         reallocs++;
2678       }
2679 
2680       /* copy data into free_space and free_space_lvl, then initialize lnk */
2681       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2682       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2683 
2684       /* add the k-th row into il and jl */
2685       if (nzk > 1){
2686         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2687         jl[k] = jl[i]; jl[i] = k;
2688         il[k] = ui[k] + 1;
2689       }
2690       uj_ptr[k]     = current_space->array;
2691       uj_lvl_ptr[k] = current_space_lvl->array;
2692 
2693       current_space->array           += nzk;
2694       current_space->local_used      += nzk;
2695       current_space->local_remaining -= nzk;
2696 
2697       current_space_lvl->array           += nzk;
2698       current_space_lvl->local_used      += nzk;
2699       current_space_lvl->local_remaining -= nzk;
2700 
2701       ui[k+1] = ui[k] + nzk;
2702     }
2703 
2704 #if defined(PETSC_USE_INFO)
2705     if (ai[am] != 0) {
2706       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2707       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2708       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2709       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2710     } else {
2711       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2712     }
2713 #endif
2714 
2715     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2716     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2717     ierr = PetscFree(jl);CHKERRQ(ierr);
2718     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2719 
2720     /* destroy list of free space and other temporary array(s) */
2721     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2722     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr);
2723     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2724     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2725 
2726   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2727 
2728   /* put together the new matrix in MATSEQSBAIJ format */
2729 
2730   b    = (Mat_SeqSBAIJ*)(fact)->data;
2731   b->singlemalloc = PETSC_FALSE;
2732   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2733   b->j    = uj;
2734   b->i    = ui;
2735   b->diag = udiag;
2736   b->free_diag = PETSC_TRUE;
2737   b->ilen = 0;
2738   b->imax = 0;
2739   b->row  = perm;
2740   b->col  = perm;
2741   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2742   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2743   b->icol = iperm;
2744   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2745   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2746   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2747   b->maxnz   = b->nz = ui[am];
2748   b->free_a  = PETSC_TRUE;
2749   b->free_ij = PETSC_TRUE;
2750 
2751   (fact)->info.factor_mallocs    = reallocs;
2752   (fact)->info.fill_ratio_given  = fill;
2753   if (ai[am] != 0) {
2754     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2755   } else {
2756     (fact)->info.fill_ratio_needed = 0.0;
2757   }
2758   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_newdatastruct;
2759   PetscFunctionReturn(0);
2760 }
2761 
2762 #undef __FUNCT__
2763 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2764 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2765 {
2766   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2767   Mat_SeqSBAIJ       *b;
2768   PetscErrorCode     ierr;
2769   PetscTruth         perm_identity,missing;
2770   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2771   const PetscInt     *rip,*riip;
2772   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2773   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2774   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2775   PetscReal          fill=info->fill,levels=info->levels;
2776   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2777   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2778   PetscBT            lnkbt;
2779   IS                 iperm;
2780   PetscTruth         newdatastruct=PETSC_FALSE;
2781 
2782   PetscFunctionBegin;
2783   ierr = PetscOptionsGetTruth(PETSC_NULL,"-icc_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr);
2784   if(newdatastruct){
2785     ierr = MatICCFactorSymbolic_SeqAIJ_newdatastruct(fact,A,perm,info);CHKERRQ(ierr);
2786     PetscFunctionReturn(0);
2787   }
2788 
2789   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2790   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2791   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2792   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2793   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2794 
2795   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2796   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2797   ui[0] = 0;
2798 
2799   /* ICC(0) without matrix ordering: simply copies fill pattern */
2800   if (!levels && perm_identity) {
2801 
2802     for (i=0; i<am; i++) {
2803       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2804       udiag[i] = ui[i];
2805     }
2806     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2807     cols = uj;
2808     for (i=0; i<am; i++) {
2809       aj    = a->j + a->diag[i];
2810       ncols = ui[i+1] - ui[i];
2811       for (j=0; j<ncols; j++) *cols++ = *aj++;
2812     }
2813   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2814     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2815     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2816 
2817     /* initialization */
2818     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2819 
2820     /* jl: linked list for storing indices of the pivot rows
2821        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2822     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
2823     il         = jl + am;
2824     uj_ptr     = (PetscInt**)(il + am);
2825     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
2826     for (i=0; i<am; i++){
2827       jl[i] = am; il[i] = 0;
2828     }
2829 
2830     /* create and initialize a linked list for storing column indices of the active row k */
2831     nlnk = am + 1;
2832     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2833 
2834     /* initial FreeSpace size is fill*(ai[am]+1) */
2835     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2836     current_space = free_space;
2837     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2838     current_space_lvl = free_space_lvl;
2839 
2840     for (k=0; k<am; k++){  /* for each active row k */
2841       /* initialize lnk by the column indices of row rip[k] of A */
2842       nzk   = 0;
2843       ncols = ai[rip[k]+1] - ai[rip[k]];
2844       if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2845       ncols_upper = 0;
2846       for (j=0; j<ncols; j++){
2847         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2848         if (riip[i] >= k){ /* only take upper triangular entry */
2849           ajtmp[ncols_upper] = i;
2850           ncols_upper++;
2851         }
2852       }
2853       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2854       nzk += nlnk;
2855 
2856       /* update lnk by computing fill-in for each pivot row to be merged in */
2857       prow = jl[k]; /* 1st pivot row */
2858 
2859       while (prow < k){
2860         nextprow = jl[prow];
2861 
2862         /* merge prow into k-th row */
2863         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2864         jmax = ui[prow+1];
2865         ncols = jmax-jmin;
2866         i     = jmin - ui[prow];
2867         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2868         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2869         j     = *(uj - 1);
2870         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2871         nzk += nlnk;
2872 
2873         /* update il and jl for prow */
2874         if (jmin < jmax){
2875           il[prow] = jmin;
2876           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2877         }
2878         prow = nextprow;
2879       }
2880 
2881       /* if free space is not available, make more free space */
2882       if (current_space->local_remaining<nzk) {
2883         i = am - k + 1; /* num of unfactored rows */
2884         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2885         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2886         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2887         reallocs++;
2888       }
2889 
2890       /* copy data into free_space and free_space_lvl, then initialize lnk */
2891       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2892       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2893 
2894       /* add the k-th row into il and jl */
2895       if (nzk > 1){
2896         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2897         jl[k] = jl[i]; jl[i] = k;
2898         il[k] = ui[k] + 1;
2899       }
2900       uj_ptr[k]     = current_space->array;
2901       uj_lvl_ptr[k] = current_space_lvl->array;
2902 
2903       current_space->array           += nzk;
2904       current_space->local_used      += nzk;
2905       current_space->local_remaining -= nzk;
2906 
2907       current_space_lvl->array           += nzk;
2908       current_space_lvl->local_used      += nzk;
2909       current_space_lvl->local_remaining -= nzk;
2910 
2911       ui[k+1] = ui[k] + nzk;
2912     }
2913 
2914 #if defined(PETSC_USE_INFO)
2915     if (ai[am] != 0) {
2916       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2917       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2918       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2919       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2920     } else {
2921       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2922     }
2923 #endif
2924 
2925     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2926     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2927     ierr = PetscFree(jl);CHKERRQ(ierr);
2928     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2929 
2930     /* destroy list of free space and other temporary array(s) */
2931     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2932     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2933     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2934     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2935 
2936   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2937 
2938   /* put together the new matrix in MATSEQSBAIJ format */
2939 
2940   b    = (Mat_SeqSBAIJ*)(fact)->data;
2941   b->singlemalloc = PETSC_FALSE;
2942   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2943   b->j    = uj;
2944   b->i    = ui;
2945   b->diag = udiag;
2946   b->free_diag = PETSC_TRUE;
2947   b->ilen = 0;
2948   b->imax = 0;
2949   b->row  = perm;
2950   b->col  = perm;
2951   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2952   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2953   b->icol = iperm;
2954   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2955   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2956   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2957   b->maxnz   = b->nz = ui[am];
2958   b->free_a  = PETSC_TRUE;
2959   b->free_ij = PETSC_TRUE;
2960 
2961   (fact)->info.factor_mallocs    = reallocs;
2962   (fact)->info.fill_ratio_given  = fill;
2963   if (ai[am] != 0) {
2964     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2965   } else {
2966     (fact)->info.fill_ratio_needed = 0.0;
2967   }
2968   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2974 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2975 {
2976   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2977   Mat_SeqSBAIJ       *b;
2978   PetscErrorCode     ierr;
2979   PetscTruth         perm_identity;
2980   PetscReal          fill = info->fill;
2981   const PetscInt     *rip,*riip;
2982   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2983   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2984   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2985   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2986   PetscBT            lnkbt;
2987   IS                 iperm;
2988 
2989   PetscFunctionBegin;
2990   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2991   /* check whether perm is the identity mapping */
2992   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2993   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2994   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2995   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2996 
2997   /* initialization */
2998   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2999   ui[0] = 0;
3000 
3001   /* jl: linked list for storing indices of the pivot rows
3002      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3003   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
3004   il     = jl + am;
3005   cols   = il + am;
3006   ui_ptr = (PetscInt**)(cols + am);
3007   for (i=0; i<am; i++){
3008     jl[i] = am; il[i] = 0;
3009   }
3010 
3011   /* create and initialize a linked list for storing column indices of the active row k */
3012   nlnk = am + 1;
3013   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3014 
3015   /* initial FreeSpace size is fill*(ai[am]+1) */
3016   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3017   current_space = free_space;
3018 
3019   for (k=0; k<am; k++){  /* for each active row k */
3020     /* initialize lnk by the column indices of row rip[k] of A */
3021     nzk   = 0;
3022     ncols = ai[rip[k]+1] - ai[rip[k]];
3023     if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3024     ncols_upper = 0;
3025     for (j=0; j<ncols; j++){
3026       i = riip[*(aj + ai[rip[k]] + j)];
3027       if (i >= k){ /* only take upper triangular entry */
3028         cols[ncols_upper] = i;
3029         ncols_upper++;
3030       }
3031     }
3032     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3033     nzk += nlnk;
3034 
3035     /* update lnk by computing fill-in for each pivot row to be merged in */
3036     prow = jl[k]; /* 1st pivot row */
3037 
3038     while (prow < k){
3039       nextprow = jl[prow];
3040       /* merge prow into k-th row */
3041       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3042       jmax = ui[prow+1];
3043       ncols = jmax-jmin;
3044       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3045       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3046       nzk += nlnk;
3047 
3048       /* update il and jl for prow */
3049       if (jmin < jmax){
3050         il[prow] = jmin;
3051         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3052       }
3053       prow = nextprow;
3054     }
3055 
3056     /* if free space is not available, make more free space */
3057     if (current_space->local_remaining<nzk) {
3058       i = am - k + 1; /* num of unfactored rows */
3059       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3060       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3061       reallocs++;
3062     }
3063 
3064     /* copy data into free space, then initialize lnk */
3065     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3066 
3067     /* add the k-th row into il and jl */
3068     if (nzk-1 > 0){
3069       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3070       jl[k] = jl[i]; jl[i] = k;
3071       il[k] = ui[k] + 1;
3072     }
3073     ui_ptr[k] = current_space->array;
3074     current_space->array           += nzk;
3075     current_space->local_used      += nzk;
3076     current_space->local_remaining -= nzk;
3077 
3078     ui[k+1] = ui[k] + nzk;
3079   }
3080 
3081 #if defined(PETSC_USE_INFO)
3082   if (ai[am] != 0) {
3083     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3084     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3085     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3086     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3087   } else {
3088      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3089   }
3090 #endif
3091 
3092   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3093   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3094   ierr = PetscFree(jl);CHKERRQ(ierr);
3095 
3096   /* destroy list of free space and other temporary array(s) */
3097   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3098   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3099   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3100 
3101   /* put together the new matrix in MATSEQSBAIJ format */
3102 
3103   b = (Mat_SeqSBAIJ*)(fact)->data;
3104   b->singlemalloc = PETSC_FALSE;
3105   b->free_a       = PETSC_TRUE;
3106   b->free_ij      = PETSC_TRUE;
3107   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3108   b->j    = uj;
3109   b->i    = ui;
3110   b->diag = 0;
3111   b->ilen = 0;
3112   b->imax = 0;
3113   b->row  = perm;
3114   b->col  = perm;
3115   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3116   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3117   b->icol = iperm;
3118   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3119   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3120   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3121   b->maxnz = b->nz = ui[am];
3122 
3123   (fact)->info.factor_mallocs    = reallocs;
3124   (fact)->info.fill_ratio_given  = fill;
3125   if (ai[am] != 0) {
3126     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3127   } else {
3128     (fact)->info.fill_ratio_needed = 0.0;
3129   }
3130   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
3131   PetscFunctionReturn(0);
3132 }
3133 
3134 #undef __FUNCT__
3135 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_newdatastruct"
3136 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct(Mat A,Vec bb,Vec xx)
3137 {
3138   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3139   PetscErrorCode    ierr;
3140   PetscInt          n = A->rmap->n;
3141   const PetscInt    *ai = a->i,*aj = a->j,*vi;
3142   PetscScalar       *x,sum;
3143   const PetscScalar *b;
3144   const MatScalar   *aa = a->a,*v;
3145   PetscInt          i,nz;
3146 
3147   PetscFunctionBegin;
3148   if (!n) PetscFunctionReturn(0);
3149 
3150   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
3151   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3152 
3153   /* forward solve the lower triangular */
3154   x[0] = b[0];
3155   v    = aa;
3156   vi   = aj;
3157   for (i=1; i<n; i++) {
3158     nz  = ai[i+1] - ai[i];
3159     sum = b[i];
3160     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3161     v  += nz;
3162     vi += nz;
3163     x[i] = sum;
3164   }
3165 
3166   /* backward solve the upper triangular */
3167   v   = aa + ai[n+1];
3168   vi  = aj + ai[n+1];
3169   for (i=n-1; i>=0; i--){
3170     nz = ai[2*n-i +1] - ai[2*n-i]-1;
3171     sum = x[i];
3172     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3173     v   += nz;
3174     vi  += nz; vi++;
3175     x[i] = *v++ *sum; /* x[i]=aa[adiag[i]]*sum; v++; */
3176   }
3177 
3178   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3179   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
3180   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3181   PetscFunctionReturn(0);
3182 }
3183 
3184 #undef __FUNCT__
3185 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_newdatastruct_v2"
3186 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct_v2(Mat A,Vec bb,Vec xx)
3187 {
3188   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3189   PetscErrorCode    ierr;
3190   PetscInt          n = A->rmap->n;
3191   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3192   PetscScalar       *x,sum;
3193   const PetscScalar *b;
3194   const MatScalar   *aa = a->a,*v;
3195   PetscInt          i,nz;
3196 
3197   PetscFunctionBegin;
3198   if (!n) PetscFunctionReturn(0);
3199 
3200   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
3201   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3202 
3203   /* forward solve the lower triangular */
3204   x[0] = b[0];
3205   v    = aa;
3206   vi   = aj;
3207   for (i=1; i<n; i++) {
3208     nz  = ai[i+1] - ai[i];
3209     sum = b[i];
3210     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3211     v  += nz;
3212     vi += nz;
3213     x[i] = sum;
3214   }
3215 
3216   /* backward solve the upper triangular */
3217   /*  v   = aa + ai[n+1];
3218       vi  = aj + ai[n+1]; */
3219   v  = aa + adiag[n-1];
3220   vi = aj + adiag[n-1];
3221   x[n-1] = *v++ *x[n-1];
3222   vi++;
3223   for (i=n-2; i>=0; i--){
3224     nz = adiag[i] - adiag[i+1]-1;
3225     sum = x[i];
3226     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3227     v   += nz;
3228     vi  += nz; vi++;
3229     x[i] = *v++ *sum; /* x[i]=aa[adiag[i]]*sum; v++; */
3230   }
3231 
3232   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3233   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
3234   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3235   PetscFunctionReturn(0);
3236 }
3237 
3238 #undef __FUNCT__
3239 #define __FUNCT__ "MatSolve_SeqAIJ_newdatastruct"
3240 PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat A,Vec bb,Vec xx)
3241 {
3242   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3243   IS                iscol = a->col,isrow = a->row;
3244   PetscErrorCode    ierr;
3245   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,nz,k;
3246   const PetscInt    *rout,*cout,*r,*c;
3247   PetscScalar       *x,*tmp,*tmps,sum;
3248   const PetscScalar *b;
3249   const MatScalar   *aa = a->a,*v;
3250 
3251   PetscFunctionBegin;
3252   if (!n) PetscFunctionReturn(0);
3253 
3254   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
3255   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3256   tmp  = a->solve_work;
3257 
3258   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3259   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3260 
3261   /* forward solve the lower triangular */
3262   tmp[0] = b[*r++];
3263   tmps   = tmp;
3264   v      = aa;
3265   vi     = aj;
3266   for (i=1; i<n; i++) {
3267     nz  = ai[i+1] - ai[i];
3268     sum = b[*r++];
3269     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
3270     tmp[i] = sum;
3271     v += nz; vi += nz;
3272   }
3273 
3274   /* backward solve the upper triangular */
3275   k  = n+1;
3276   v  = aa + ai[k]; /* 1st entry of U(n-1,:) */
3277   vi = aj + ai[k];
3278   for (i=n-1; i>=0; i--){
3279     k  = 2*n-i;
3280     nz = ai[k +1] - ai[k] - 1;
3281     sum = tmp[i];
3282     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
3283     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3284     v += nz+1; vi += nz+1;
3285   }
3286 
3287   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3288   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3289   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
3290   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3291   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3292   PetscFunctionReturn(0);
3293 }
3294 
3295 #undef __FUNCT__
3296 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3297 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3298 {
3299   Mat                B = *fact;
3300   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3301   IS                 isicol;
3302   PetscErrorCode     ierr;
3303   const PetscInt     *r,*ic;
3304   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3305   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3306   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3307   PetscInt           nlnk,*lnk;
3308   PetscBT            lnkbt;
3309   PetscTruth         row_identity,icol_identity,both_identity;
3310   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3311   const PetscInt     *ics;
3312   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3313   PetscReal          dt=info->dt,dtcol=info->dtcol,shift=info->shiftinblocks;
3314   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3315   PetscTruth         missing;
3316 
3317   PetscFunctionBegin;
3318 
3319   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3320   if (dtcol   == PETSC_DEFAULT) dtcol   = 0.01; /* XXX unused! */
3321   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3322 
3323   /* ------- symbolic factorization, can be reused ---------*/
3324   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3325   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3326   adiag=a->diag;
3327 
3328   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3329 
3330   /* bdiag is location of diagonal in factor */
3331   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
3332   bdiag_rev = bdiag + n+1;
3333 
3334   /* allocate row pointers bi */
3335   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3336 
3337   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3338   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3339   nnz_max  = ai[n]+2*n*dtcount+2;
3340 
3341   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3342   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3343 
3344   /* put together the new matrix */
3345   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3346   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3347   b    = (Mat_SeqAIJ*)(B)->data;
3348   b->free_a       = PETSC_TRUE;
3349   b->free_ij      = PETSC_TRUE;
3350   b->singlemalloc = PETSC_FALSE;
3351   b->a          = ba;
3352   b->j          = bj;
3353   b->i          = bi;
3354   b->diag       = bdiag;
3355   b->ilen       = 0;
3356   b->imax       = 0;
3357   b->row        = isrow;
3358   b->col        = iscol;
3359   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3360   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3361   b->icol       = isicol;
3362   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3363 
3364   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3365   b->maxnz = nnz_max;
3366 
3367   (B)->factor                = MAT_FACTOR_ILUDT;
3368   (B)->info.factor_mallocs   = 0;
3369   (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3370   CHKMEMQ;
3371   /* ------- end of symbolic factorization ---------*/
3372 
3373   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3374   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3375   ics  = ic;
3376 
3377   /* linked list for storing column indices of the active row */
3378   nlnk = n + 1;
3379   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3380 
3381   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3382   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
3383   jtmp = im + n;
3384   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3385   ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3386   ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3387   vtmp = rtmp + n;
3388 
3389   bi[0]    = 0;
3390   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3391   bdiag_rev[n] = bdiag[0];
3392   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3393   for (i=0; i<n; i++) {
3394     /* copy initial fill into linked list */
3395     nzi = 0; /* nonzeros for active row i */
3396     nzi = ai[r[i]+1] - ai[r[i]];
3397     if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3398     nzi_al = adiag[r[i]] - ai[r[i]];
3399     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3400     ajtmp = aj + ai[r[i]];
3401     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3402 
3403     /* load in initial (unfactored row) */
3404     aatmp = a->a + ai[r[i]];
3405     for (j=0; j<nzi; j++) {
3406       rtmp[ics[*ajtmp++]] = *aatmp++;
3407     }
3408 
3409     /* add pivot rows into linked list */
3410     row = lnk[n];
3411     while (row < i ) {
3412       nzi_bl = bi[row+1] - bi[row] + 1;
3413       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3414       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3415       nzi  += nlnk;
3416       row   = lnk[row];
3417     }
3418 
3419     /* copy data from lnk into jtmp, then initialize lnk */
3420     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3421 
3422     /* numerical factorization */
3423     bjtmp = jtmp;
3424     row   = *bjtmp++; /* 1st pivot row */
3425     while  ( row < i ) {
3426       pc         = rtmp + row;
3427       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3428       multiplier = (*pc) * (*pv);
3429       *pc        = multiplier;
3430       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3431         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3432         pv         = ba + bdiag[row+1] + 1;
3433         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3434         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3435         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3436         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
3437       }
3438       row = *bjtmp++;
3439     }
3440 
3441     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3442     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3443     nzi_bl = 0; j = 0;
3444     while (jtmp[j] < i){ /* Note: jtmp is sorted */
3445       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3446       nzi_bl++; j++;
3447     }
3448     nzi_bu = nzi - nzi_bl -1;
3449     while (j < nzi){
3450       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3451       j++;
3452     }
3453 
3454     bjtmp = bj + bi[i];
3455     batmp = ba + bi[i];
3456     /* apply level dropping rule to L part */
3457     ncut = nzi_al + dtcount;
3458     if (ncut < nzi_bl){
3459       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3460       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3461     } else {
3462       ncut = nzi_bl;
3463     }
3464     for (j=0; j<ncut; j++){
3465       bjtmp[j] = jtmp[j];
3466       batmp[j] = vtmp[j];
3467       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3468     }
3469     bi[i+1] = bi[i] + ncut;
3470     nzi = ncut + 1;
3471 
3472     /* apply level dropping rule to U part */
3473     ncut = nzi_au + dtcount;
3474     if (ncut < nzi_bu){
3475       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3476       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3477     } else {
3478       ncut = nzi_bu;
3479     }
3480     nzi += ncut;
3481 
3482     /* mark bdiagonal */
3483     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3484     bdiag_rev[n-i-1] = bdiag[i+1];
3485     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3486     bjtmp = bj + bdiag[i];
3487     batmp = ba + bdiag[i];
3488     *bjtmp = i;
3489     *batmp = diag_tmp; /* rtmp[i]; */
3490     if (*batmp == 0.0) {
3491       *batmp = dt+shift;
3492       /* printf(" row %d add shift %g\n",i,shift); */
3493     }
3494     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3495     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3496 
3497     bjtmp = bj + bdiag[i+1]+1;
3498     batmp = ba + bdiag[i+1]+1;
3499     for (k=0; k<ncut; k++){
3500       bjtmp[k] = jtmp[nzi_bl+1+k];
3501       batmp[k] = vtmp[nzi_bl+1+k];
3502       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3503     }
3504     /* printf("\n"); */
3505 
3506     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3507     /*
3508     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3509     printf(" ----------------------------\n");
3510     */
3511   } /* for (i=0; i<n; i++) */
3512   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3513   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3514 
3515   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3516   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3517 
3518   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3519   ierr = PetscFree(im);CHKERRQ(ierr);
3520   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3521 
3522   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3523   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3524 
3525   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3526   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3527   both_identity = (PetscTruth) (row_identity && icol_identity);
3528   if (row_identity && icol_identity) {
3529     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct;
3530   } else {
3531     B->ops->solve = MatSolve_SeqAIJ_newdatastruct;
3532   }
3533 
3534   B->ops->lufactorsymbolic  = MatILUDTFactorSymbolic_SeqAIJ;
3535   B->ops->lufactornumeric   = MatILUDTFactorNumeric_SeqAIJ;
3536   B->ops->solveadd          = 0;
3537   B->ops->solvetranspose    = 0;
3538   B->ops->solvetransposeadd = 0;
3539   B->ops->matsolve          = 0;
3540   B->assembled              = PETSC_TRUE;
3541   B->preallocated           = PETSC_TRUE;
3542   PetscFunctionReturn(0);
3543 }
3544 
3545 /* a wraper of MatILUDTFactor_SeqAIJ() */
3546 #undef __FUNCT__
3547 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3548 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3549 {
3550   PetscErrorCode     ierr;
3551 
3552   PetscFunctionBegin;
3553   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3554 
3555   fact->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ;
3556   PetscFunctionReturn(0);
3557 }
3558 
3559 /*
3560    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3561    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3562 */
3563 #undef __FUNCT__
3564 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3565 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3566 {
3567   Mat            C=fact;
3568   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3569   IS             isrow = b->row,isicol = b->icol;
3570   PetscErrorCode ierr;
3571   const PetscInt *r,*ic,*ics;
3572   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3573   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3574   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3575   PetscReal      dt=info->dt,shift=info->shiftinblocks;
3576   PetscTruth     row_identity, col_identity;
3577 
3578   PetscFunctionBegin;
3579   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3580   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3581   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3582   ics  = ic;
3583 
3584   for (i=0; i<n; i++){
3585     /* initialize rtmp array */
3586     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3587     bjtmp = bj + bi[i];
3588     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3589     rtmp[i] = 0.0;
3590     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3591     bjtmp = bj + bdiag[i+1] + 1;
3592     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3593 
3594     /* load in initial unfactored row of A */
3595     /* printf("row %d\n",i); */
3596     nz    = ai[r[i]+1] - ai[r[i]];
3597     ajtmp = aj + ai[r[i]];
3598     v     = aa + ai[r[i]];
3599     for (j=0; j<nz; j++) {
3600       rtmp[ics[*ajtmp++]] = v[j];
3601       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3602     }
3603     /* printf("\n"); */
3604 
3605     /* numerical factorization */
3606     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3607     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3608     k = 0;
3609     while (k < nzl){
3610       row   = *bjtmp++;
3611       /* printf("  prow %d\n",row); */
3612       pc         = rtmp + row;
3613       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3614       multiplier = (*pc) * (*pv);
3615       *pc        = multiplier;
3616       if (PetscAbsScalar(multiplier) > dt){
3617         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3618         pv         = b->a + bdiag[row+1] + 1;
3619         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3620         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3621         /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */
3622       }
3623       k++;
3624     }
3625 
3626     /* finished row so stick it into b->a */
3627     /* L-part */
3628     pv = b->a + bi[i] ;
3629     pj = bj + bi[i] ;
3630     nzl = bi[i+1] - bi[i];
3631     for (j=0; j<nzl; j++) {
3632       pv[j] = rtmp[pj[j]];
3633       /* printf(" (%d,%g),",pj[j],pv[j]); */
3634     }
3635 
3636     /* diagonal: invert diagonal entries for simplier triangular solves */
3637     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3638     b->a[bdiag[i]] = 1.0/rtmp[i];
3639     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3640 
3641     /* U-part */
3642     pv = b->a + bdiag[i+1] + 1;
3643     pj = bj + bdiag[i+1] + 1;
3644     nzu = bdiag[i] - bdiag[i+1] - 1;
3645     for (j=0; j<nzu; j++) {
3646       pv[j] = rtmp[pj[j]];
3647       /* printf(" (%d,%g),",pj[j],pv[j]); */
3648     }
3649     /* printf("\n"); */
3650   }
3651 
3652   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3653   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3654   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3655 
3656   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3657   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3658   if (row_identity && col_identity) {
3659     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct;
3660   } else {
3661     C->ops->solve   = MatSolve_SeqAIJ_newdatastruct;
3662   }
3663   C->ops->solveadd           = 0;
3664   C->ops->solvetranspose     = 0;
3665   C->ops->solvetransposeadd  = 0;
3666   C->ops->matsolve           = 0;
3667   C->assembled    = PETSC_TRUE;
3668   C->preallocated = PETSC_TRUE;
3669   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3670   PetscFunctionReturn(0);
3671 }
3672