xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision bfb15611a2f62d5f3c65919701908022350ceac6)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
23 
24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28 #endif
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
32   /* ------------------------------------------------------------
33 
34           This interface was contribed by Tony Caola
35 
36      This routine is an interface to the pivoting drop-tolerance
37      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38      SPARSEKIT2.
39 
40      The SPARSEKIT2 routines used here are covered by the GNU
41      copyright; see the file gnu in this directory.
42 
43      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44      help in getting this routine ironed out.
45 
46      The major drawback to this routine is that if info->fill is
47      not large enough it fails rather than allocating more space;
48      this can be fixed by hacking/improving the f2c version of
49      Yousef Saad's code.
50 
51      ------------------------------------------------------------
52 */
53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54 {
55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
56   PetscFunctionBegin;
57   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58   You can obtain the drop tolerance routines by installing PETSc from\n\
59   www.mcs.anl.gov/petsc\n");
60 #else
61   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
62   IS             iscolf,isicol,isirow;
63   PetscTruth     reorder;
64   PetscErrorCode ierr,sierr;
65   PetscInt       *c,*r,*ic,i,n = A->rmap.n;
66   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67   PetscInt       *ordcol,*iwk,*iperm,*jw;
68   PetscInt       jmax,lfill,job,*o_i,*o_j;
69   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70   PetscReal      af;
71 
72   PetscFunctionBegin;
73 
74   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
75   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
77   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
78   lfill   = (PetscInt)(info->dtcount/2.0);
79   jmax    = (PetscInt)(info->fill*a->nz);
80 
81 
82   /* ------------------------------------------------------------
83      If reorder=.TRUE., then the original matrix has to be
84      reordered to reflect the user selected ordering scheme, and
85      then de-reordered so it is in it's original format.
86      Because Saad's dperm() is NOT in place, we have to copy
87      the original matrix and allocate more storage. . .
88      ------------------------------------------------------------
89   */
90 
91   /* set reorder to true if either isrow or iscol is not identity */
92   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
93   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
94   reorder = PetscNot(reorder);
95 
96 
97   /* storage for ilu factor */
98   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
100   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
101   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
102 
103   /* ------------------------------------------------------------
104      Make sure that everything is Fortran formatted (1-Based)
105      ------------------------------------------------------------
106   */
107   for (i=old_i[0];i<old_i[n];i++) {
108     old_j[i]++;
109   }
110   for(i=0;i<n+1;i++) {
111     old_i[i]++;
112   };
113 
114 
115   if (reorder) {
116     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
117     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       r[i]  = r[i]+1;
120       c[i]  = c[i]+1;
121     }
122     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
123     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
124     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
125     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126     for (i=0;i<n;i++) {
127       r[i]  = r[i]-1;
128       c[i]  = c[i]-1;
129     }
130     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
131     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
132     o_a = old_a2;
133     o_j = old_j2;
134     o_i = old_i2;
135   } else {
136     o_a = old_a;
137     o_j = old_j;
138     o_i = old_i;
139   }
140 
141   /* ------------------------------------------------------------
142      Call Saad's ilutp() routine to generate the factorization
143      ------------------------------------------------------------
144   */
145 
146   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
147   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
148   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
149 
150   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151   if (sierr) {
152     switch (sierr) {
153       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
154       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
155       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159     }
160   }
161 
162   ierr = PetscFree(w);CHKERRQ(ierr);
163   ierr = PetscFree(jw);CHKERRQ(ierr);
164 
165   /* ------------------------------------------------------------
166      Saad's routine gives the result in Modified Sparse Row (msr)
167      Convert to Compressed Sparse Row format (csr)
168      ------------------------------------------------------------
169   */
170 
171   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
172   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
173 
174   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
175 
176   ierr = PetscFree(iwk);CHKERRQ(ierr);
177   ierr = PetscFree(wk);CHKERRQ(ierr);
178 
179   if (reorder) {
180     ierr = PetscFree(old_a2);CHKERRQ(ierr);
181     ierr = PetscFree(old_j2);CHKERRQ(ierr);
182     ierr = PetscFree(old_i2);CHKERRQ(ierr);
183   } else {
184     /* fix permutation of old_j that the factorization introduced */
185     for (i=old_i[0]; i<old_i[n]; i++) {
186       old_j[i-1] = iperm[old_j[i-1]-1];
187     }
188   }
189 
190   /* get rid of the shift to indices starting at 1 */
191   for (i=0; i<n+1; i++) {
192     old_i[i]--;
193   }
194   for (i=old_i[0];i<old_i[n];i++) {
195     old_j[i]--;
196   }
197 
198   /* Make the factored matrix 0-based */
199   for (i=0; i<n+1; i++) {
200     new_i[i]--;
201   }
202   for (i=new_i[0];i<new_i[n];i++) {
203     new_j[i]--;
204   }
205 
206   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207   /*-- permute the right-hand-side and solution vectors           --*/
208   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
209   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
210   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
211   for(i=0; i<n; i++) {
212     ordcol[i] = ic[iperm[i]-1];
213   };
214   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
215   ierr = ISDestroy(isicol);CHKERRQ(ierr);
216 
217   ierr = PetscFree(iperm);CHKERRQ(ierr);
218 
219   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
220   ierr = PetscFree(ordcol);CHKERRQ(ierr);
221 
222   /*----- put together the new matrix -----*/
223 
224   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
225   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
226   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
227   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
228   (*fact)->factor    = FACTOR_LU;
229   (*fact)->assembled = PETSC_TRUE;
230 
231   b = (Mat_SeqAIJ*)(*fact)->data;
232   b->freedata      = PETSC_TRUE;
233   b->sorted        = PETSC_FALSE;
234   b->singlemalloc  = PETSC_FALSE;
235   b->a             = new_a;
236   b->j             = new_j;
237   b->i             = new_i;
238   b->ilen          = 0;
239   b->imax          = 0;
240   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241   b->row           = isirow;
242   b->col           = iscolf;
243   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
244   b->maxnz = b->nz = new_i[n];
245   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
246   (*fact)->info.factor_mallocs = 0;
247 
248   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
249 
250   af = ((double)b->nz)/((double)a->nz) + .001;
251   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr);
252   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
253   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
254   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
255 
256   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
257 
258   PetscFunctionReturn(0);
259 #endif
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265 {
266   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
267   IS                 isicol;
268   PetscErrorCode     ierr;
269   PetscInt           *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
270   PetscInt           *bi,*bj,*ajtmp;
271   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272   PetscReal          f;
273   PetscInt           nlnk,*lnk,k,**bi_ptr;
274   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
275   PetscBT            lnkbt;
276 
277   PetscFunctionBegin;
278   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
280   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
281   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
282 
283   /* get new row pointers */
284   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
285   bi[0] = 0;
286 
287   /* bdiag is location of diagonal in factor */
288   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
289   bdiag[0] = 0;
290 
291   /* linked list for storing column indices of the active row */
292   nlnk = n + 1;
293   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
294 
295   ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr);
296   bi_ptr = (PetscInt**)(im + n);
297 
298   /* initial FreeSpace size is f*(ai[n]+1) */
299   f = info->fill;
300   if (f < 1.0) f = 1.0; /* In case user input a wrong fill, reset it to 1.0 */
301   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
302   current_space = free_space;
303 
304   for (i=0; i<n; i++) {
305     /* copy previous fill into linked list */
306     nzi = 0;
307     nnz = ai[r[i]+1] - ai[r[i]];
308     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
309     ajtmp = aj + ai[r[i]];
310     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
311     nzi += nlnk;
312 
313     /* add pivot rows into linked list */
314     row = lnk[n];
315     while (row < i) {
316       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
317       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
318       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
319       nzi += nlnk;
320       row  = lnk[row];
321     }
322     bi[i+1] = bi[i] + nzi;
323     im[i]   = nzi;
324 
325     /* mark bdiag */
326     nzbd = 0;
327     nnz  = nzi;
328     k    = lnk[n];
329     while (nnz-- && k < i){
330       nzbd++;
331       k = lnk[k];
332     }
333     bdiag[i] = bi[i] + nzbd;
334 
335     /* if free space is not available, make more free space */
336     if (current_space->local_remaining<nzi) {
337       nnz = (n - i)*nzi; /* estimated and max additional space needed */
338       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
339       reallocs++;
340     }
341 
342     /* copy data into free space, then initialize lnk */
343     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
344     bi_ptr[i] = current_space->array;
345     current_space->array           += nzi;
346     current_space->local_used      += nzi;
347     current_space->local_remaining -= nzi;
348   }
349 #if defined(PETSC_USE_INFO)
350   if (ai[n] != 0) {
351     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
352     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
353     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
354     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
355     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
356   } else {
357     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
358   }
359 #endif
360 
361   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
362   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
363 
364   /* destroy list of free space and other temporary array(s) */
365   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
366   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
367   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
368   ierr = PetscFree(im);CHKERRQ(ierr);
369 
370   /* put together the new matrix */
371   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
372   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
373   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
374   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
375   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
376   b    = (Mat_SeqAIJ*)(*B)->data;
377   b->freedata     = PETSC_TRUE;
378   b->singlemalloc = PETSC_FALSE;
379   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
380   b->j          = bj;
381   b->i          = bi;
382   b->diag       = bdiag;
383   b->ilen       = 0;
384   b->imax       = 0;
385   b->row        = isrow;
386   b->col        = iscol;
387   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
388   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
389   b->icol       = isicol;
390   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
391 
392   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
393   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
394   b->maxnz = b->nz = bi[n] ;
395 
396   (*B)->factor                 =  FACTOR_LU;
397   (*B)->info.factor_mallocs    = reallocs;
398   (*B)->info.fill_ratio_given  = f;
399 
400   if (ai[n] != 0) {
401     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
402   } else {
403     (*B)->info.fill_ratio_needed = 0.0;
404   }
405   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
406   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
407   PetscFunctionReturn(0);
408 }
409 
410 /*
411     Trouble in factorization, should we dump the original matrix?
412 */
413 #undef __FUNCT__
414 #define __FUNCT__ "MatFactorDumpMatrix"
415 PetscErrorCode MatFactorDumpMatrix(Mat A)
416 {
417   PetscErrorCode ierr;
418   PetscTruth     flg;
419 
420   PetscFunctionBegin;
421   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
422   if (flg) {
423     PetscViewer viewer;
424     char        filename[PETSC_MAX_PATH_LEN];
425 
426     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
427     ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
428     ierr = MatView(A,viewer);CHKERRQ(ierr);
429     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
430   }
431   PetscFunctionReturn(0);
432 }
433 
434 /* ----------------------------------------------------------- */
435 #undef __FUNCT__
436 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
437 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
438 {
439   Mat            C=*B;
440   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
441   IS             isrow = b->row,isicol = b->icol;
442   PetscErrorCode ierr;
443   PetscInt       *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
444   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
445   PetscInt       *diag_offset = b->diag,diag,*pj;
446   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
447   PetscScalar    d;
448   PetscReal      rs;
449   LUShift_Ctx    sctx;
450   PetscInt       newshift,*ddiag;
451 
452   PetscFunctionBegin;
453   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
454   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
455   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
456   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
457   rtmps = rtmp; ics = ic;
458 
459   sctx.shift_top  = 0;
460   sctx.nshift_max = 0;
461   sctx.shift_lo   = 0;
462   sctx.shift_hi   = 0;
463 
464   if (!a->diag) {
465     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
466   }
467   /* if both shift schemes are chosen by user, only use info->shiftpd */
468   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
469   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
470     PetscInt *aai = a->i;
471     ddiag          = a->diag;
472     sctx.shift_top = 0;
473     for (i=0; i<n; i++) {
474       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
475       d  = (a->a)[ddiag[i]];
476       rs = -PetscAbsScalar(d) - PetscRealPart(d);
477       v  = a->a+aai[i];
478       nz = aai[i+1] - aai[i];
479       for (j=0; j<nz; j++)
480 	rs += PetscAbsScalar(v[j]);
481       if (rs>sctx.shift_top) sctx.shift_top = rs;
482     }
483     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
484     sctx.shift_top    *= 1.1;
485     sctx.nshift_max   = 5;
486     sctx.shift_lo     = 0.;
487     sctx.shift_hi     = 1.;
488   }
489 
490   sctx.shift_amount = 0;
491   sctx.nshift       = 0;
492   do {
493     sctx.lushift = PETSC_FALSE;
494     for (i=0; i<n; i++){
495       nz    = bi[i+1] - bi[i];
496       bjtmp = bj + bi[i];
497       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
498 
499       /* load in initial (unfactored row) */
500       nz    = a->i[r[i]+1] - a->i[r[i]];
501       ajtmp = a->j + a->i[r[i]];
502       v     = a->a + a->i[r[i]];
503       for (j=0; j<nz; j++) {
504         rtmp[ics[ajtmp[j]]] = v[j];
505       }
506       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
507 
508       row = *bjtmp++;
509       while  (row < i) {
510         pc = rtmp + row;
511         if (*pc != 0.0) {
512           pv         = b->a + diag_offset[row];
513           pj         = b->j + diag_offset[row] + 1;
514           multiplier = *pc / *pv++;
515           *pc        = multiplier;
516           nz         = bi[row+1] - diag_offset[row] - 1;
517           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
518           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
519         }
520         row = *bjtmp++;
521       }
522       /* finished row so stick it into b->a */
523       pv   = b->a + bi[i] ;
524       pj   = b->j + bi[i] ;
525       nz   = bi[i+1] - bi[i];
526       diag = diag_offset[i] - bi[i];
527       rs   = 0.0;
528       for (j=0; j<nz; j++) {
529         pv[j] = rtmps[pj[j]];
530         if (j != diag) rs += PetscAbsScalar(pv[j]);
531       }
532 
533       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
534       sctx.rs  = rs;
535       sctx.pv  = pv[diag];
536       ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr);
537       if (newshift == 1) break;
538     }
539 
540     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
541       /*
542        * if no shift in this attempt & shifting & started shifting & can refine,
543        * then try lower shift
544        */
545       sctx.shift_hi        = info->shift_fraction;
546       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
547       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
548       sctx.lushift         = PETSC_TRUE;
549       sctx.nshift++;
550     }
551   } while (sctx.lushift);
552 
553   /* invert diagonal entries for simplier triangular solves */
554   for (i=0; i<n; i++) {
555     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
556   }
557 
558   ierr = PetscFree(rtmp);CHKERRQ(ierr);
559   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
560   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
561   C->factor = FACTOR_LU;
562   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
563   C->assembled = PETSC_TRUE;
564   ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr);
565   if (sctx.nshift){
566     if (info->shiftnz) {
567       ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
568     } else if (info->shiftpd) {
569       ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
570     }
571   }
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
577 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
578 {
579   PetscFunctionBegin;
580   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
581   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
582   PetscFunctionReturn(0);
583 }
584 
585 
586 /* ----------------------------------------------------------- */
587 #undef __FUNCT__
588 #define __FUNCT__ "MatLUFactor_SeqAIJ"
589 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
590 {
591   PetscErrorCode ierr;
592   Mat            C;
593 
594   PetscFunctionBegin;
595   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
596   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
597   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
598   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
599   PetscFunctionReturn(0);
600 }
601 /* ----------------------------------------------------------- */
602 #undef __FUNCT__
603 #define __FUNCT__ "MatSolve_SeqAIJ"
604 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
605 {
606   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
607   IS             iscol = a->col,isrow = a->row;
608   PetscErrorCode ierr;
609   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
610   PetscInt       nz,*rout,*cout;
611   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
612 
613   PetscFunctionBegin;
614   if (!n) PetscFunctionReturn(0);
615 
616   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
617   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
618   tmp  = a->solve_work;
619 
620   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
621   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
622 
623   /* forward solve the lower triangular */
624   tmp[0] = b[*r++];
625   tmps   = tmp;
626   for (i=1; i<n; i++) {
627     v   = aa + ai[i] ;
628     vi  = aj + ai[i] ;
629     nz  = a->diag[i] - ai[i];
630     sum = b[*r++];
631     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
632     tmp[i] = sum;
633   }
634 
635   /* backward solve the upper triangular */
636   for (i=n-1; i>=0; i--){
637     v   = aa + a->diag[i] + 1;
638     vi  = aj + a->diag[i] + 1;
639     nz  = ai[i+1] - a->diag[i] - 1;
640     sum = tmp[i];
641     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
642     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
643   }
644 
645   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
646   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
647   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
648   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
649   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatMatSolve_SeqAIJ"
655 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
656 {
657   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
658   IS             iscol = a->col,isrow = a->row;
659   PetscErrorCode ierr;
660   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
661   PetscInt       nz,*rout,*cout,neq;
662   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
663 
664   PetscFunctionBegin;
665   if (!n) PetscFunctionReturn(0);
666 
667   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
668   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
669 
670   tmp  = a->solve_work;
671   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
672   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
673 
674   for (neq=0; neq<n; neq++){
675     /* forward solve the lower triangular */
676     tmp[0] = b[r[0]];
677     tmps   = tmp;
678     for (i=1; i<n; i++) {
679       v   = aa + ai[i] ;
680       vi  = aj + ai[i] ;
681       nz  = a->diag[i] - ai[i];
682       sum = b[r[i]];
683       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
684       tmp[i] = sum;
685     }
686     /* backward solve the upper triangular */
687     for (i=n-1; i>=0; i--){
688       v   = aa + a->diag[i] + 1;
689       vi  = aj + a->diag[i] + 1;
690       nz  = ai[i+1] - a->diag[i] - 1;
691       sum = tmp[i];
692       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
693       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
694     }
695 
696     b += n;
697     x += n;
698   }
699   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
700   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
701   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
702   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
703   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 /* ----------------------------------------------------------- */
708 #undef __FUNCT__
709 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
710 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
711 {
712   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
713   PetscErrorCode ierr;
714   PetscInt       n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
715   PetscScalar    *x,*b,*aa = a->a;
716 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
717   PetscInt       adiag_i,i,*vi,nz,ai_i;
718   PetscScalar    *v,sum;
719 #endif
720 
721   PetscFunctionBegin;
722   if (!n) PetscFunctionReturn(0);
723 
724   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
725   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
726 
727 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
728   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
729 #else
730   /* forward solve the lower triangular */
731   x[0] = b[0];
732   for (i=1; i<n; i++) {
733     ai_i = ai[i];
734     v    = aa + ai_i;
735     vi   = aj + ai_i;
736     nz   = adiag[i] - ai_i;
737     sum  = b[i];
738     while (nz--) sum -= *v++ * x[*vi++];
739     x[i] = sum;
740   }
741 
742   /* backward solve the upper triangular */
743   for (i=n-1; i>=0; i--){
744     adiag_i = adiag[i];
745     v       = aa + adiag_i + 1;
746     vi      = aj + adiag_i + 1;
747     nz      = ai[i+1] - adiag_i - 1;
748     sum     = x[i];
749     while (nz--) sum -= *v++ * x[*vi++];
750     x[i]    = sum*aa[adiag_i];
751   }
752 #endif
753   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
754   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
755   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
756   PetscFunctionReturn(0);
757 }
758 
759 #undef __FUNCT__
760 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
761 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
762 {
763   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
764   IS             iscol = a->col,isrow = a->row;
765   PetscErrorCode ierr;
766   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
767   PetscInt       nz,*rout,*cout;
768   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
769 
770   PetscFunctionBegin;
771   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
772 
773   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
774   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
775   tmp  = a->solve_work;
776 
777   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
778   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
779 
780   /* forward solve the lower triangular */
781   tmp[0] = b[*r++];
782   for (i=1; i<n; i++) {
783     v   = aa + ai[i] ;
784     vi  = aj + ai[i] ;
785     nz  = a->diag[i] - ai[i];
786     sum = b[*r++];
787     while (nz--) sum -= *v++ * tmp[*vi++ ];
788     tmp[i] = sum;
789   }
790 
791   /* backward solve the upper triangular */
792   for (i=n-1; i>=0; i--){
793     v   = aa + a->diag[i] + 1;
794     vi  = aj + a->diag[i] + 1;
795     nz  = ai[i+1] - a->diag[i] - 1;
796     sum = tmp[i];
797     while (nz--) sum -= *v++ * tmp[*vi++ ];
798     tmp[i] = sum*aa[a->diag[i]];
799     x[*c--] += tmp[i];
800   }
801 
802   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
803   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
804   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
805   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
806   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
807 
808   PetscFunctionReturn(0);
809 }
810 /* -------------------------------------------------------------------*/
811 #undef __FUNCT__
812 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
813 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
814 {
815   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
816   IS             iscol = a->col,isrow = a->row;
817   PetscErrorCode ierr;
818   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
819   PetscInt       nz,*rout,*cout,*diag = a->diag;
820   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
821 
822   PetscFunctionBegin;
823   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
824   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
825   tmp  = a->solve_work;
826 
827   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
828   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
829 
830   /* copy the b into temp work space according to permutation */
831   for (i=0; i<n; i++) tmp[i] = b[c[i]];
832 
833   /* forward solve the U^T */
834   for (i=0; i<n; i++) {
835     v   = aa + diag[i] ;
836     vi  = aj + diag[i] + 1;
837     nz  = ai[i+1] - diag[i] - 1;
838     s1  = tmp[i];
839     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
840     while (nz--) {
841       tmp[*vi++ ] -= (*v++)*s1;
842     }
843     tmp[i] = s1;
844   }
845 
846   /* backward solve the L^T */
847   for (i=n-1; i>=0; i--){
848     v   = aa + diag[i] - 1 ;
849     vi  = aj + diag[i] - 1 ;
850     nz  = diag[i] - ai[i];
851     s1  = tmp[i];
852     while (nz--) {
853       tmp[*vi-- ] -= (*v--)*s1;
854     }
855   }
856 
857   /* copy tmp into x according to permutation */
858   for (i=0; i<n; i++) x[r[i]] = tmp[i];
859 
860   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
861   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
862   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
863   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
864 
865   ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
871 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
872 {
873   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
874   IS             iscol = a->col,isrow = a->row;
875   PetscErrorCode ierr;
876   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
877   PetscInt       nz,*rout,*cout,*diag = a->diag;
878   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
879 
880   PetscFunctionBegin;
881   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
882 
883   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
884   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
885   tmp = a->solve_work;
886 
887   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
888   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
889 
890   /* copy the b into temp work space according to permutation */
891   for (i=0; i<n; i++) tmp[i] = b[c[i]];
892 
893   /* forward solve the U^T */
894   for (i=0; i<n; i++) {
895     v   = aa + diag[i] ;
896     vi  = aj + diag[i] + 1;
897     nz  = ai[i+1] - diag[i] - 1;
898     tmp[i] *= *v++;
899     while (nz--) {
900       tmp[*vi++ ] -= (*v++)*tmp[i];
901     }
902   }
903 
904   /* backward solve the L^T */
905   for (i=n-1; i>=0; i--){
906     v   = aa + diag[i] - 1 ;
907     vi  = aj + diag[i] - 1 ;
908     nz  = diag[i] - ai[i];
909     while (nz--) {
910       tmp[*vi-- ] -= (*v--)*tmp[i];
911     }
912   }
913 
914   /* copy tmp into x according to permutation */
915   for (i=0; i<n; i++) x[r[i]] += tmp[i];
916 
917   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
918   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
919   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
920   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
921 
922   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
923   PetscFunctionReturn(0);
924 }
925 /* ----------------------------------------------------------------*/
926 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
930 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
931 {
932   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
933   IS                 isicol;
934   PetscErrorCode     ierr;
935   PetscInt           *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j;
936   PetscInt           *bi,*cols,nnz,*cols_lvl;
937   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
938   PetscInt           i,levels,diagonal_fill;
939   PetscTruth         col_identity,row_identity;
940   PetscReal          f;
941   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
942   PetscBT            lnkbt;
943   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
944   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
945   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
946 
947   PetscFunctionBegin;
948   f             = info->fill;
949   if (f < 1.0) {f = info->fill = 1.0;}
950   levels        = (PetscInt)info->levels;
951   diagonal_fill = (PetscInt)info->diagonal_fill;
952   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
953 
954   /* special case that simply copies fill pattern */
955   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
956   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
957   if (!levels && row_identity && col_identity) {
958     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
959     (*fact)->factor                 = FACTOR_LU;
960     (*fact)->info.factor_mallocs    = 0;
961     (*fact)->info.fill_ratio_given  = info->fill;
962     (*fact)->info.fill_ratio_needed = 1.0;
963     b               = (Mat_SeqAIJ*)(*fact)->data;
964     if (!b->diag) {
965       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
966     }
967     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
968     b->row              = isrow;
969     b->col              = iscol;
970     b->icol             = isicol;
971     ierr                = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
972     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
973     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
974     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
975     PetscFunctionReturn(0);
976   }
977 
978   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
979   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
980 
981   /* get new row pointers */
982   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
983   bi[0] = 0;
984   /* bdiag is location of diagonal in factor */
985   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
986   bdiag[0]  = 0;
987 
988   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
989   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
990 
991   /* create a linked list for storing column indices of the active row */
992   nlnk = n + 1;
993   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
994 
995   /* initial FreeSpace size is f*(ai[n]+1) */
996   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
997   current_space = free_space;
998   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
999   current_space_lvl = free_space_lvl;
1000 
1001   for (i=0; i<n; i++) {
1002     nzi = 0;
1003     /* copy current row into linked list */
1004     nnz  = ai[r[i]+1] - ai[r[i]];
1005     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1006     cols = aj + ai[r[i]];
1007     lnk[i] = -1; /* marker to indicate if diagonal exists */
1008     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1009     nzi += nlnk;
1010 
1011     /* make sure diagonal entry is included */
1012     if (diagonal_fill && lnk[i] == -1) {
1013       fm = n;
1014       while (lnk[fm] < i) fm = lnk[fm];
1015       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1016       lnk[fm]    = i;
1017       lnk_lvl[i] = 0;
1018       nzi++; dcount++;
1019     }
1020 
1021     /* add pivot rows into the active row */
1022     nzbd = 0;
1023     prow = lnk[n];
1024     while (prow < i) {
1025       nnz      = bdiag[prow];
1026       cols     = bj_ptr[prow] + nnz + 1;
1027       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1028       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1029       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1030       nzi += nlnk;
1031       prow = lnk[prow];
1032       nzbd++;
1033     }
1034     bdiag[i] = nzbd;
1035     bi[i+1]  = bi[i] + nzi;
1036 
1037     /* if free space is not available, make more free space */
1038     if (current_space->local_remaining<nzi) {
1039       nnz = nzi*(n - i); /* estimated and max additional space needed */
1040       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1041       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1042       reallocs++;
1043     }
1044 
1045     /* copy data into free_space and free_space_lvl, then initialize lnk */
1046     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1047     bj_ptr[i]    = current_space->array;
1048     bjlvl_ptr[i] = current_space_lvl->array;
1049 
1050     /* make sure the active row i has diagonal entry */
1051     if (*(bj_ptr[i]+bdiag[i]) != i) {
1052       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1053     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1054     }
1055 
1056     current_space->array           += nzi;
1057     current_space->local_used      += nzi;
1058     current_space->local_remaining -= nzi;
1059     current_space_lvl->array           += nzi;
1060     current_space_lvl->local_used      += nzi;
1061     current_space_lvl->local_remaining -= nzi;
1062   }
1063 
1064   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1065   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1066 
1067   /* destroy list of free space and other temporary arrays */
1068   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1069   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1070   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1071   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1072   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1073 
1074 #if defined(PETSC_USE_INFO)
1075   {
1076     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1077     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1078     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1079     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1080     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1081     if (diagonal_fill) {
1082       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1083     }
1084   }
1085 #endif
1086 
1087   /* put together the new matrix */
1088   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1089   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1090   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1091   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1092   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1093   b = (Mat_SeqAIJ*)(*fact)->data;
1094   b->freedata     = PETSC_TRUE;
1095   b->singlemalloc = PETSC_FALSE;
1096   len = (bi[n] )*sizeof(PetscScalar);
1097   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1098   b->j          = bj;
1099   b->i          = bi;
1100   for (i=0; i<n; i++) bdiag[i] += bi[i];
1101   b->diag       = bdiag;
1102   b->ilen       = 0;
1103   b->imax       = 0;
1104   b->row        = isrow;
1105   b->col        = iscol;
1106   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1107   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1108   b->icol       = isicol;
1109   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1110   /* In b structure:  Free imax, ilen, old a, old j.
1111      Allocate bdiag, solve_work, new a, new j */
1112   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1113   b->maxnz             = b->nz = bi[n] ;
1114   (*fact)->factor = FACTOR_LU;
1115   (*fact)->info.factor_mallocs    = reallocs;
1116   (*fact)->info.fill_ratio_given  = f;
1117   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1118 
1119   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1120   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1121 
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #include "src/mat/impls/sbaij/seq/sbaij.h"
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1128 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1129 {
1130   Mat            C = *B;
1131   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1132   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1133   IS             ip=b->row;
1134   PetscErrorCode ierr;
1135   PetscInt       *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1136   PetscInt       *ai=a->i,*aj=a->j;
1137   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1138   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1139   PetscReal      zeropivot,rs,shiftnz;
1140   PetscReal      shiftpd;
1141   ChShift_Ctx    sctx;
1142   PetscInt       newshift;
1143 
1144   PetscFunctionBegin;
1145   shiftnz   = info->shiftnz;
1146   shiftpd   = info->shiftpd;
1147   zeropivot = info->zeropivot;
1148 
1149   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1150 
1151   /* initialization */
1152   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1153   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1154   jl   = il + mbs;
1155   rtmp = (MatScalar*)(jl + mbs);
1156 
1157   sctx.shift_amount = 0;
1158   sctx.nshift       = 0;
1159   do {
1160     sctx.chshift = PETSC_FALSE;
1161     for (i=0; i<mbs; i++) {
1162       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1163     }
1164 
1165     for (k = 0; k<mbs; k++){
1166       bval = ba + bi[k];
1167       /* initialize k-th row by the perm[k]-th row of A */
1168       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1169       for (j = jmin; j < jmax; j++){
1170         col = rip[aj[j]];
1171         if (col >= k){ /* only take upper triangular entry */
1172           rtmp[col] = aa[j];
1173           *bval++  = 0.0; /* for in-place factorization */
1174         }
1175       }
1176       /* shift the diagonal of the matrix */
1177       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1178 
1179       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1180       dk = rtmp[k];
1181       i = jl[k]; /* first row to be added to k_th row  */
1182 
1183       while (i < k){
1184         nexti = jl[i]; /* next row to be added to k_th row */
1185 
1186         /* compute multiplier, update diag(k) and U(i,k) */
1187         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1188         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1189         dk += uikdi*ba[ili];
1190         ba[ili] = uikdi; /* -U(i,k) */
1191 
1192         /* add multiple of row i to k-th row */
1193         jmin = ili + 1; jmax = bi[i+1];
1194         if (jmin < jmax){
1195           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1196           /* update il and jl for row i */
1197           il[i] = jmin;
1198           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1199         }
1200         i = nexti;
1201       }
1202 
1203       /* shift the diagonals when zero pivot is detected */
1204       /* compute rs=sum of abs(off-diagonal) */
1205       rs   = 0.0;
1206       jmin = bi[k]+1;
1207       nz   = bi[k+1] - jmin;
1208       if (nz){
1209         bcol = bj + jmin;
1210         while (nz--){
1211           rs += PetscAbsScalar(rtmp[*bcol]);
1212           bcol++;
1213         }
1214       }
1215 
1216       sctx.rs = rs;
1217       sctx.pv = dk;
1218       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1219       if (newshift == 1) break;
1220 
1221       /* copy data into U(k,:) */
1222       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1223       jmin = bi[k]+1; jmax = bi[k+1];
1224       if (jmin < jmax) {
1225         for (j=jmin; j<jmax; j++){
1226           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1227         }
1228         /* add the k-th row into il and jl */
1229         il[k] = jmin;
1230         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1231       }
1232     }
1233   } while (sctx.chshift);
1234   ierr = PetscFree(il);CHKERRQ(ierr);
1235 
1236   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1237   C->factor       = FACTOR_CHOLESKY;
1238   C->assembled    = PETSC_TRUE;
1239   C->preallocated = PETSC_TRUE;
1240   ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr);
1241   if (sctx.nshift){
1242     if (shiftnz) {
1243       ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1244     } else if (shiftpd) {
1245       ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1246     }
1247   }
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1253 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1254 {
1255   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1256   Mat_SeqSBAIJ       *b;
1257   Mat                B;
1258   PetscErrorCode     ierr;
1259   PetscTruth         perm_identity;
1260   PetscInt           reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1261   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1262   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1263   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1264   PetscReal          fill=info->fill,levels=info->levels;
1265   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1266   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1267   PetscBT            lnkbt;
1268 
1269   PetscFunctionBegin;
1270   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1271   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1272 
1273   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1274   ui[0] = 0;
1275 
1276   /* special case that simply copies fill pattern */
1277   if (!levels && perm_identity) {
1278     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1279     for (i=0; i<am; i++) {
1280       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1281     }
1282     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1283     cols = uj;
1284     for (i=0; i<am; i++) {
1285       aj    = a->j + a->diag[i];
1286       ncols = ui[i+1] - ui[i];
1287       for (j=0; j<ncols; j++) *cols++ = *aj++;
1288     }
1289   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1290     /* initialization */
1291     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1292 
1293     /* jl: linked list for storing indices of the pivot rows
1294        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1295     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1296     il         = jl + am;
1297     uj_ptr     = (PetscInt**)(il + am);
1298     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1299     for (i=0; i<am; i++){
1300       jl[i] = am; il[i] = 0;
1301     }
1302 
1303     /* create and initialize a linked list for storing column indices of the active row k */
1304     nlnk = am + 1;
1305     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1306 
1307     /* initial FreeSpace size is fill*(ai[am]+1) */
1308     if (fill < 1.0) fill = 1.0;
1309     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1310     current_space = free_space;
1311     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1312     current_space_lvl = free_space_lvl;
1313 
1314     for (k=0; k<am; k++){  /* for each active row k */
1315       /* initialize lnk by the column indices of row rip[k] of A */
1316       nzk   = 0;
1317       ncols = ai[rip[k]+1] - ai[rip[k]];
1318       ncols_upper = 0;
1319       for (j=0; j<ncols; j++){
1320         i = *(aj + ai[rip[k]] + j);
1321         if (rip[i] >= k){ /* only take upper triangular entry */
1322           ajtmp[ncols_upper] = i;
1323           ncols_upper++;
1324         }
1325       }
1326       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1327       nzk += nlnk;
1328 
1329       /* update lnk by computing fill-in for each pivot row to be merged in */
1330       prow = jl[k]; /* 1st pivot row */
1331 
1332       while (prow < k){
1333         nextprow = jl[prow];
1334 
1335         /* merge prow into k-th row */
1336         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1337         jmax = ui[prow+1];
1338         ncols = jmax-jmin;
1339         i     = jmin - ui[prow];
1340         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1341         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1342         j     = *(uj - 1);
1343         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1344         nzk += nlnk;
1345 
1346         /* update il and jl for prow */
1347         if (jmin < jmax){
1348           il[prow] = jmin;
1349           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1350         }
1351         prow = nextprow;
1352       }
1353 
1354       /* if free space is not available, make more free space */
1355       if (current_space->local_remaining<nzk) {
1356         i = am - k + 1; /* num of unfactored rows */
1357         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1358         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1359         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1360         reallocs++;
1361       }
1362 
1363       /* copy data into free_space and free_space_lvl, then initialize lnk */
1364       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1365 
1366       /* add the k-th row into il and jl */
1367       if (nzk > 1){
1368         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1369         jl[k] = jl[i]; jl[i] = k;
1370         il[k] = ui[k] + 1;
1371       }
1372       uj_ptr[k]     = current_space->array;
1373       uj_lvl_ptr[k] = current_space_lvl->array;
1374 
1375       current_space->array           += nzk;
1376       current_space->local_used      += nzk;
1377       current_space->local_remaining -= nzk;
1378 
1379       current_space_lvl->array           += nzk;
1380       current_space_lvl->local_used      += nzk;
1381       current_space_lvl->local_remaining -= nzk;
1382 
1383       ui[k+1] = ui[k] + nzk;
1384     }
1385 
1386 #if defined(PETSC_USE_INFO)
1387     if (ai[am] != 0) {
1388       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1389       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1390       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1391       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1392     } else {
1393       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1394     }
1395 #endif
1396 
1397     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1398     ierr = PetscFree(jl);CHKERRQ(ierr);
1399     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1400 
1401     /* destroy list of free space and other temporary array(s) */
1402     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1403     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1404     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1405     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1406 
1407   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1408 
1409   /* put together the new matrix in MATSEQSBAIJ format */
1410   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1411   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1412   B = *fact;
1413   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1414   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1415 
1416   b    = (Mat_SeqSBAIJ*)B->data;
1417   b->singlemalloc = PETSC_FALSE;
1418   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1419   b->j    = uj;
1420   b->i    = ui;
1421   b->diag = 0;
1422   b->ilen = 0;
1423   b->imax = 0;
1424   b->row  = perm;
1425   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1426   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1427   b->icol = perm;
1428   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1429   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1430   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1431   b->maxnz = b->nz = ui[am];
1432   b->freedata = PETSC_TRUE;
1433 
1434   B->factor                 = FACTOR_CHOLESKY;
1435   B->info.factor_mallocs    = reallocs;
1436   B->info.fill_ratio_given  = fill;
1437   if (ai[am] != 0) {
1438     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1439   } else {
1440     B->info.fill_ratio_needed = 0.0;
1441   }
1442   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1443   if (perm_identity){
1444     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1445     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1446   }
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1452 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1453 {
1454   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1455   Mat_SeqSBAIJ       *b;
1456   Mat                B;
1457   PetscErrorCode     ierr;
1458   PetscTruth         perm_identity;
1459   PetscReal          fill = info->fill;
1460   PetscInt           *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1461   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1462   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1463   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1464   PetscBT            lnkbt;
1465   IS                 iperm;
1466 
1467   PetscFunctionBegin;
1468   /* check whether perm is the identity mapping */
1469   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1470   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1471 
1472   if (!perm_identity){
1473     /* check if perm is symmetric! */
1474     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1475     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1476     for (i=0; i<am; i++) {
1477       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1478     }
1479     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1480     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1481   }
1482 
1483   /* initialization */
1484   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1485   ui[0] = 0;
1486 
1487   /* jl: linked list for storing indices of the pivot rows
1488      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1489   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1490   il     = jl + am;
1491   cols   = il + am;
1492   ui_ptr = (PetscInt**)(cols + am);
1493   for (i=0; i<am; i++){
1494     jl[i] = am; il[i] = 0;
1495   }
1496 
1497   /* create and initialize a linked list for storing column indices of the active row k */
1498   nlnk = am + 1;
1499   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1500 
1501   /* initial FreeSpace size is fill*(ai[am]+1) */
1502   if (fill < 1.0) fill = 1.0;
1503   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1504   current_space = free_space;
1505 
1506   for (k=0; k<am; k++){  /* for each active row k */
1507     /* initialize lnk by the column indices of row rip[k] of A */
1508     nzk   = 0;
1509     ncols = ai[rip[k]+1] - ai[rip[k]];
1510     ncols_upper = 0;
1511     for (j=0; j<ncols; j++){
1512       i = rip[*(aj + ai[rip[k]] + j)];
1513       if (i >= k){ /* only take upper triangular entry */
1514         cols[ncols_upper] = i;
1515         ncols_upper++;
1516       }
1517     }
1518     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1519     nzk += nlnk;
1520 
1521     /* update lnk by computing fill-in for each pivot row to be merged in */
1522     prow = jl[k]; /* 1st pivot row */
1523 
1524     while (prow < k){
1525       nextprow = jl[prow];
1526       /* merge prow into k-th row */
1527       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1528       jmax = ui[prow+1];
1529       ncols = jmax-jmin;
1530       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1531       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1532       nzk += nlnk;
1533 
1534       /* update il and jl for prow */
1535       if (jmin < jmax){
1536         il[prow] = jmin;
1537         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1538       }
1539       prow = nextprow;
1540     }
1541 
1542     /* if free space is not available, make more free space */
1543     if (current_space->local_remaining<nzk) {
1544       i = am - k + 1; /* num of unfactored rows */
1545       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1546       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1547       reallocs++;
1548     }
1549 
1550     /* copy data into free space, then initialize lnk */
1551     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1552 
1553     /* add the k-th row into il and jl */
1554     if (nzk-1 > 0){
1555       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1556       jl[k] = jl[i]; jl[i] = k;
1557       il[k] = ui[k] + 1;
1558     }
1559     ui_ptr[k] = current_space->array;
1560     current_space->array           += nzk;
1561     current_space->local_used      += nzk;
1562     current_space->local_remaining -= nzk;
1563 
1564     ui[k+1] = ui[k] + nzk;
1565   }
1566 
1567 #if defined(PETSC_USE_INFO)
1568   if (ai[am] != 0) {
1569     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1570     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1571     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1572     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1573   } else {
1574      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1575   }
1576 #endif
1577 
1578   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1579   ierr = PetscFree(jl);CHKERRQ(ierr);
1580 
1581   /* destroy list of free space and other temporary array(s) */
1582   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1583   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1584   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1585 
1586   /* put together the new matrix in MATSEQSBAIJ format */
1587   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1588   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1589   B    = *fact;
1590   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1591   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1592 
1593   b = (Mat_SeqSBAIJ*)B->data;
1594   b->singlemalloc = PETSC_FALSE;
1595   b->freedata     = PETSC_TRUE;
1596   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1597   b->j    = uj;
1598   b->i    = ui;
1599   b->diag = 0;
1600   b->ilen = 0;
1601   b->imax = 0;
1602   b->row  = perm;
1603   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1604   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1605   b->icol = perm;
1606   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1607   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1608   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1609   b->maxnz = b->nz = ui[am];
1610 
1611   B->factor                 = FACTOR_CHOLESKY;
1612   B->info.factor_mallocs    = reallocs;
1613   B->info.fill_ratio_given  = fill;
1614   if (ai[am] != 0) {
1615     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1616   } else {
1617     B->info.fill_ratio_needed = 0.0;
1618   }
1619   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1620   if (perm_identity){
1621     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1622     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1623   }
1624   PetscFunctionReturn(0);
1625 }
1626