xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision bc02c2b25edd532cf51de58d59ecf40610c46de3)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
23 
24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28 #endif
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
32   /* ------------------------------------------------------------
33 
34           This interface was contribed by Tony Caola
35 
36      This routine is an interface to the pivoting drop-tolerance
37      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38      SPARSEKIT2.
39 
40      The SPARSEKIT2 routines used here are covered by the GNU
41      copyright; see the file gnu in this directory.
42 
43      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44      help in getting this routine ironed out.
45 
46      The major drawback to this routine is that if info->fill is
47      not large enough it fails rather than allocating more space;
48      this can be fixed by hacking/improving the f2c version of
49      Yousef Saad's code.
50 
51      ------------------------------------------------------------
52 */
53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54 {
55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
56   PetscFunctionBegin;
57   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58   You can obtain the drop tolerance routines by installing PETSc from\n\
59   www.mcs.anl.gov/petsc\n");
60 #else
61   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
62   IS             iscolf,isicol,isirow;
63   PetscTruth     reorder;
64   PetscErrorCode ierr,sierr;
65   PetscInt       *c,*r,*ic,i,n = A->rmap.n;
66   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67   PetscInt       *ordcol,*iwk,*iperm,*jw;
68   PetscInt       jmax,lfill,job,*o_i,*o_j;
69   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70   PetscReal      af;
71 
72   PetscFunctionBegin;
73 
74   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
75   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
77   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
78   lfill   = (PetscInt)(info->dtcount/2.0);
79   jmax    = (PetscInt)(info->fill*a->nz);
80 
81 
82   /* ------------------------------------------------------------
83      If reorder=.TRUE., then the original matrix has to be
84      reordered to reflect the user selected ordering scheme, and
85      then de-reordered so it is in it's original format.
86      Because Saad's dperm() is NOT in place, we have to copy
87      the original matrix and allocate more storage. . .
88      ------------------------------------------------------------
89   */
90 
91   /* set reorder to true if either isrow or iscol is not identity */
92   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
93   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
94   reorder = PetscNot(reorder);
95 
96 
97   /* storage for ilu factor */
98   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
100   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
101   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
102 
103   /* ------------------------------------------------------------
104      Make sure that everything is Fortran formatted (1-Based)
105      ------------------------------------------------------------
106   */
107   for (i=old_i[0];i<old_i[n];i++) {
108     old_j[i]++;
109   }
110   for(i=0;i<n+1;i++) {
111     old_i[i]++;
112   };
113 
114 
115   if (reorder) {
116     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
117     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       r[i]  = r[i]+1;
120       c[i]  = c[i]+1;
121     }
122     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
123     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
124     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
125     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126     for (i=0;i<n;i++) {
127       r[i]  = r[i]-1;
128       c[i]  = c[i]-1;
129     }
130     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
131     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
132     o_a = old_a2;
133     o_j = old_j2;
134     o_i = old_i2;
135   } else {
136     o_a = old_a;
137     o_j = old_j;
138     o_i = old_i;
139   }
140 
141   /* ------------------------------------------------------------
142      Call Saad's ilutp() routine to generate the factorization
143      ------------------------------------------------------------
144   */
145 
146   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
147   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
148   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
149 
150   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151   if (sierr) {
152     switch (sierr) {
153       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
154       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
155       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159     }
160   }
161 
162   ierr = PetscFree(w);CHKERRQ(ierr);
163   ierr = PetscFree(jw);CHKERRQ(ierr);
164 
165   /* ------------------------------------------------------------
166      Saad's routine gives the result in Modified Sparse Row (msr)
167      Convert to Compressed Sparse Row format (csr)
168      ------------------------------------------------------------
169   */
170 
171   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
172   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
173 
174   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
175 
176   ierr = PetscFree(iwk);CHKERRQ(ierr);
177   ierr = PetscFree(wk);CHKERRQ(ierr);
178 
179   if (reorder) {
180     ierr = PetscFree(old_a2);CHKERRQ(ierr);
181     ierr = PetscFree(old_j2);CHKERRQ(ierr);
182     ierr = PetscFree(old_i2);CHKERRQ(ierr);
183   } else {
184     /* fix permutation of old_j that the factorization introduced */
185     for (i=old_i[0]; i<old_i[n]; i++) {
186       old_j[i-1] = iperm[old_j[i-1]-1];
187     }
188   }
189 
190   /* get rid of the shift to indices starting at 1 */
191   for (i=0; i<n+1; i++) {
192     old_i[i]--;
193   }
194   for (i=old_i[0];i<old_i[n];i++) {
195     old_j[i]--;
196   }
197 
198   /* Make the factored matrix 0-based */
199   for (i=0; i<n+1; i++) {
200     new_i[i]--;
201   }
202   for (i=new_i[0];i<new_i[n];i++) {
203     new_j[i]--;
204   }
205 
206   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207   /*-- permute the right-hand-side and solution vectors           --*/
208   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
209   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
210   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
211   for(i=0; i<n; i++) {
212     ordcol[i] = ic[iperm[i]-1];
213   };
214   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
215   ierr = ISDestroy(isicol);CHKERRQ(ierr);
216 
217   ierr = PetscFree(iperm);CHKERRQ(ierr);
218 
219   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
220   ierr = PetscFree(ordcol);CHKERRQ(ierr);
221 
222   /*----- put together the new matrix -----*/
223 
224   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
225   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
226   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
227   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
228   (*fact)->factor    = FACTOR_LU;
229   (*fact)->assembled = PETSC_TRUE;
230 
231   b = (Mat_SeqAIJ*)(*fact)->data;
232   b->freedata      = PETSC_TRUE;
233   b->sorted        = PETSC_FALSE;
234   b->singlemalloc  = PETSC_FALSE;
235   b->a             = new_a;
236   b->j             = new_j;
237   b->i             = new_i;
238   b->ilen          = 0;
239   b->imax          = 0;
240   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241   b->row           = isirow;
242   b->col           = iscolf;
243   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
244   b->maxnz = b->nz = new_i[n];
245   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
246   (*fact)->info.factor_mallocs = 0;
247 
248   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
249 
250   af = ((double)b->nz)/((double)a->nz) + .001;
251   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr);
252   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
253   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
254   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
255 
256   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
257 
258   PetscFunctionReturn(0);
259 #endif
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265 {
266   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
267   IS                 isicol;
268   PetscErrorCode     ierr;
269   PetscInt           *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
270   PetscInt           *bi,*bj,*ajtmp;
271   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272   PetscReal          f;
273   PetscInt           nlnk,*lnk,k,**bi_ptr;
274   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
275   PetscBT            lnkbt;
276 
277   PetscFunctionBegin;
278   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
280   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
281   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
282 
283   /* get new row pointers */
284   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
285   bi[0] = 0;
286 
287   /* bdiag is location of diagonal in factor */
288   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
289   bdiag[0] = 0;
290 
291   /* linked list for storing column indices of the active row */
292   nlnk = n + 1;
293   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
294 
295   ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr);
296   bi_ptr = (PetscInt**)(im + n);
297 
298   /* initial FreeSpace size is f*(ai[n]+1) */
299   f = info->fill;
300   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
301   current_space = free_space;
302 
303   for (i=0; i<n; i++) {
304     /* copy previous fill into linked list */
305     nzi = 0;
306     nnz = ai[r[i]+1] - ai[r[i]];
307     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
308     ajtmp = aj + ai[r[i]];
309     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
310     nzi += nlnk;
311 
312     /* add pivot rows into linked list */
313     row = lnk[n];
314     while (row < i) {
315       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
316       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
317       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
318       nzi += nlnk;
319       row  = lnk[row];
320     }
321     bi[i+1] = bi[i] + nzi;
322     im[i]   = nzi;
323 
324     /* mark bdiag */
325     nzbd = 0;
326     nnz  = nzi;
327     k    = lnk[n];
328     while (nnz-- && k < i){
329       nzbd++;
330       k = lnk[k];
331     }
332     bdiag[i] = bi[i] + nzbd;
333 
334     /* if free space is not available, make more free space */
335     if (current_space->local_remaining<nzi) {
336       nnz = (n - i)*nzi; /* estimated and max additional space needed */
337       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
338       reallocs++;
339     }
340 
341     /* copy data into free space, then initialize lnk */
342     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
343     bi_ptr[i] = current_space->array;
344     current_space->array           += nzi;
345     current_space->local_used      += nzi;
346     current_space->local_remaining -= nzi;
347   }
348 #if defined(PETSC_USE_INFO)
349   if (ai[n] != 0) {
350     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
351     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
352     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
353     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
354     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
355   } else {
356     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
357   }
358 #endif
359 
360   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
361   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
362 
363   /* destroy list of free space and other temporary array(s) */
364   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
365   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
366   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
367   ierr = PetscFree(im);CHKERRQ(ierr);
368 
369   /* put together the new matrix */
370   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
371   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
372   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
373   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
374   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
375   b    = (Mat_SeqAIJ*)(*B)->data;
376   b->freedata     = PETSC_TRUE;
377   b->singlemalloc = PETSC_FALSE;
378   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
379   b->j          = bj;
380   b->i          = bi;
381   b->diag       = bdiag;
382   b->ilen       = 0;
383   b->imax       = 0;
384   b->row        = isrow;
385   b->col        = iscol;
386   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
387   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
388   b->icol       = isicol;
389   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
390 
391   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
392   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
393   b->maxnz = b->nz = bi[n] ;
394 
395   (*B)->factor                 =  FACTOR_LU;
396   (*B)->info.factor_mallocs    = reallocs;
397   (*B)->info.fill_ratio_given  = f;
398 
399   if (ai[n] != 0) {
400     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
401   } else {
402     (*B)->info.fill_ratio_needed = 0.0;
403   }
404   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
405   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
406   PetscFunctionReturn(0);
407 }
408 
409 /* ----------------------------------------------------------- */
410 #undef __FUNCT__
411 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
412 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
413 {
414   Mat            C=*B;
415   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
416   IS             isrow = b->row,isicol = b->icol;
417   PetscErrorCode ierr;
418   PetscInt       *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
419   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
420   PetscInt       *diag_offset = b->diag,diag,*pj;
421   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps,*aa=a->a;
422   PetscScalar    d;
423   PetscReal      rs;
424   LUShift_Ctx    sctx;
425   PetscInt       newshift,*ddiag;
426 
427   PetscFunctionBegin;
428   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
429   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
430   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
431   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
432   rtmps = rtmp; ics = ic;
433 
434   sctx.shift_top  = 0;
435   sctx.nshift_max = 0;
436   sctx.shift_lo   = 0;
437   sctx.shift_hi   = 0;
438 
439   if (!a->diag) {
440     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
441   }
442   /* if both shift schemes are chosen by user, only use info->shiftpd */
443   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
444   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
445     PetscInt *aai = a->i;
446     ddiag          = a->diag;
447     sctx.shift_top = 0;
448     for (i=0; i<n; i++) {
449       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
450       d  = (a->a)[ddiag[i]];
451       rs = -PetscAbsScalar(d) - PetscRealPart(d);
452       v  = a->a+aai[i];
453       nz = aai[i+1] - aai[i];
454       for (j=0; j<nz; j++)
455 	rs += PetscAbsScalar(v[j]);
456       if (rs>sctx.shift_top) sctx.shift_top = rs;
457     }
458     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
459     sctx.shift_top    *= 1.1;
460     sctx.nshift_max   = 5;
461     sctx.shift_lo     = 0.;
462     sctx.shift_hi     = 1.;
463   }
464 
465   sctx.shift_amount = 0;
466   sctx.nshift       = 0;
467   do {
468     sctx.lushift = PETSC_FALSE;
469     for (i=0; i<n; i++){
470       nz    = bi[i+1] - bi[i];
471       bjtmp = bj + bi[i];
472       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
473 
474       /* load in initial (unfactored row) */
475       nz    = a->i[r[i]+1] - a->i[r[i]];
476       ajtmp = a->j + a->i[r[i]];
477       v     = a->a + a->i[r[i]];
478       for (j=0; j<nz; j++) {
479         rtmp[ics[ajtmp[j]]] = v[j];
480       }
481       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
482 
483       row = *bjtmp++;
484       while  (row < i) {
485         pc = rtmp + row;
486         if (*pc != 0.0) {
487           pv         = b->a + diag_offset[row];
488           pj         = b->j + diag_offset[row] + 1;
489           multiplier = *pc / *pv++;
490           *pc        = multiplier;
491           nz         = bi[row+1] - diag_offset[row] - 1;
492           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
493           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
494         }
495         row = *bjtmp++;
496       }
497       /* finished row so stick it into b->a */
498       pv   = b->a + bi[i] ;
499       pj   = b->j + bi[i] ;
500       nz   = bi[i+1] - bi[i];
501       diag = diag_offset[i] - bi[i];
502       rs   = 0.0;
503       for (j=0; j<nz; j++) {
504         pv[j] = rtmps[pj[j]];
505         if (j != diag) rs += PetscAbsScalar(pv[j]);
506       }
507 
508       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
509       sctx.rs  = rs;
510       sctx.pv  = pv[diag];
511       ierr = MatLUCheckShift_inline(info,sctx,i,aa,a->diag,newshift);CHKERRQ(ierr);
512       if (newshift == 1){
513         break;    /* sctx.shift_amount is updated */
514       } else if (newshift == -1){
515         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %G tolerance %G * rs %G",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
516       }
517     }
518 
519     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
520       /*
521        * if no shift in this attempt & shifting & started shifting & can refine,
522        * then try lower shift
523        */
524       sctx.shift_hi        = info->shift_fraction;
525       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
526       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
527       sctx.lushift         = PETSC_TRUE;
528       sctx.nshift++;
529     }
530   } while (sctx.lushift);
531 
532   /* invert diagonal entries for simplier triangular solves */
533   for (i=0; i<n; i++) {
534     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
535   }
536 
537   ierr = PetscFree(rtmp);CHKERRQ(ierr);
538   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
539   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
540   C->factor = FACTOR_LU;
541   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
542   C->assembled = PETSC_TRUE;
543   ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr);
544   if (sctx.nshift){
545     if (info->shiftnz) {
546       ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
547     } else if (info->shiftpd) {
548       ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
549     }
550   }
551   PetscFunctionReturn(0);
552 }
553 
554 #undef __FUNCT__
555 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
556 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
557 {
558   PetscFunctionBegin;
559   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
560   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
561   PetscFunctionReturn(0);
562 }
563 
564 
565 /* ----------------------------------------------------------- */
566 #undef __FUNCT__
567 #define __FUNCT__ "MatLUFactor_SeqAIJ"
568 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
569 {
570   PetscErrorCode ierr;
571   Mat            C;
572 
573   PetscFunctionBegin;
574   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
575   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
576   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
577   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 /* ----------------------------------------------------------- */
581 #undef __FUNCT__
582 #define __FUNCT__ "MatSolve_SeqAIJ"
583 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
584 {
585   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
586   IS             iscol = a->col,isrow = a->row;
587   PetscErrorCode ierr;
588   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
589   PetscInt       nz,*rout,*cout;
590   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
591 
592   PetscFunctionBegin;
593   if (!n) PetscFunctionReturn(0);
594 
595   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
596   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
597   tmp  = a->solve_work;
598 
599   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
600   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
601 
602   /* forward solve the lower triangular */
603   tmp[0] = b[*r++];
604   tmps   = tmp;
605   for (i=1; i<n; i++) {
606     v   = aa + ai[i] ;
607     vi  = aj + ai[i] ;
608     nz  = a->diag[i] - ai[i];
609     sum = b[*r++];
610     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
611     tmp[i] = sum;
612   }
613 
614   /* backward solve the upper triangular */
615   for (i=n-1; i>=0; i--){
616     v   = aa + a->diag[i] + 1;
617     vi  = aj + a->diag[i] + 1;
618     nz  = ai[i+1] - a->diag[i] - 1;
619     sum = tmp[i];
620     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
621     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
622   }
623 
624   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
625   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
626   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
627   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
628   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
629   PetscFunctionReturn(0);
630 }
631 
632 #undef __FUNCT__
633 #define __FUNCT__ "MatMatSolve_SeqAIJ"
634 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
635 {
636   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
637   IS             iscol = a->col,isrow = a->row;
638   PetscErrorCode ierr;
639   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
640   PetscInt       nz,*rout,*cout,neq;
641   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
642 
643   PetscFunctionBegin;
644   if (!n) PetscFunctionReturn(0);
645 
646   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
647   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
648 
649   tmp  = a->solve_work;
650   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
651   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
652 
653   for (neq=0; neq<n; neq++){
654     /* forward solve the lower triangular */
655     tmp[0] = b[r[0]];
656     tmps   = tmp;
657     for (i=1; i<n; i++) {
658       v   = aa + ai[i] ;
659       vi  = aj + ai[i] ;
660       nz  = a->diag[i] - ai[i];
661       sum = b[r[i]];
662       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
663       tmp[i] = sum;
664     }
665     /* backward solve the upper triangular */
666     for (i=n-1; i>=0; i--){
667       v   = aa + a->diag[i] + 1;
668       vi  = aj + a->diag[i] + 1;
669       nz  = ai[i+1] - a->diag[i] - 1;
670       sum = tmp[i];
671       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
672       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
673     }
674 
675     b += n;
676     x += n;
677   }
678   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
679   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
680   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
681   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
682   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 /* ----------------------------------------------------------- */
687 #undef __FUNCT__
688 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
689 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
690 {
691   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
692   PetscErrorCode ierr;
693   PetscInt       n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
694   PetscScalar    *x,*b,*aa = a->a;
695 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
696   PetscInt       adiag_i,i,*vi,nz,ai_i;
697   PetscScalar    *v,sum;
698 #endif
699 
700   PetscFunctionBegin;
701   if (!n) PetscFunctionReturn(0);
702 
703   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
704   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
705 
706 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
707   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
708 #else
709   /* forward solve the lower triangular */
710   x[0] = b[0];
711   for (i=1; i<n; i++) {
712     ai_i = ai[i];
713     v    = aa + ai_i;
714     vi   = aj + ai_i;
715     nz   = adiag[i] - ai_i;
716     sum  = b[i];
717     while (nz--) sum -= *v++ * x[*vi++];
718     x[i] = sum;
719   }
720 
721   /* backward solve the upper triangular */
722   for (i=n-1; i>=0; i--){
723     adiag_i = adiag[i];
724     v       = aa + adiag_i + 1;
725     vi      = aj + adiag_i + 1;
726     nz      = ai[i+1] - adiag_i - 1;
727     sum     = x[i];
728     while (nz--) sum -= *v++ * x[*vi++];
729     x[i]    = sum*aa[adiag_i];
730   }
731 #endif
732   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
733   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
734   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
740 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
741 {
742   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
743   IS             iscol = a->col,isrow = a->row;
744   PetscErrorCode ierr;
745   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
746   PetscInt       nz,*rout,*cout;
747   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
748 
749   PetscFunctionBegin;
750   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
751 
752   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
753   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
754   tmp  = a->solve_work;
755 
756   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
757   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
758 
759   /* forward solve the lower triangular */
760   tmp[0] = b[*r++];
761   for (i=1; i<n; i++) {
762     v   = aa + ai[i] ;
763     vi  = aj + ai[i] ;
764     nz  = a->diag[i] - ai[i];
765     sum = b[*r++];
766     while (nz--) sum -= *v++ * tmp[*vi++ ];
767     tmp[i] = sum;
768   }
769 
770   /* backward solve the upper triangular */
771   for (i=n-1; i>=0; i--){
772     v   = aa + a->diag[i] + 1;
773     vi  = aj + a->diag[i] + 1;
774     nz  = ai[i+1] - a->diag[i] - 1;
775     sum = tmp[i];
776     while (nz--) sum -= *v++ * tmp[*vi++ ];
777     tmp[i] = sum*aa[a->diag[i]];
778     x[*c--] += tmp[i];
779   }
780 
781   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
782   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
783   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
784   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
785   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
786 
787   PetscFunctionReturn(0);
788 }
789 /* -------------------------------------------------------------------*/
790 #undef __FUNCT__
791 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
792 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
793 {
794   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
795   IS             iscol = a->col,isrow = a->row;
796   PetscErrorCode ierr;
797   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
798   PetscInt       nz,*rout,*cout,*diag = a->diag;
799   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
800 
801   PetscFunctionBegin;
802   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
803   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
804   tmp  = a->solve_work;
805 
806   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
807   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
808 
809   /* copy the b into temp work space according to permutation */
810   for (i=0; i<n; i++) tmp[i] = b[c[i]];
811 
812   /* forward solve the U^T */
813   for (i=0; i<n; i++) {
814     v   = aa + diag[i] ;
815     vi  = aj + diag[i] + 1;
816     nz  = ai[i+1] - diag[i] - 1;
817     s1  = tmp[i];
818     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
819     while (nz--) {
820       tmp[*vi++ ] -= (*v++)*s1;
821     }
822     tmp[i] = s1;
823   }
824 
825   /* backward solve the L^T */
826   for (i=n-1; i>=0; i--){
827     v   = aa + diag[i] - 1 ;
828     vi  = aj + diag[i] - 1 ;
829     nz  = diag[i] - ai[i];
830     s1  = tmp[i];
831     while (nz--) {
832       tmp[*vi-- ] -= (*v--)*s1;
833     }
834   }
835 
836   /* copy tmp into x according to permutation */
837   for (i=0; i<n; i++) x[r[i]] = tmp[i];
838 
839   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
840   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
841   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
842   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
843 
844   ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
850 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
851 {
852   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
853   IS             iscol = a->col,isrow = a->row;
854   PetscErrorCode ierr;
855   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
856   PetscInt       nz,*rout,*cout,*diag = a->diag;
857   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
858 
859   PetscFunctionBegin;
860   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
861 
862   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
863   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
864   tmp = a->solve_work;
865 
866   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
867   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
868 
869   /* copy the b into temp work space according to permutation */
870   for (i=0; i<n; i++) tmp[i] = b[c[i]];
871 
872   /* forward solve the U^T */
873   for (i=0; i<n; i++) {
874     v   = aa + diag[i] ;
875     vi  = aj + diag[i] + 1;
876     nz  = ai[i+1] - diag[i] - 1;
877     tmp[i] *= *v++;
878     while (nz--) {
879       tmp[*vi++ ] -= (*v++)*tmp[i];
880     }
881   }
882 
883   /* backward solve the L^T */
884   for (i=n-1; i>=0; i--){
885     v   = aa + diag[i] - 1 ;
886     vi  = aj + diag[i] - 1 ;
887     nz  = diag[i] - ai[i];
888     while (nz--) {
889       tmp[*vi-- ] -= (*v--)*tmp[i];
890     }
891   }
892 
893   /* copy tmp into x according to permutation */
894   for (i=0; i<n; i++) x[r[i]] += tmp[i];
895 
896   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
897   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
898   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
899   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
900 
901   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 /* ----------------------------------------------------------------*/
905 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
906 
907 #undef __FUNCT__
908 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
909 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
910 {
911   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
912   IS                 isicol;
913   PetscErrorCode     ierr;
914   PetscInt           *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j;
915   PetscInt           *bi,*cols,nnz,*cols_lvl;
916   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
917   PetscInt           i,levels,diagonal_fill;
918   PetscTruth         col_identity,row_identity;
919   PetscReal          f;
920   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
921   PetscBT            lnkbt;
922   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
923   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
924   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
925 
926   PetscFunctionBegin;
927   f             = info->fill;
928   levels        = (PetscInt)info->levels;
929   diagonal_fill = (PetscInt)info->diagonal_fill;
930   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
931 
932   /* special case that simply copies fill pattern */
933   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
934   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
935   if (!levels && row_identity && col_identity) {
936     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
937     (*fact)->factor = FACTOR_LU;
938     b               = (Mat_SeqAIJ*)(*fact)->data;
939     if (!b->diag) {
940       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
941     }
942     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
943     b->row              = isrow;
944     b->col              = iscol;
945     b->icol             = isicol;
946     ierr                = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
947     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
948     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
949     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
950     PetscFunctionReturn(0);
951   }
952 
953   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
954   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
955 
956   /* get new row pointers */
957   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
958   bi[0] = 0;
959   /* bdiag is location of diagonal in factor */
960   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
961   bdiag[0]  = 0;
962 
963   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
964   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
965 
966   /* create a linked list for storing column indices of the active row */
967   nlnk = n + 1;
968   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
969 
970   /* initial FreeSpace size is f*(ai[n]+1) */
971   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
972   current_space = free_space;
973   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
974   current_space_lvl = free_space_lvl;
975 
976   for (i=0; i<n; i++) {
977     nzi = 0;
978     /* copy current row into linked list */
979     nnz  = ai[r[i]+1] - ai[r[i]];
980     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
981     cols = aj + ai[r[i]];
982     lnk[i] = -1; /* marker to indicate if diagonal exists */
983     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
984     nzi += nlnk;
985 
986     /* make sure diagonal entry is included */
987     if (diagonal_fill && lnk[i] == -1) {
988       fm = n;
989       while (lnk[fm] < i) fm = lnk[fm];
990       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
991       lnk[fm]    = i;
992       lnk_lvl[i] = 0;
993       nzi++; dcount++;
994     }
995 
996     /* add pivot rows into the active row */
997     nzbd = 0;
998     prow = lnk[n];
999     while (prow < i) {
1000       nnz      = bdiag[prow];
1001       cols     = bj_ptr[prow] + nnz + 1;
1002       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1003       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1004       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1005       nzi += nlnk;
1006       prow = lnk[prow];
1007       nzbd++;
1008     }
1009     bdiag[i] = nzbd;
1010     bi[i+1]  = bi[i] + nzi;
1011 
1012     /* if free space is not available, make more free space */
1013     if (current_space->local_remaining<nzi) {
1014       nnz = nzi*(n - i); /* estimated and max additional space needed */
1015       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1016       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1017       reallocs++;
1018     }
1019 
1020     /* copy data into free_space and free_space_lvl, then initialize lnk */
1021     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1022     bj_ptr[i]    = current_space->array;
1023     bjlvl_ptr[i] = current_space_lvl->array;
1024 
1025     /* make sure the active row i has diagonal entry */
1026     if (*(bj_ptr[i]+bdiag[i]) != i) {
1027       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1028     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1029     }
1030 
1031     current_space->array           += nzi;
1032     current_space->local_used      += nzi;
1033     current_space->local_remaining -= nzi;
1034     current_space_lvl->array           += nzi;
1035     current_space_lvl->local_used      += nzi;
1036     current_space_lvl->local_remaining -= nzi;
1037   }
1038 
1039   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1040   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1041 
1042   /* destroy list of free space and other temporary arrays */
1043   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1044   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1045   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1046   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1047   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1048 
1049 #if defined(PETSC_USE_INFO)
1050   {
1051     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1052     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1053     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1054     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1055     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1056     if (diagonal_fill) {
1057       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1058     }
1059   }
1060 #endif
1061 
1062   /* put together the new matrix */
1063   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1064   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1065   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1066   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1067   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1068   b = (Mat_SeqAIJ*)(*fact)->data;
1069   b->freedata     = PETSC_TRUE;
1070   b->singlemalloc = PETSC_FALSE;
1071   len = (bi[n] )*sizeof(PetscScalar);
1072   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1073   b->j          = bj;
1074   b->i          = bi;
1075   for (i=0; i<n; i++) bdiag[i] += bi[i];
1076   b->diag       = bdiag;
1077   b->ilen       = 0;
1078   b->imax       = 0;
1079   b->row        = isrow;
1080   b->col        = iscol;
1081   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1082   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1083   b->icol       = isicol;
1084   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1085   /* In b structure:  Free imax, ilen, old a, old j.
1086      Allocate bdiag, solve_work, new a, new j */
1087   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1088   b->maxnz             = b->nz = bi[n] ;
1089   (*fact)->factor = FACTOR_LU;
1090   (*fact)->info.factor_mallocs    = reallocs;
1091   (*fact)->info.fill_ratio_given  = f;
1092   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1093 
1094   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1095   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #include "src/mat/impls/sbaij/seq/sbaij.h"
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1103 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1104 {
1105   Mat            C = *B;
1106   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1107   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1108   IS             ip=b->row;
1109   PetscErrorCode ierr;
1110   PetscInt       *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1111   PetscInt       *ai=a->i,*aj=a->j;
1112   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1113   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1114   PetscReal      zeropivot,rs,shiftnz;
1115   PetscReal      shiftpd;
1116   ChShift_Ctx    sctx;
1117   PetscInt       newshift;
1118 
1119   PetscFunctionBegin;
1120   shiftnz   = info->shiftnz;
1121   shiftpd   = info->shiftpd;
1122   zeropivot = info->zeropivot;
1123 
1124   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1125 
1126   /* initialization */
1127   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1128   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1129   jl   = il + mbs;
1130   rtmp = (MatScalar*)(jl + mbs);
1131 
1132   sctx.shift_amount = 0;
1133   sctx.nshift       = 0;
1134   do {
1135     sctx.chshift = PETSC_FALSE;
1136     for (i=0; i<mbs; i++) {
1137       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1138     }
1139 
1140     for (k = 0; k<mbs; k++){
1141       bval = ba + bi[k];
1142       /* initialize k-th row by the perm[k]-th row of A */
1143       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1144       for (j = jmin; j < jmax; j++){
1145         col = rip[aj[j]];
1146         if (col >= k){ /* only take upper triangular entry */
1147           rtmp[col] = aa[j];
1148           *bval++  = 0.0; /* for in-place factorization */
1149         }
1150       }
1151       /* shift the diagonal of the matrix */
1152       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1153 
1154       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1155       dk = rtmp[k];
1156       i = jl[k]; /* first row to be added to k_th row  */
1157 
1158       while (i < k){
1159         nexti = jl[i]; /* next row to be added to k_th row */
1160 
1161         /* compute multiplier, update diag(k) and U(i,k) */
1162         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1163         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1164         dk += uikdi*ba[ili];
1165         ba[ili] = uikdi; /* -U(i,k) */
1166 
1167         /* add multiple of row i to k-th row */
1168         jmin = ili + 1; jmax = bi[i+1];
1169         if (jmin < jmax){
1170           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1171           /* update il and jl for row i */
1172           il[i] = jmin;
1173           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1174         }
1175         i = nexti;
1176       }
1177 
1178       /* shift the diagonals when zero pivot is detected */
1179       /* compute rs=sum of abs(off-diagonal) */
1180       rs   = 0.0;
1181       jmin = bi[k]+1;
1182       nz   = bi[k+1] - jmin;
1183       if (nz){
1184         bcol = bj + jmin;
1185         while (nz--){
1186           rs += PetscAbsScalar(rtmp[*bcol]);
1187           bcol++;
1188         }
1189       }
1190 
1191       sctx.rs = rs;
1192       sctx.pv = dk;
1193       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1194       if (newshift == 1){
1195         break;    /* sctx.shift_amount is updated */
1196       } else if (newshift == -1){
1197         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %G tolerance %G * rs %G",k,PetscAbsScalar(dk),zeropivot,rs);
1198       }
1199 
1200       /* copy data into U(k,:) */
1201       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1202       jmin = bi[k]+1; jmax = bi[k+1];
1203       if (jmin < jmax) {
1204         for (j=jmin; j<jmax; j++){
1205           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1206         }
1207         /* add the k-th row into il and jl */
1208         il[k] = jmin;
1209         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1210       }
1211     }
1212   } while (sctx.chshift);
1213   ierr = PetscFree(il);CHKERRQ(ierr);
1214 
1215   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1216   C->factor       = FACTOR_CHOLESKY;
1217   C->assembled    = PETSC_TRUE;
1218   C->preallocated = PETSC_TRUE;
1219   ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr);
1220   if (sctx.nshift){
1221     if (shiftnz) {
1222       ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1223     } else if (shiftpd) {
1224       ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1225     }
1226   }
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1232 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1233 {
1234   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1235   Mat_SeqSBAIJ       *b;
1236   Mat                B;
1237   PetscErrorCode     ierr;
1238   PetscTruth         perm_identity;
1239   PetscInt           reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1240   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1241   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1242   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1243   PetscReal          fill=info->fill,levels=info->levels;
1244   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1245   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1246   PetscBT            lnkbt;
1247 
1248   PetscFunctionBegin;
1249   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1250   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1251 
1252   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1253   ui[0] = 0;
1254 
1255   /* special case that simply copies fill pattern */
1256   if (!levels && perm_identity) {
1257     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1258     for (i=0; i<am; i++) {
1259       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1260     }
1261     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1262     cols = uj;
1263     for (i=0; i<am; i++) {
1264       aj    = a->j + a->diag[i];
1265       ncols = ui[i+1] - ui[i];
1266       for (j=0; j<ncols; j++) *cols++ = *aj++;
1267     }
1268   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1269     /* initialization */
1270     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1271 
1272     /* jl: linked list for storing indices of the pivot rows
1273        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1274     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1275     il         = jl + am;
1276     uj_ptr     = (PetscInt**)(il + am);
1277     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1278     for (i=0; i<am; i++){
1279       jl[i] = am; il[i] = 0;
1280     }
1281 
1282     /* create and initialize a linked list for storing column indices of the active row k */
1283     nlnk = am + 1;
1284     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1285 
1286     /* initial FreeSpace size is fill*(ai[am]+1) */
1287     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1288     current_space = free_space;
1289     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1290     current_space_lvl = free_space_lvl;
1291 
1292     for (k=0; k<am; k++){  /* for each active row k */
1293       /* initialize lnk by the column indices of row rip[k] of A */
1294       nzk   = 0;
1295       ncols = ai[rip[k]+1] - ai[rip[k]];
1296       ncols_upper = 0;
1297       for (j=0; j<ncols; j++){
1298         i = *(aj + ai[rip[k]] + j);
1299         if (rip[i] >= k){ /* only take upper triangular entry */
1300           ajtmp[ncols_upper] = i;
1301           ncols_upper++;
1302         }
1303       }
1304       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1305       nzk += nlnk;
1306 
1307       /* update lnk by computing fill-in for each pivot row to be merged in */
1308       prow = jl[k]; /* 1st pivot row */
1309 
1310       while (prow < k){
1311         nextprow = jl[prow];
1312 
1313         /* merge prow into k-th row */
1314         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1315         jmax = ui[prow+1];
1316         ncols = jmax-jmin;
1317         i     = jmin - ui[prow];
1318         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1319         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1320         j     = *(uj - 1);
1321         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1322         nzk += nlnk;
1323 
1324         /* update il and jl for prow */
1325         if (jmin < jmax){
1326           il[prow] = jmin;
1327           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1328         }
1329         prow = nextprow;
1330       }
1331 
1332       /* if free space is not available, make more free space */
1333       if (current_space->local_remaining<nzk) {
1334         i = am - k + 1; /* num of unfactored rows */
1335         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1336         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1337         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1338         reallocs++;
1339       }
1340 
1341       /* copy data into free_space and free_space_lvl, then initialize lnk */
1342       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1343 
1344       /* add the k-th row into il and jl */
1345       if (nzk > 1){
1346         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1347         jl[k] = jl[i]; jl[i] = k;
1348         il[k] = ui[k] + 1;
1349       }
1350       uj_ptr[k]     = current_space->array;
1351       uj_lvl_ptr[k] = current_space_lvl->array;
1352 
1353       current_space->array           += nzk;
1354       current_space->local_used      += nzk;
1355       current_space->local_remaining -= nzk;
1356 
1357       current_space_lvl->array           += nzk;
1358       current_space_lvl->local_used      += nzk;
1359       current_space_lvl->local_remaining -= nzk;
1360 
1361       ui[k+1] = ui[k] + nzk;
1362     }
1363 
1364 #if defined(PETSC_USE_INFO)
1365     if (ai[am] != 0) {
1366       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1367       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1368       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1369       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1370     } else {
1371       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1372     }
1373 #endif
1374 
1375     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1376     ierr = PetscFree(jl);CHKERRQ(ierr);
1377     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1378 
1379     /* destroy list of free space and other temporary array(s) */
1380     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1381     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1382     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1383     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1384 
1385   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1386 
1387   /* put together the new matrix in MATSEQSBAIJ format */
1388   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1389   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1390   B = *fact;
1391   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1392   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1393 
1394   b    = (Mat_SeqSBAIJ*)B->data;
1395   b->singlemalloc = PETSC_FALSE;
1396   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1397   b->j    = uj;
1398   b->i    = ui;
1399   b->diag = 0;
1400   b->ilen = 0;
1401   b->imax = 0;
1402   b->row  = perm;
1403   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1404   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1405   b->icol = perm;
1406   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1407   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1408   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1409   b->maxnz = b->nz = ui[am];
1410 
1411   B->factor                 = FACTOR_CHOLESKY;
1412   B->info.factor_mallocs    = reallocs;
1413   B->info.fill_ratio_given  = fill;
1414   if (ai[am] != 0) {
1415     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1416   } else {
1417     B->info.fill_ratio_needed = 0.0;
1418   }
1419   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1420   if (perm_identity){
1421     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1422     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1423   }
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1429 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1430 {
1431   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1432   Mat_SeqSBAIJ       *b;
1433   Mat                B;
1434   PetscErrorCode     ierr;
1435   PetscTruth         perm_identity;
1436   PetscReal          fill = info->fill;
1437   PetscInt           *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1438   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1439   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1440   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1441   PetscBT            lnkbt;
1442   IS                 iperm;
1443 
1444   PetscFunctionBegin;
1445   /* check whether perm is the identity mapping */
1446   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1447   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1448 
1449   if (!perm_identity){
1450     /* check if perm is symmetric! */
1451     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1452     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1453     for (i=0; i<am; i++) {
1454       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1455     }
1456     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1457     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1458   }
1459 
1460   /* initialization */
1461   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1462   ui[0] = 0;
1463 
1464   /* jl: linked list for storing indices of the pivot rows
1465      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1466   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1467   il     = jl + am;
1468   cols   = il + am;
1469   ui_ptr = (PetscInt**)(cols + am);
1470   for (i=0; i<am; i++){
1471     jl[i] = am; il[i] = 0;
1472   }
1473 
1474   /* create and initialize a linked list for storing column indices of the active row k */
1475   nlnk = am + 1;
1476   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1477 
1478   /* initial FreeSpace size is fill*(ai[am]+1) */
1479   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1480   current_space = free_space;
1481 
1482   for (k=0; k<am; k++){  /* for each active row k */
1483     /* initialize lnk by the column indices of row rip[k] of A */
1484     nzk   = 0;
1485     ncols = ai[rip[k]+1] - ai[rip[k]];
1486     ncols_upper = 0;
1487     for (j=0; j<ncols; j++){
1488       i = rip[*(aj + ai[rip[k]] + j)];
1489       if (i >= k){ /* only take upper triangular entry */
1490         cols[ncols_upper] = i;
1491         ncols_upper++;
1492       }
1493     }
1494     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1495     nzk += nlnk;
1496 
1497     /* update lnk by computing fill-in for each pivot row to be merged in */
1498     prow = jl[k]; /* 1st pivot row */
1499 
1500     while (prow < k){
1501       nextprow = jl[prow];
1502       /* merge prow into k-th row */
1503       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1504       jmax = ui[prow+1];
1505       ncols = jmax-jmin;
1506       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1507       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1508       nzk += nlnk;
1509 
1510       /* update il and jl for prow */
1511       if (jmin < jmax){
1512         il[prow] = jmin;
1513         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1514       }
1515       prow = nextprow;
1516     }
1517 
1518     /* if free space is not available, make more free space */
1519     if (current_space->local_remaining<nzk) {
1520       i = am - k + 1; /* num of unfactored rows */
1521       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1522       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1523       reallocs++;
1524     }
1525 
1526     /* copy data into free space, then initialize lnk */
1527     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1528 
1529     /* add the k-th row into il and jl */
1530     if (nzk-1 > 0){
1531       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1532       jl[k] = jl[i]; jl[i] = k;
1533       il[k] = ui[k] + 1;
1534     }
1535     ui_ptr[k] = current_space->array;
1536     current_space->array           += nzk;
1537     current_space->local_used      += nzk;
1538     current_space->local_remaining -= nzk;
1539 
1540     ui[k+1] = ui[k] + nzk;
1541   }
1542 
1543 #if defined(PETSC_USE_INFO)
1544   if (ai[am] != 0) {
1545     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1546     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1547     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1548     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1549   } else {
1550      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1551   }
1552 #endif
1553 
1554   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1555   ierr = PetscFree(jl);CHKERRQ(ierr);
1556 
1557   /* destroy list of free space and other temporary array(s) */
1558   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1559   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1560   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1561 
1562   /* put together the new matrix in MATSEQSBAIJ format */
1563   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1564   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1565   B    = *fact;
1566   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1567   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1568 
1569   b = (Mat_SeqSBAIJ*)B->data;
1570   b->singlemalloc = PETSC_FALSE;
1571   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1572   b->j    = uj;
1573   b->i    = ui;
1574   b->diag = 0;
1575   b->ilen = 0;
1576   b->imax = 0;
1577   b->row  = perm;
1578   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1579   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1580   b->icol = perm;
1581   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1582   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1583   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1584   b->maxnz = b->nz = ui[am];
1585 
1586   B->factor                 = FACTOR_CHOLESKY;
1587   B->info.factor_mallocs    = reallocs;
1588   B->info.fill_ratio_given  = fill;
1589   if (ai[am] != 0) {
1590     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1591   } else {
1592     B->info.fill_ratio_needed = 0.0;
1593   }
1594   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1595   if (perm_identity){
1596     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1597     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601