xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision a96a251defc07734f733d1abf27b5765e87fdc8b)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
23 
24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   PetscInt       *c,*r,*ic,i,n = A->m;
64   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65   PetscInt       *ordcol,*iwk,*iperm,*jw;
66   PetscInt       jmax,lfill,job,*o_i,*o_j;
67   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68   PetscReal      af;
69 
70   PetscFunctionBegin;
71 
72   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
73   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
75   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
76   lfill   = (PetscInt)(info->dtcount/2.0);
77   jmax    = (PetscInt)(info->fill*a->nz);
78 
79 
80   /* ------------------------------------------------------------
81      If reorder=.TRUE., then the original matrix has to be
82      reordered to reflect the user selected ordering scheme, and
83      then de-reordered so it is in it's original format.
84      Because Saad's dperm() is NOT in place, we have to copy
85      the original matrix and allocate more storage. . .
86      ------------------------------------------------------------
87   */
88 
89   /* set reorder to true if either isrow or iscol is not identity */
90   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
91   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
92   reorder = PetscNot(reorder);
93 
94 
95   /* storage for ilu factor */
96   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
99   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
100 
101   /* ------------------------------------------------------------
102      Make sure that everything is Fortran formatted (1-Based)
103      ------------------------------------------------------------
104   */
105   for (i=old_i[0];i<old_i[n];i++) {
106     old_j[i]++;
107   }
108   for(i=0;i<n+1;i++) {
109     old_i[i]++;
110   };
111 
112 
113   if (reorder) {
114     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
115     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
116     for(i=0;i<n;i++) {
117       r[i]  = r[i]+1;
118       c[i]  = c[i]+1;
119     }
120     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
122     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
123     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124     for (i=0;i<n;i++) {
125       r[i]  = r[i]-1;
126       c[i]  = c[i]-1;
127     }
128     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
130     o_a = old_a2;
131     o_j = old_j2;
132     o_i = old_i2;
133   } else {
134     o_a = old_a;
135     o_j = old_j;
136     o_i = old_i;
137   }
138 
139   /* ------------------------------------------------------------
140      Call Saad's ilutp() routine to generate the factorization
141      ------------------------------------------------------------
142   */
143 
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
145   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
146   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
147 
148   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149   if (sierr) {
150     switch (sierr) {
151       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
152       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
153       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157     }
158   }
159 
160   ierr = PetscFree(w);CHKERRQ(ierr);
161   ierr = PetscFree(jw);CHKERRQ(ierr);
162 
163   /* ------------------------------------------------------------
164      Saad's routine gives the result in Modified Sparse Row (msr)
165      Convert to Compressed Sparse Row format (csr)
166      ------------------------------------------------------------
167   */
168 
169   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
170   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
171 
172   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173 
174   ierr = PetscFree(iwk);CHKERRQ(ierr);
175   ierr = PetscFree(wk);CHKERRQ(ierr);
176 
177   if (reorder) {
178     ierr = PetscFree(old_a2);CHKERRQ(ierr);
179     ierr = PetscFree(old_j2);CHKERRQ(ierr);
180     ierr = PetscFree(old_i2);CHKERRQ(ierr);
181   } else {
182     /* fix permutation of old_j that the factorization introduced */
183     for (i=old_i[0]; i<old_i[n]; i++) {
184       old_j[i-1] = iperm[old_j[i-1]-1];
185     }
186   }
187 
188   /* get rid of the shift to indices starting at 1 */
189   for (i=0; i<n+1; i++) {
190     old_i[i]--;
191   }
192   for (i=old_i[0];i<old_i[n];i++) {
193     old_j[i]--;
194   }
195 
196   /* Make the factored matrix 0-based */
197   for (i=0; i<n+1; i++) {
198     new_i[i]--;
199   }
200   for (i=new_i[0];i<new_i[n];i++) {
201     new_j[i]--;
202   }
203 
204   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205   /*-- permute the right-hand-side and solution vectors           --*/
206   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
207   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
208   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
209   for(i=0; i<n; i++) {
210     ordcol[i] = ic[iperm[i]-1];
211   };
212   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
213   ierr = ISDestroy(isicol);CHKERRQ(ierr);
214 
215   ierr = PetscFree(iperm);CHKERRQ(ierr);
216 
217   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
218   ierr = PetscFree(ordcol);CHKERRQ(ierr);
219 
220   /*----- put together the new matrix -----*/
221 
222   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   ierr = PetscFree(b->imax);CHKERRQ(ierr);
230   b->freedata      = PETSC_TRUE;
231   b->sorted        = PETSC_FALSE;
232   b->singlemalloc  = PETSC_FALSE;
233   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
234   b->a             = new_a;
235   b->j             = new_j;
236   b->i             = new_i;
237   b->ilen          = 0;
238   b->imax          = 0;
239   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240   b->row           = isirow;
241   b->col           = iscolf;
242   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
243   b->maxnz = b->nz = new_i[n];
244   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
245   (*fact)->info.factor_mallocs = 0;
246 
247   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
248 
249   af = ((double)b->nz)/((double)a->nz) + .001;
250   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr);
251   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
252   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
253   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
254 
255   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 #undef __FUNCT__
262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
264 {
265   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
266   IS             isicol;
267   PetscErrorCode ierr;
268   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
269   PetscInt       *bi,*bj,*ajtmp;
270   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
271   PetscReal      f;
272   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
273   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
274   PetscBT        lnkbt;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
284   bi[0] = 0;
285 
286   /* bdiag is location of diagonal in factor */
287   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
288   bdiag[0] = 0;
289 
290   /* linked list for storing column indices of the active row */
291   nlnk = n + 1;
292   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
293 
294   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
295   im     = cols + n;
296   bi_ptr = (PetscInt**)(im + n);
297 
298   /* initial FreeSpace size is f*(ai[n]+1) */
299   f = info->fill;
300   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
301   current_space = free_space;
302 
303   for (i=0; i<n; i++) {
304     /* copy previous fill into linked list */
305     nzi = 0;
306     nnz = ai[r[i]+1] - ai[r[i]];
307     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
308     ajtmp = aj + ai[r[i]];
309     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
310     ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
311     nzi += nlnk;
312 
313     /* add pivot rows into linked list */
314     row = lnk[n];
315     while (row < i) {
316       nzbd    = bdiag[row] - bi[row] + 1;
317       ajtmp   = bi_ptr[row] + nzbd;
318       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i */
319       im[row] = nzbd;
320       ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr);
321       nzi     += nlnk;
322       im[row] += nzbd;  /* update im[row]: num of cols with index<=i */
323 
324       row = lnk[row];
325     }
326 
327     bi[i+1] = bi[i] + nzi;
328     im[i]   = nzi;
329 
330     /* mark bdiag */
331     nzbd = 0;
332     nnz  = nzi;
333     k    = lnk[n];
334     while (nnz-- && k < i){
335       nzbd++;
336       k = lnk[k];
337     }
338     bdiag[i] = bi[i] + nzbd;
339 
340     /* if free space is not available, make more free space */
341     if (current_space->local_remaining<nzi) {
342       nnz = (n - i)*nzi; /* estimated and max additional space needed */
343       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
344       reallocs++;
345     }
346 
347     /* copy data into free space, then initialize lnk */
348     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
349     bi_ptr[i] = current_space->array;
350     current_space->array           += nzi;
351     current_space->local_used      += nzi;
352     current_space->local_remaining -= nzi;
353   }
354 #if defined(PETSC_USE_DEBUG)
355   if (ai[n] != 0) {
356     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
357     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
358     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af));CHKERRQ(ierr);
359     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
360     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
361   } else {
362     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr);
363   }
364 #endif
365 
366   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
367   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
368 
369   /* destroy list of free space and other temporary array(s) */
370   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
371   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
372   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
373   ierr = PetscFree(cols);CHKERRQ(ierr);
374 
375   /* put together the new matrix */
376   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
377   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
378   ierr = MatSeqAIJSetPreallocation(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
379   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
380   b    = (Mat_SeqAIJ*)(*B)->data;
381   ierr = PetscFree(b->imax);CHKERRQ(ierr);
382   b->freedata     = PETSC_TRUE;
383   b->singlemalloc = PETSC_FALSE;
384   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
385   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
386   b->j          = bj;
387   b->i          = bi;
388   b->diag       = bdiag;
389   b->ilen       = 0;
390   b->imax       = 0;
391   b->row        = isrow;
392   b->col        = iscol;
393   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
394   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
395   b->icol       = isicol;
396   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
397 
398   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
399   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
400   b->maxnz = b->nz = bi[n] ;
401 
402   (*B)->factor                 =  FACTOR_LU;
403   (*B)->info.factor_mallocs    = reallocs;
404   (*B)->info.fill_ratio_given  = f;
405 
406   if (ai[n] != 0) {
407     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
408   } else {
409     (*B)->info.fill_ratio_needed = 0.0;
410   }
411   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
412   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
413   PetscFunctionReturn(0);
414 }
415 
416 /* ----------------------------------------------------------- */
417 #undef __FUNCT__
418 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
419 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
420 {
421   Mat            C=*B;
422   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
423   IS             isrow = b->row,isicol = b->icol;
424   PetscErrorCode ierr;
425   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
426   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
427   PetscInt       *diag_offset = b->diag,diag,*pj;
428   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
429   PetscScalar    d;
430   PetscReal      rs;
431   LUShift_Ctx    sctx;
432   PetscInt       newshift;
433 
434   PetscFunctionBegin;
435   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
436   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
437   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
438   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
439   rtmps = rtmp; ics = ic;
440 
441   if (!a->diag) {
442     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
443   }
444   /* if both shift schemes are chosen by user, only use info->shiftpd */
445   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
446   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
447     PetscInt *aai = a->i,*ddiag = a->diag;
448     sctx.shift_top = 0;
449     for (i=0; i<n; i++) {
450       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
451       d  = (a->a)[ddiag[i]];
452       rs = -PetscAbsScalar(d) - PetscRealPart(d);
453       v  = a->a+aai[i];
454       nz = aai[i+1] - aai[i];
455       for (j=0; j<nz; j++)
456 	rs += PetscAbsScalar(v[j]);
457       if (rs>sctx.shift_top) sctx.shift_top = rs;
458     }
459     if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
460     sctx.shift_top    *= 1.1;
461     sctx.nshift_max   = 5;
462     sctx.shift_lo     = 0.;
463     sctx.shift_hi     = 1.;
464   }
465 
466   sctx.shift_amount = 0;
467   sctx.nshift       = 0;
468   do {
469     sctx.lushift = PETSC_FALSE;
470     for (i=0; i<n; i++){
471       nz    = bi[i+1] - bi[i];
472       bjtmp = bj + bi[i];
473       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
474 
475       /* load in initial (unfactored row) */
476       nz    = a->i[r[i]+1] - a->i[r[i]];
477       ajtmp = a->j + a->i[r[i]];
478       v     = a->a + a->i[r[i]];
479       for (j=0; j<nz; j++) {
480         rtmp[ics[ajtmp[j]]] = v[j];
481       }
482       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
483 
484       row = *bjtmp++;
485       while  (row < i) {
486         pc = rtmp + row;
487         if (*pc != 0.0) {
488           pv         = b->a + diag_offset[row];
489           pj         = b->j + diag_offset[row] + 1;
490           multiplier = *pc / *pv++;
491           *pc        = multiplier;
492           nz         = bi[row+1] - diag_offset[row] - 1;
493           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
494           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
495         }
496         row = *bjtmp++;
497       }
498       /* finished row so stick it into b->a */
499       pv   = b->a + bi[i] ;
500       pj   = b->j + bi[i] ;
501       nz   = bi[i+1] - bi[i];
502       diag = diag_offset[i] - bi[i];
503       rs   = 0.0;
504       for (j=0; j<nz; j++) {
505         pv[j] = rtmps[pj[j]];
506         if (j != diag) rs += PetscAbsScalar(pv[j]);
507       }
508 
509       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
510       sctx.rs  = rs;
511       sctx.pv  = pv[diag];
512       ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
513       if (newshift == 1){
514         break;    /* sctx.shift_amount is updated */
515       } else if (newshift == -1){
516         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
517       }
518     }
519 
520     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
521       /*
522        * if no shift in this attempt & shifting & started shifting & can refine,
523        * then try lower shift
524        */
525       sctx.shift_hi        = info->shift_fraction;
526       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
527       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
528       sctx.lushift         = PETSC_TRUE;
529       sctx.nshift++;
530     }
531   } while (sctx.lushift);
532 
533   /* invert diagonal entries for simplier triangular solves */
534   for (i=0; i<n; i++) {
535     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
536   }
537 
538   ierr = PetscFree(rtmp);CHKERRQ(ierr);
539   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
540   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
541   C->factor = FACTOR_LU;
542   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
543   C->assembled = PETSC_TRUE;
544   ierr = PetscLogFlops(C->n);CHKERRQ(ierr);
545   if (sctx.nshift){
546     if (info->shiftnz) {
547       ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
548     } else if (info->shiftpd) {
549       ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr);
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
557 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
558 {
559   PetscFunctionBegin;
560   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
561   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
562   PetscFunctionReturn(0);
563 }
564 
565 
566 /* ----------------------------------------------------------- */
567 #undef __FUNCT__
568 #define __FUNCT__ "MatLUFactor_SeqAIJ"
569 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
570 {
571   PetscErrorCode ierr;
572   Mat            C;
573 
574   PetscFunctionBegin;
575   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
576   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
577   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
578   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 /* ----------------------------------------------------------- */
582 #undef __FUNCT__
583 #define __FUNCT__ "MatSolve_SeqAIJ"
584 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
585 {
586   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
587   IS             iscol = a->col,isrow = a->row;
588   PetscErrorCode ierr;
589   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
590   PetscInt       nz,*rout,*cout;
591   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
592 
593   PetscFunctionBegin;
594   if (!n) PetscFunctionReturn(0);
595 
596   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
597   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
598   tmp  = a->solve_work;
599 
600   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
601   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
602 
603   /* forward solve the lower triangular */
604   tmp[0] = b[*r++];
605   tmps   = tmp;
606   for (i=1; i<n; i++) {
607     v   = aa + ai[i] ;
608     vi  = aj + ai[i] ;
609     nz  = a->diag[i] - ai[i];
610     sum = b[*r++];
611     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
612     tmp[i] = sum;
613   }
614 
615   /* backward solve the upper triangular */
616   for (i=n-1; i>=0; i--){
617     v   = aa + a->diag[i] + 1;
618     vi  = aj + a->diag[i] + 1;
619     nz  = ai[i+1] - a->diag[i] - 1;
620     sum = tmp[i];
621     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
622     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
623   }
624 
625   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
626   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
627   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
628   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
629   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 /* ----------------------------------------------------------- */
634 #undef __FUNCT__
635 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
636 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
637 {
638   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
639   PetscErrorCode ierr;
640   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
641   PetscScalar    *x,*b,*aa = a->a;
642 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
643   PetscInt       adiag_i,i,*vi,nz,ai_i;
644   PetscScalar    *v,sum;
645 #endif
646 
647   PetscFunctionBegin;
648   if (!n) PetscFunctionReturn(0);
649 
650   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
651   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
652 
653 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
654   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
655 #else
656   /* forward solve the lower triangular */
657   x[0] = b[0];
658   for (i=1; i<n; i++) {
659     ai_i = ai[i];
660     v    = aa + ai_i;
661     vi   = aj + ai_i;
662     nz   = adiag[i] - ai_i;
663     sum  = b[i];
664     while (nz--) sum -= *v++ * x[*vi++];
665     x[i] = sum;
666   }
667 
668   /* backward solve the upper triangular */
669   for (i=n-1; i>=0; i--){
670     adiag_i = adiag[i];
671     v       = aa + adiag_i + 1;
672     vi      = aj + adiag_i + 1;
673     nz      = ai[i+1] - adiag_i - 1;
674     sum     = x[i];
675     while (nz--) sum -= *v++ * x[*vi++];
676     x[i]    = sum*aa[adiag_i];
677   }
678 #endif
679   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
680   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
681   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 #undef __FUNCT__
686 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
687 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
688 {
689   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
690   IS             iscol = a->col,isrow = a->row;
691   PetscErrorCode ierr;
692   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
693   PetscInt       nz,*rout,*cout;
694   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
695 
696   PetscFunctionBegin;
697   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
698 
699   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
700   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
701   tmp  = a->solve_work;
702 
703   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
704   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
705 
706   /* forward solve the lower triangular */
707   tmp[0] = b[*r++];
708   for (i=1; i<n; i++) {
709     v   = aa + ai[i] ;
710     vi  = aj + ai[i] ;
711     nz  = a->diag[i] - ai[i];
712     sum = b[*r++];
713     while (nz--) sum -= *v++ * tmp[*vi++ ];
714     tmp[i] = sum;
715   }
716 
717   /* backward solve the upper triangular */
718   for (i=n-1; i>=0; i--){
719     v   = aa + a->diag[i] + 1;
720     vi  = aj + a->diag[i] + 1;
721     nz  = ai[i+1] - a->diag[i] - 1;
722     sum = tmp[i];
723     while (nz--) sum -= *v++ * tmp[*vi++ ];
724     tmp[i] = sum*aa[a->diag[i]];
725     x[*c--] += tmp[i];
726   }
727 
728   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
729   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
730   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
731   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
732   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
733 
734   PetscFunctionReturn(0);
735 }
736 /* -------------------------------------------------------------------*/
737 #undef __FUNCT__
738 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
739 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
740 {
741   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
742   IS             iscol = a->col,isrow = a->row;
743   PetscErrorCode ierr;
744   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
745   PetscInt       nz,*rout,*cout,*diag = a->diag;
746   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
747 
748   PetscFunctionBegin;
749   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
750   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
751   tmp  = a->solve_work;
752 
753   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
754   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
755 
756   /* copy the b into temp work space according to permutation */
757   for (i=0; i<n; i++) tmp[i] = b[c[i]];
758 
759   /* forward solve the U^T */
760   for (i=0; i<n; i++) {
761     v   = aa + diag[i] ;
762     vi  = aj + diag[i] + 1;
763     nz  = ai[i+1] - diag[i] - 1;
764     s1  = tmp[i];
765     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
766     while (nz--) {
767       tmp[*vi++ ] -= (*v++)*s1;
768     }
769     tmp[i] = s1;
770   }
771 
772   /* backward solve the L^T */
773   for (i=n-1; i>=0; i--){
774     v   = aa + diag[i] - 1 ;
775     vi  = aj + diag[i] - 1 ;
776     nz  = diag[i] - ai[i];
777     s1  = tmp[i];
778     while (nz--) {
779       tmp[*vi-- ] -= (*v--)*s1;
780     }
781   }
782 
783   /* copy tmp into x according to permutation */
784   for (i=0; i<n; i++) x[r[i]] = tmp[i];
785 
786   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
787   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
788   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
789   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
790 
791   ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
797 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
798 {
799   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
800   IS             iscol = a->col,isrow = a->row;
801   PetscErrorCode ierr;
802   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
803   PetscInt       nz,*rout,*cout,*diag = a->diag;
804   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
805 
806   PetscFunctionBegin;
807   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
808 
809   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
810   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
811   tmp = a->solve_work;
812 
813   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
814   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
815 
816   /* copy the b into temp work space according to permutation */
817   for (i=0; i<n; i++) tmp[i] = b[c[i]];
818 
819   /* forward solve the U^T */
820   for (i=0; i<n; i++) {
821     v   = aa + diag[i] ;
822     vi  = aj + diag[i] + 1;
823     nz  = ai[i+1] - diag[i] - 1;
824     tmp[i] *= *v++;
825     while (nz--) {
826       tmp[*vi++ ] -= (*v++)*tmp[i];
827     }
828   }
829 
830   /* backward solve the L^T */
831   for (i=n-1; i>=0; i--){
832     v   = aa + diag[i] - 1 ;
833     vi  = aj + diag[i] - 1 ;
834     nz  = diag[i] - ai[i];
835     while (nz--) {
836       tmp[*vi-- ] -= (*v--)*tmp[i];
837     }
838   }
839 
840   /* copy tmp into x according to permutation */
841   for (i=0; i<n; i++) x[r[i]] += tmp[i];
842 
843   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
844   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
845   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
846   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
847 
848   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
849   PetscFunctionReturn(0);
850 }
851 /* ----------------------------------------------------------------*/
852 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
856 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
857 {
858   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
859   IS             isicol;
860   PetscErrorCode ierr;
861   PetscInt       *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
862   PetscInt       *bi,*cols,nnz,*cols_lvl;
863   PetscInt       *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
864   PetscInt       i,levels,diagonal_fill;
865   PetscTruth     col_identity,row_identity;
866   PetscReal      f;
867   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
868   PetscBT        lnkbt;
869   PetscInt       nzi,*bj,**bj_ptr,**bjlvl_ptr;
870   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
871   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
872 
873   PetscFunctionBegin;
874   f             = info->fill;
875   levels        = (PetscInt)info->levels;
876   diagonal_fill = (PetscInt)info->diagonal_fill;
877   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
878 
879   /* special case that simply copies fill pattern */
880   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
881   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
882   if (!levels && row_identity && col_identity) {
883     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
884     (*fact)->factor = FACTOR_LU;
885     b               = (Mat_SeqAIJ*)(*fact)->data;
886     if (!b->diag) {
887       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
888     }
889     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
890     b->row              = isrow;
891     b->col              = iscol;
892     b->icol             = isicol;
893     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
894     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
895     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
896     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
897     PetscFunctionReturn(0);
898   }
899 
900   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
901   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
902 
903   /* get new row pointers */
904   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
905   bi[0] = 0;
906   /* bdiag is location of diagonal in factor */
907   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
908   bdiag[0]  = 0;
909 
910   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
911   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
912 
913   /* create a linked list for storing column indices of the active row */
914   nlnk = n + 1;
915   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
916 
917   /* initial FreeSpace size is f*(ai[n]+1) */
918   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
919   current_space = free_space;
920   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
921   current_space_lvl = free_space_lvl;
922 
923   for (i=0; i<n; i++) {
924     nzi = 0;
925     /* copy current row into linked list */
926     nnz  = ai[r[i]+1] - ai[r[i]];
927     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
928     cols = aj + ai[r[i]];
929     lnk[i] = -1; /* marker to indicate if diagonal exists */
930     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
931     nzi += nlnk;
932 
933     /* make sure diagonal entry is included */
934     if (diagonal_fill && lnk[i] == -1) {
935       fm = n;
936       while (lnk[fm] < i) fm = lnk[fm];
937       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
938       lnk[fm]    = i;
939       lnk_lvl[i] = 0;
940       nzi++; dcount++;
941     }
942 
943     /* add pivot rows into the active row */
944     nzbd = 0;
945     prow = lnk[n];
946     while (prow < i) {
947       nnz      = bdiag[prow];
948       cols     = bj_ptr[prow] + nnz + 1;
949       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
950       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
951       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
952       nzi += nlnk;
953       prow = lnk[prow];
954       nzbd++;
955     }
956     bdiag[i] = nzbd;
957     bi[i+1]  = bi[i] + nzi;
958 
959     /* if free space is not available, make more free space */
960     if (current_space->local_remaining<nzi) {
961       nnz = nzi*(n - i); /* estimated and max additional space needed */
962       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
963       ierr = GetMoreSpace(nnz,&current_space_lvl);CHKERRQ(ierr);
964       reallocs++;
965     }
966 
967     /* copy data into free_space and free_space_lvl, then initialize lnk */
968     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
969     bj_ptr[i]    = current_space->array;
970     bjlvl_ptr[i] = current_space_lvl->array;
971 
972     /* make sure the active row i has diagonal entry */
973     if (*(bj_ptr[i]+bdiag[i]) != i) {
974       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
975     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
976     }
977 
978     current_space->array           += nzi;
979     current_space->local_used      += nzi;
980     current_space->local_remaining -= nzi;
981     current_space_lvl->array           += nzi;
982     current_space_lvl->local_used      += nzi;
983     current_space_lvl->local_remaining -= nzi;
984   }
985 
986   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
987   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
988 
989   /* destroy list of free space and other temporary arrays */
990   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
991   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
992   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
993   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
994   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
995 
996 #if defined(PETSC_USE_DEBUG)
997   {
998     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
999     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
1000     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
1001     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr);
1002     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
1003     if (diagonal_fill) {
1004       ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr);
1005     }
1006   }
1007 #endif
1008 
1009   /* put together the new matrix */
1010   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1011   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1012   ierr = MatSeqAIJSetPreallocation(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1013   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1014   b = (Mat_SeqAIJ*)(*fact)->data;
1015   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1016   b->freedata     = PETSC_TRUE;
1017   b->singlemalloc = PETSC_FALSE;
1018   len = (bi[n] )*sizeof(PetscScalar);
1019   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1020   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1021   b->j          = bj;
1022   b->i          = bi;
1023   for (i=0; i<n; i++) bdiag[i] += bi[i];
1024   b->diag       = bdiag;
1025   b->ilen       = 0;
1026   b->imax       = 0;
1027   b->row        = isrow;
1028   b->col        = iscol;
1029   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1030   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1031   b->icol       = isicol;
1032   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1033   /* In b structure:  Free imax, ilen, old a, old j.
1034      Allocate bdiag, solve_work, new a, new j */
1035   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1036   b->maxnz             = b->nz = bi[n] ;
1037   (*fact)->factor = FACTOR_LU;
1038   (*fact)->info.factor_mallocs    = reallocs;
1039   (*fact)->info.fill_ratio_given  = f;
1040   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1041 
1042   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1043   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1044 
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #include "src/mat/impls/sbaij/seq/sbaij.h"
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1051 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1052 {
1053   Mat            C = *B;
1054   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1055   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1056   IS             ip=b->row;
1057   PetscErrorCode ierr;
1058   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1059   PetscInt       *ai=a->i,*aj=a->j;
1060   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1061   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1062   PetscReal      zeropivot,rs,shiftnz;
1063   PetscTruth     shiftpd;
1064   ChShift_Ctx    sctx;
1065   PetscInt       newshift;
1066 
1067   PetscFunctionBegin;
1068   shiftnz   = info->shiftnz;
1069   shiftpd   = info->shiftpd;
1070   zeropivot = info->zeropivot;
1071 
1072   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1073 
1074   /* initialization */
1075   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1076   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1077   jl   = il + mbs;
1078   rtmp = (MatScalar*)(jl + mbs);
1079 
1080   sctx.shift_amount = 0;
1081   sctx.nshift       = 0;
1082   do {
1083     sctx.chshift = PETSC_FALSE;
1084     for (i=0; i<mbs; i++) {
1085       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1086     }
1087 
1088     for (k = 0; k<mbs; k++){
1089       bval = ba + bi[k];
1090       /* initialize k-th row by the perm[k]-th row of A */
1091       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1092       for (j = jmin; j < jmax; j++){
1093         col = rip[aj[j]];
1094         if (col >= k){ /* only take upper triangular entry */
1095           rtmp[col] = aa[j];
1096           *bval++  = 0.0; /* for in-place factorization */
1097         }
1098       }
1099       /* shift the diagonal of the matrix */
1100       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1101 
1102       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1103       dk = rtmp[k];
1104       i = jl[k]; /* first row to be added to k_th row  */
1105 
1106       while (i < k){
1107         nexti = jl[i]; /* next row to be added to k_th row */
1108 
1109         /* compute multiplier, update diag(k) and U(i,k) */
1110         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1111         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1112         dk += uikdi*ba[ili];
1113         ba[ili] = uikdi; /* -U(i,k) */
1114 
1115         /* add multiple of row i to k-th row */
1116         jmin = ili + 1; jmax = bi[i+1];
1117         if (jmin < jmax){
1118           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1119           /* update il and jl for row i */
1120           il[i] = jmin;
1121           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1122         }
1123         i = nexti;
1124       }
1125 
1126       /* shift the diagonals when zero pivot is detected */
1127       /* compute rs=sum of abs(off-diagonal) */
1128       rs   = 0.0;
1129       jmin = bi[k]+1;
1130       nz   = bi[k+1] - jmin;
1131       if (nz){
1132         bcol = bj + jmin;
1133         while (nz--){
1134           rs += PetscAbsScalar(rtmp[*bcol]);
1135           bcol++;
1136         }
1137       }
1138 
1139       sctx.rs = rs;
1140       sctx.pv = dk;
1141       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1142       if (newshift == 1){
1143         break;    /* sctx.shift_amount is updated */
1144       } else if (newshift == -1){
1145         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1146       }
1147 
1148       /* copy data into U(k,:) */
1149       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1150       jmin = bi[k]+1; jmax = bi[k+1];
1151       if (jmin < jmax) {
1152         for (j=jmin; j<jmax; j++){
1153           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1154         }
1155         /* add the k-th row into il and jl */
1156         il[k] = jmin;
1157         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1158       }
1159     }
1160   } while (sctx.chshift);
1161   ierr = PetscFree(il);CHKERRQ(ierr);
1162 
1163   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1164   C->factor       = FACTOR_CHOLESKY;
1165   C->assembled    = PETSC_TRUE;
1166   C->preallocated = PETSC_TRUE;
1167   ierr = PetscLogFlops(C->m);CHKERRQ(ierr);
1168   if (sctx.nshift){
1169     if (shiftnz) {
1170       ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1171     } else if (shiftpd) {
1172       ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1173     }
1174   }
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1180 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1181 {
1182   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1183   Mat_SeqSBAIJ   *b;
1184   Mat            B;
1185   PetscErrorCode ierr;
1186   PetscTruth     perm_identity;
1187   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1188   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1189   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1190   PetscInt       ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1191   PetscReal      fill=info->fill,levels=info->levels;
1192   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1193   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1194   PetscBT        lnkbt;
1195 
1196   PetscFunctionBegin;
1197   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1198   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1199 
1200   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1201   ui[0] = 0;
1202 
1203   /* special case that simply copies fill pattern */
1204   if (!levels && perm_identity) {
1205     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1206     for (i=0; i<am; i++) {
1207       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1208     }
1209     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1210     cols = uj;
1211     for (i=0; i<am; i++) {
1212       aj    = a->j + a->diag[i];
1213       ncols = ui[i+1] - ui[i];
1214       for (j=0; j<ncols; j++) *cols++ = *aj++;
1215     }
1216   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1217     /* initialization */
1218     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1219 
1220     /* jl: linked list for storing indices of the pivot rows
1221        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1222     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1223     il         = jl + am;
1224     uj_ptr     = (PetscInt**)(il + am);
1225     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1226     for (i=0; i<am; i++){
1227       jl[i] = am; il[i] = 0;
1228     }
1229 
1230     /* create and initialize a linked list for storing column indices of the active row k */
1231     nlnk = am + 1;
1232     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1233 
1234     /* initial FreeSpace size is fill*(ai[am]+1) */
1235     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1236     current_space = free_space;
1237     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1238     current_space_lvl = free_space_lvl;
1239 
1240     for (k=0; k<am; k++){  /* for each active row k */
1241       /* initialize lnk by the column indices of row rip[k] of A */
1242       nzk   = 0;
1243       ncols = ai[rip[k]+1] - ai[rip[k]];
1244       ncols_upper = 0;
1245       for (j=0; j<ncols; j++){
1246         i = *(aj + ai[rip[k]] + j);
1247         if (rip[i] >= k){ /* only take upper triangular entry */
1248           ajtmp[ncols_upper] = i;
1249           ncols_upper++;
1250         }
1251       }
1252       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1253       nzk += nlnk;
1254 
1255       /* update lnk by computing fill-in for each pivot row to be merged in */
1256       prow = jl[k]; /* 1st pivot row */
1257 
1258       while (prow < k){
1259         nextprow = jl[prow];
1260 
1261         /* merge prow into k-th row */
1262         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1263         jmax = ui[prow+1];
1264         ncols = jmax-jmin;
1265         i     = jmin - ui[prow];
1266         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1267         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1268         j     = *(uj - 1);
1269         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1270         nzk += nlnk;
1271 
1272         /* update il and jl for prow */
1273         if (jmin < jmax){
1274           il[prow] = jmin;
1275           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1276         }
1277         prow = nextprow;
1278       }
1279 
1280       /* if free space is not available, make more free space */
1281       if (current_space->local_remaining<nzk) {
1282         i = am - k + 1; /* num of unfactored rows */
1283         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1284         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1285         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1286         reallocs++;
1287       }
1288 
1289       /* copy data into free_space and free_space_lvl, then initialize lnk */
1290       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1291 
1292       /* add the k-th row into il and jl */
1293       if (nzk > 1){
1294         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1295         jl[k] = jl[i]; jl[i] = k;
1296         il[k] = ui[k] + 1;
1297       }
1298       uj_ptr[k]     = current_space->array;
1299       uj_lvl_ptr[k] = current_space_lvl->array;
1300 
1301       current_space->array           += nzk;
1302       current_space->local_used      += nzk;
1303       current_space->local_remaining -= nzk;
1304 
1305       current_space_lvl->array           += nzk;
1306       current_space_lvl->local_used      += nzk;
1307       current_space_lvl->local_remaining -= nzk;
1308 
1309       ui[k+1] = ui[k] + nzk;
1310     }
1311 
1312 #if defined(PETSC_USE_DEBUG)
1313     if (ai[am] != 0) {
1314       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1315       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1316       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1317       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1318     } else {
1319       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1320     }
1321 #endif
1322 
1323     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1324     ierr = PetscFree(jl);CHKERRQ(ierr);
1325     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1326 
1327     /* destroy list of free space and other temporary array(s) */
1328     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1329     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1330     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1331     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1332 
1333   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1334 
1335   /* put together the new matrix in MATSEQSBAIJ format */
1336   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1337   B = *fact;
1338   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1339   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1340 
1341   b    = (Mat_SeqSBAIJ*)B->data;
1342   /* the next line frees the default space generated by the Create() */
1343   ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr);
1344   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1345   b->singlemalloc = PETSC_FALSE;
1346   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1347   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1348   b->j    = uj;
1349   b->i    = ui;
1350   b->diag = 0;
1351   b->ilen = 0;
1352   b->imax = 0;
1353   b->row  = perm;
1354   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1355   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1356   b->icol = perm;
1357   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1358   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1359   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1360   b->maxnz = b->nz = ui[am];
1361 
1362   B->factor                 = FACTOR_CHOLESKY;
1363   B->info.factor_mallocs    = reallocs;
1364   B->info.fill_ratio_given  = fill;
1365   if (ai[am] != 0) {
1366     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1367   } else {
1368     B->info.fill_ratio_needed = 0.0;
1369   }
1370   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1371   if (perm_identity){
1372     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1373     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1374   }
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1380 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1381 {
1382   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1383   Mat_SeqSBAIJ   *b;
1384   Mat            B;
1385   PetscErrorCode ierr;
1386   PetscTruth     perm_identity;
1387   PetscReal      fill = info->fill;
1388   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1389   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1390   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1391   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1392   PetscBT        lnkbt;
1393   IS             iperm;
1394 
1395   PetscFunctionBegin;
1396   /* check whether perm is the identity mapping */
1397   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1398   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1399 
1400   if (!perm_identity){
1401     /* check if perm is symmetric! */
1402     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1403     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1404     for (i=0; i<am; i++) {
1405       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1406     }
1407     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1408     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1409   }
1410 
1411   /* initialization */
1412   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1413   ui[0] = 0;
1414 
1415   /* jl: linked list for storing indices of the pivot rows
1416      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1417   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1418   il     = jl + am;
1419   cols   = il + am;
1420   ui_ptr = (PetscInt**)(cols + am);
1421   for (i=0; i<am; i++){
1422     jl[i] = am; il[i] = 0;
1423   }
1424 
1425   /* create and initialize a linked list for storing column indices of the active row k */
1426   nlnk = am + 1;
1427   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1428 
1429   /* initial FreeSpace size is fill*(ai[am]+1) */
1430   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1431   current_space = free_space;
1432 
1433   for (k=0; k<am; k++){  /* for each active row k */
1434     /* initialize lnk by the column indices of row rip[k] of A */
1435     nzk   = 0;
1436     ncols = ai[rip[k]+1] - ai[rip[k]];
1437     ncols_upper = 0;
1438     for (j=0; j<ncols; j++){
1439       i = rip[*(aj + ai[rip[k]] + j)];
1440       if (i >= k){ /* only take upper triangular entry */
1441         cols[ncols_upper] = i;
1442         ncols_upper++;
1443       }
1444     }
1445     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1446     nzk += nlnk;
1447 
1448     /* update lnk by computing fill-in for each pivot row to be merged in */
1449     prow = jl[k]; /* 1st pivot row */
1450 
1451     while (prow < k){
1452       nextprow = jl[prow];
1453       /* merge prow into k-th row */
1454       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1455       jmax = ui[prow+1];
1456       ncols = jmax-jmin;
1457       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1458       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1459       nzk += nlnk;
1460 
1461       /* update il and jl for prow */
1462       if (jmin < jmax){
1463         il[prow] = jmin;
1464         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1465       }
1466       prow = nextprow;
1467     }
1468 
1469     /* if free space is not available, make more free space */
1470     if (current_space->local_remaining<nzk) {
1471       i = am - k + 1; /* num of unfactored rows */
1472       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1473       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1474       reallocs++;
1475     }
1476 
1477     /* copy data into free space, then initialize lnk */
1478     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1479 
1480     /* add the k-th row into il and jl */
1481     if (nzk-1 > 0){
1482       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1483       jl[k] = jl[i]; jl[i] = k;
1484       il[k] = ui[k] + 1;
1485     }
1486     ui_ptr[k] = current_space->array;
1487     current_space->array           += nzk;
1488     current_space->local_used      += nzk;
1489     current_space->local_remaining -= nzk;
1490 
1491     ui[k+1] = ui[k] + nzk;
1492   }
1493 
1494 #if defined(PETSC_USE_DEBUG)
1495   if (ai[am] != 0) {
1496     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1497     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1498     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1499     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1500   } else {
1501      ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1502   }
1503 #endif
1504 
1505   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1506   ierr = PetscFree(jl);CHKERRQ(ierr);
1507 
1508   /* destroy list of free space and other temporary array(s) */
1509   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1510   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1511   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1512 
1513   /* put together the new matrix in MATSEQSBAIJ format */
1514   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1515   B    = *fact;
1516   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1517   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1518 
1519   b = (Mat_SeqSBAIJ*)B->data;
1520   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1521   b->singlemalloc = PETSC_FALSE;
1522   /* the next line frees the default space generated by the Create() */
1523   ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr);
1524   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1525   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1526   b->j    = uj;
1527   b->i    = ui;
1528   b->diag = 0;
1529   b->ilen = 0;
1530   b->imax = 0;
1531   b->row  = perm;
1532   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1533   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1534   b->icol = perm;
1535   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1536   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1537   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1538   b->maxnz = b->nz = ui[am];
1539 
1540   B->factor                 = FACTOR_CHOLESKY;
1541   B->info.factor_mallocs    = reallocs;
1542   B->info.fill_ratio_given  = fill;
1543   if (ai[am] != 0) {
1544     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1545   } else {
1546     B->info.fill_ratio_needed = 0.0;
1547   }
1548   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1549   if (perm_identity){
1550     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1551     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1552   }
1553   PetscFunctionReturn(0);
1554 }
1555