xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 6a7c8fc2a6aac83ad1069891e131b23ca2c55a16)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 #include "petscbt.h"
6 #include "src/mat/utils/freespace.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
11 {
12   PetscFunctionBegin;
13 
14   SETERRQ(PETSC_ERR_SUP,"Code not written");
15 #if !defined(PETSC_USE_DEBUG)
16   PetscFunctionReturn(0);
17 #endif
18 }
19 
20 
21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
22 
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26 
27 #undef __FUNCT__
28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
29   /* ------------------------------------------------------------
30 
31           This interface was contribed by Tony Caola
32 
33      This routine is an interface to the pivoting drop-tolerance
34      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
35      SPARSEKIT2.
36 
37      The SPARSEKIT2 routines used here are covered by the GNU
38      copyright; see the file gnu in this directory.
39 
40      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
41      help in getting this routine ironed out.
42 
43      The major drawback to this routine is that if info->fill is
44      not large enough it fails rather than allocating more space;
45      this can be fixed by hacking/improving the f2c version of
46      Yousef Saad's code.
47 
48      ------------------------------------------------------------
49 */
50 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
51 {
52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
53   PetscFunctionBegin;
54   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
55   You can obtain the drop tolerance routines by installing PETSc from\n\
56   www.mcs.anl.gov/petsc\n");
57 #else
58   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
59   IS             iscolf,isicol,isirow;
60   PetscTruth     reorder;
61   PetscErrorCode ierr,sierr;
62   PetscInt       *c,*r,*ic,i,n = A->m;
63   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
64   PetscInt       *ordcol,*iwk,*iperm,*jw;
65   PetscInt       jmax,lfill,job,*o_i,*o_j;
66   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
67   PetscReal      af;
68 
69   PetscFunctionBegin;
70 
71   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
72   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
73   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
74   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
75   lfill   = (PetscInt)(info->dtcount/2.0);
76   jmax    = (PetscInt)(info->fill*a->nz);
77 
78 
79   /* ------------------------------------------------------------
80      If reorder=.TRUE., then the original matrix has to be
81      reordered to reflect the user selected ordering scheme, and
82      then de-reordered so it is in it's original format.
83      Because Saad's dperm() is NOT in place, we have to copy
84      the original matrix and allocate more storage. . .
85      ------------------------------------------------------------
86   */
87 
88   /* set reorder to true if either isrow or iscol is not identity */
89   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
90   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
91   reorder = PetscNot(reorder);
92 
93 
94   /* storage for ilu factor */
95   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
98   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
99 
100   /* ------------------------------------------------------------
101      Make sure that everything is Fortran formatted (1-Based)
102      ------------------------------------------------------------
103   */
104   for (i=old_i[0];i<old_i[n];i++) {
105     old_j[i]++;
106   }
107   for(i=0;i<n+1;i++) {
108     old_i[i]++;
109   };
110 
111 
112   if (reorder) {
113     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
114     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
115     for(i=0;i<n;i++) {
116       r[i]  = r[i]+1;
117       c[i]  = c[i]+1;
118     }
119     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
122     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
123     for (i=0;i<n;i++) {
124       r[i]  = r[i]-1;
125       c[i]  = c[i]-1;
126     }
127     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
128     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
222   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
223   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
224   (*fact)->factor    = FACTOR_LU;
225   (*fact)->assembled = PETSC_TRUE;
226 
227   b = (Mat_SeqAIJ*)(*fact)->data;
228   ierr = PetscFree(b->imax);CHKERRQ(ierr);
229   b->sorted        = PETSC_FALSE;
230   b->singlemalloc  = PETSC_FALSE;
231   /* the next line frees the default space generated by the MatCreate() */
232   ierr             = PetscFree(b->a);CHKERRQ(ierr);
233   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
234   b->a             = new_a;
235   b->j             = new_j;
236   b->i             = new_i;
237   b->ilen          = 0;
238   b->imax          = 0;
239   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240   b->row           = isirow;
241   b->col           = iscolf;
242   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
243   b->maxnz = b->nz = new_i[n];
244   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
245   (*fact)->info.factor_mallocs = 0;
246 
247   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
248 
249   af = ((double)b->nz)/((double)a->nz) + .001;
250   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr);
251   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
252   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
253   ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
254 
255   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 #undef __FUNCT__
262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
264 {
265   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
266   IS             isicol;
267   PetscErrorCode ierr;
268   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
269   PetscInt       *bi,*bj,*ajtmp;
270   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
271   PetscReal      f;
272   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
273   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
274   PetscBT        lnkbt;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
284   bi[0] = 0;
285 
286   /* bdiag is location of diagonal in factor */
287   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
288   bdiag[0] = 0;
289 
290   /* linked list for storing column indices of the active row */
291   nlnk = n + 1;
292   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
293 
294   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
295   im     = cols + n;
296   bi_ptr = (PetscInt**)(im + n);
297 
298   /* initial FreeSpace size is f*(ai[n]+1) */
299   f = info->fill;
300   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
301   current_space = free_space;
302 
303   for (i=0; i<n; i++) {
304     /* copy previous fill into linked list */
305     nzi = 0;
306     nnz = ai[r[i]+1] - ai[r[i]];
307     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
308     ajtmp = aj + ai[r[i]];
309     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
310     ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
311     nzi += nlnk;
312 
313     /* add pivot rows into linked list */
314     row = lnk[n];
315     while (row < i) {
316       nzbd    = bdiag[row] - bi[row] + 1;
317       ajtmp   = bi_ptr[row] + nzbd;
318       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i */
319       im[row] = nzbd;
320       ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr);
321       nzi     += nlnk;
322       im[row] += nzbd;  /* update im[row]: num of cols with index<=i */
323 
324       row = lnk[row];
325     }
326 
327     bi[i+1] = bi[i] + nzi;
328     im[i]   = nzi;
329 
330     /* mark bdiag */
331     nzbd = 0;
332     nnz  = nzi;
333     k    = lnk[n];
334     while (nnz-- && k < i){
335       nzbd++;
336       k = lnk[k];
337     }
338     bdiag[i] = bi[i] + nzbd;
339 
340     /* if free space is not available, make more free space */
341     if (current_space->local_remaining<nzi) {
342       nnz = (n - i)*nzi; /* estimated and max additional space needed */
343       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
344       reallocs++;
345     }
346 
347     /* copy data into free space, then initialize lnk */
348     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
349     bi_ptr[i] = current_space->array;
350     current_space->array           += nzi;
351     current_space->local_used      += nzi;
352     current_space->local_remaining -= nzi;
353   }
354 #if defined(PETSC_USE_DEBUG)
355   if (ai[n] != 0) {
356     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
357     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
358     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af));CHKERRQ(ierr);
359     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr);
360     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
361   } else {
362     ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr);
363   }
364 #endif
365 
366   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
367   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
368 
369   /* destroy list of free space and other temporary array(s) */
370   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
371   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
372   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
373   ierr = PetscFree(cols);CHKERRQ(ierr);
374 
375   /* put together the new matrix */
376   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
377   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
378   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
379   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
380   b    = (Mat_SeqAIJ*)(*B)->data;
381   ierr = PetscFree(b->imax);CHKERRQ(ierr);
382   b->singlemalloc = PETSC_FALSE;
383   /* the next line frees the default space generated by the Create() */
384   ierr = PetscFree(b->a);CHKERRQ(ierr);
385   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
386   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
387   b->j          = bj;
388   b->i          = bi;
389   b->diag       = bdiag;
390   b->ilen       = 0;
391   b->imax       = 0;
392   b->row        = isrow;
393   b->col        = iscol;
394   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
395   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
396   b->icol       = isicol;
397   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
398 
399   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
400   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
401   b->maxnz = b->nz = bi[n] ;
402 
403   (*B)->factor                 =  FACTOR_LU;
404   (*B)->info.factor_mallocs    = reallocs;
405   (*B)->info.fill_ratio_given  = f;
406 
407   if (ai[n] != 0) {
408     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
409   } else {
410     (*B)->info.fill_ratio_needed = 0.0;
411   }
412   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
413   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
414   PetscFunctionReturn(0);
415 }
416 
417 /* ----------------------------------------------------------- */
418 #undef __FUNCT__
419 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
420 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
421 {
422   Mat            C=*B;
423   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
424   IS             isrow = b->row,isicol = b->icol;
425   PetscErrorCode ierr;
426   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
427   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
428   PetscInt       *diag_offset = b->diag,diag,*pj;
429   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
430   PetscScalar    d;
431   PetscReal      rs;
432   LUShift_Ctx    sctx;
433   PetscInt       newshift;
434 
435   PetscFunctionBegin;
436   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
437   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
438   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
439   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
440   rtmps = rtmp; ics = ic;
441 
442   if (!a->diag) {
443     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
444   }
445   /* if both shift schemes are chosen by user, only use info->shiftpd */
446   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
447   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
448     PetscInt *aai = a->i,*ddiag = a->diag;
449     sctx.shift_top = 0;
450     for (i=0; i<n; i++) {
451       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
452       d  = (a->a)[ddiag[i]];
453       rs = -PetscAbsScalar(d) - PetscRealPart(d);
454       v  = a->a+aai[i];
455       nz = aai[i+1] - aai[i];
456       for (j=0; j<nz; j++)
457 	rs += PetscAbsScalar(v[j]);
458       if (rs>sctx.shift_top) sctx.shift_top = rs;
459     }
460     if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
461     sctx.shift_top    *= 1.1;
462     sctx.nshift_max   = 5;
463     sctx.shift_lo     = 0.;
464     sctx.shift_hi     = 1.;
465   }
466 
467   sctx.shift_amount = 0;
468   sctx.nshift       = 0;
469   do {
470     sctx.lushift = PETSC_FALSE;
471     for (i=0; i<n; i++){
472       nz    = bi[i+1] - bi[i];
473       bjtmp = bj + bi[i];
474       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
475 
476       /* load in initial (unfactored row) */
477       nz    = a->i[r[i]+1] - a->i[r[i]];
478       ajtmp = a->j + a->i[r[i]];
479       v     = a->a + a->i[r[i]];
480       for (j=0; j<nz; j++) {
481         rtmp[ics[ajtmp[j]]] = v[j];
482       }
483       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
484 
485       row = *bjtmp++;
486       while  (row < i) {
487         pc = rtmp + row;
488         if (*pc != 0.0) {
489           pv         = b->a + diag_offset[row];
490           pj         = b->j + diag_offset[row] + 1;
491           multiplier = *pc / *pv++;
492           *pc        = multiplier;
493           nz         = bi[row+1] - diag_offset[row] - 1;
494           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
495           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
496         }
497         row = *bjtmp++;
498       }
499       /* finished row so stick it into b->a */
500       pv   = b->a + bi[i] ;
501       pj   = b->j + bi[i] ;
502       nz   = bi[i+1] - bi[i];
503       diag = diag_offset[i] - bi[i];
504       rs   = 0.0;
505       for (j=0; j<nz; j++) {
506         pv[j] = rtmps[pj[j]];
507         if (j != diag) rs += PetscAbsScalar(pv[j]);
508       }
509 
510       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
511       sctx.rs  = rs;
512       sctx.pv  = pv[diag];
513       ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
514       if (newshift == 1){
515         break;    /* sctx.shift_amount is updated */
516       } else if (newshift == -1){
517         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
518       }
519     }
520 
521     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
522       /*
523        * if no shift in this attempt & shifting & started shifting & can refine,
524        * then try lower shift
525        */
526       sctx.shift_hi        = info->shift_fraction;
527       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
528       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
529       sctx.lushift         = PETSC_TRUE;
530       sctx.nshift++;
531     }
532   } while (sctx.lushift);
533 
534   /* invert diagonal entries for simplier triangular solves */
535   for (i=0; i<n; i++) {
536     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
537   }
538 
539   ierr = PetscFree(rtmp);CHKERRQ(ierr);
540   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
541   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
542   C->factor = FACTOR_LU;
543   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
544   C->assembled = PETSC_TRUE;
545   ierr = PetscLogFlops(C->n);CHKERRQ(ierr);
546   if (sctx.nshift){
547     if (info->shiftnz) {
548       ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
549     } else if (info->shiftpd) {
550       ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr);
551     }
552   }
553   PetscFunctionReturn(0);
554 }
555 
556 #undef __FUNCT__
557 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
558 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
559 {
560   PetscFunctionBegin;
561   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
562   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
563   PetscFunctionReturn(0);
564 }
565 
566 
567 /* ----------------------------------------------------------- */
568 #undef __FUNCT__
569 #define __FUNCT__ "MatLUFactor_SeqAIJ"
570 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
571 {
572   PetscErrorCode ierr;
573   Mat            C;
574 
575   PetscFunctionBegin;
576   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
577   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
578   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
579   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582 /* ----------------------------------------------------------- */
583 #undef __FUNCT__
584 #define __FUNCT__ "MatSolve_SeqAIJ"
585 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
586 {
587   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
588   IS             iscol = a->col,isrow = a->row;
589   PetscErrorCode ierr;
590   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
591   PetscInt       nz,*rout,*cout;
592   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
593 
594   PetscFunctionBegin;
595   if (!n) PetscFunctionReturn(0);
596 
597   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
598   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
599   tmp  = a->solve_work;
600 
601   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
602   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
603 
604   /* forward solve the lower triangular */
605   tmp[0] = b[*r++];
606   tmps   = tmp;
607   for (i=1; i<n; i++) {
608     v   = aa + ai[i] ;
609     vi  = aj + ai[i] ;
610     nz  = a->diag[i] - ai[i];
611     sum = b[*r++];
612     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
613     tmp[i] = sum;
614   }
615 
616   /* backward solve the upper triangular */
617   for (i=n-1; i>=0; i--){
618     v   = aa + a->diag[i] + 1;
619     vi  = aj + a->diag[i] + 1;
620     nz  = ai[i+1] - a->diag[i] - 1;
621     sum = tmp[i];
622     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
623     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
624   }
625 
626   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
627   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
628   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
629   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
630   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
633 
634 /* ----------------------------------------------------------- */
635 #undef __FUNCT__
636 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
637 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
638 {
639   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
640   PetscErrorCode ierr;
641   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
642   PetscScalar    *x,*b,*aa = a->a;
643 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
644   PetscInt       adiag_i,i,*vi,nz,ai_i;
645   PetscScalar    *v,sum;
646 #endif
647 
648   PetscFunctionBegin;
649   if (!n) PetscFunctionReturn(0);
650 
651   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
652   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
653 
654 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
655   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
656 #else
657   /* forward solve the lower triangular */
658   x[0] = b[0];
659   for (i=1; i<n; i++) {
660     ai_i = ai[i];
661     v    = aa + ai_i;
662     vi   = aj + ai_i;
663     nz   = adiag[i] - ai_i;
664     sum  = b[i];
665     while (nz--) sum -= *v++ * x[*vi++];
666     x[i] = sum;
667   }
668 
669   /* backward solve the upper triangular */
670   for (i=n-1; i>=0; i--){
671     adiag_i = adiag[i];
672     v       = aa + adiag_i + 1;
673     vi      = aj + adiag_i + 1;
674     nz      = ai[i+1] - adiag_i - 1;
675     sum     = x[i];
676     while (nz--) sum -= *v++ * x[*vi++];
677     x[i]    = sum*aa[adiag_i];
678   }
679 #endif
680   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
681   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
682   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
688 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
689 {
690   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
691   IS             iscol = a->col,isrow = a->row;
692   PetscErrorCode ierr;
693   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
694   PetscInt       nz,*rout,*cout;
695   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
696 
697   PetscFunctionBegin;
698   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
699 
700   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   tmp  = a->solve_work;
703 
704   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
705   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
706 
707   /* forward solve the lower triangular */
708   tmp[0] = b[*r++];
709   for (i=1; i<n; i++) {
710     v   = aa + ai[i] ;
711     vi  = aj + ai[i] ;
712     nz  = a->diag[i] - ai[i];
713     sum = b[*r++];
714     while (nz--) sum -= *v++ * tmp[*vi++ ];
715     tmp[i] = sum;
716   }
717 
718   /* backward solve the upper triangular */
719   for (i=n-1; i>=0; i--){
720     v   = aa + a->diag[i] + 1;
721     vi  = aj + a->diag[i] + 1;
722     nz  = ai[i+1] - a->diag[i] - 1;
723     sum = tmp[i];
724     while (nz--) sum -= *v++ * tmp[*vi++ ];
725     tmp[i] = sum*aa[a->diag[i]];
726     x[*c--] += tmp[i];
727   }
728 
729   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
730   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
731   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
732   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
733   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
734 
735   PetscFunctionReturn(0);
736 }
737 /* -------------------------------------------------------------------*/
738 #undef __FUNCT__
739 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
740 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
741 {
742   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
743   IS             iscol = a->col,isrow = a->row;
744   PetscErrorCode ierr;
745   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
746   PetscInt       nz,*rout,*cout,*diag = a->diag;
747   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
748 
749   PetscFunctionBegin;
750   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
751   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
752   tmp  = a->solve_work;
753 
754   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
755   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
756 
757   /* copy the b into temp work space according to permutation */
758   for (i=0; i<n; i++) tmp[i] = b[c[i]];
759 
760   /* forward solve the U^T */
761   for (i=0; i<n; i++) {
762     v   = aa + diag[i] ;
763     vi  = aj + diag[i] + 1;
764     nz  = ai[i+1] - diag[i] - 1;
765     s1  = tmp[i];
766     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
767     while (nz--) {
768       tmp[*vi++ ] -= (*v++)*s1;
769     }
770     tmp[i] = s1;
771   }
772 
773   /* backward solve the L^T */
774   for (i=n-1; i>=0; i--){
775     v   = aa + diag[i] - 1 ;
776     vi  = aj + diag[i] - 1 ;
777     nz  = diag[i] - ai[i];
778     s1  = tmp[i];
779     while (nz--) {
780       tmp[*vi-- ] -= (*v--)*s1;
781     }
782   }
783 
784   /* copy tmp into x according to permutation */
785   for (i=0; i<n; i++) x[r[i]] = tmp[i];
786 
787   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
788   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
789   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
790   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
791 
792   ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr);
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
798 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
799 {
800   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
801   IS             iscol = a->col,isrow = a->row;
802   PetscErrorCode ierr;
803   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
804   PetscInt       nz,*rout,*cout,*diag = a->diag;
805   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
806 
807   PetscFunctionBegin;
808   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
809 
810   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
811   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
812   tmp = a->solve_work;
813 
814   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
815   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
816 
817   /* copy the b into temp work space according to permutation */
818   for (i=0; i<n; i++) tmp[i] = b[c[i]];
819 
820   /* forward solve the U^T */
821   for (i=0; i<n; i++) {
822     v   = aa + diag[i] ;
823     vi  = aj + diag[i] + 1;
824     nz  = ai[i+1] - diag[i] - 1;
825     tmp[i] *= *v++;
826     while (nz--) {
827       tmp[*vi++ ] -= (*v++)*tmp[i];
828     }
829   }
830 
831   /* backward solve the L^T */
832   for (i=n-1; i>=0; i--){
833     v   = aa + diag[i] - 1 ;
834     vi  = aj + diag[i] - 1 ;
835     nz  = diag[i] - ai[i];
836     while (nz--) {
837       tmp[*vi-- ] -= (*v--)*tmp[i];
838     }
839   }
840 
841   /* copy tmp into x according to permutation */
842   for (i=0; i<n; i++) x[r[i]] += tmp[i];
843 
844   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
845   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
846   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
847   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
848 
849   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 /* ----------------------------------------------------------------*/
853 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
854 
855 #undef __FUNCT__
856 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
857 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
858 {
859   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
860   IS             isicol;
861   PetscErrorCode ierr;
862   PetscInt       *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
863   PetscInt       *bi,*cols,nnz,*cols_lvl;
864   PetscInt       *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
865   PetscInt       i,levels,diagonal_fill;
866   PetscTruth     col_identity,row_identity;
867   PetscReal      f;
868   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
869   PetscBT        lnkbt;
870   PetscInt       nzi,*bj,**bj_ptr,**bjlvl_ptr;
871   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
872   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
873 
874   PetscFunctionBegin;
875   f             = info->fill;
876   levels        = (PetscInt)info->levels;
877   diagonal_fill = (PetscInt)info->diagonal_fill;
878   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
879 
880   /* special case that simply copies fill pattern */
881   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
882   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
883   if (!levels && row_identity && col_identity) {
884     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
885     (*fact)->factor = FACTOR_LU;
886     b               = (Mat_SeqAIJ*)(*fact)->data;
887     if (!b->diag) {
888       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
889     }
890     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
891     b->row              = isrow;
892     b->col              = iscol;
893     b->icol             = isicol;
894     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
895     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
896     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
897     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
898     PetscFunctionReturn(0);
899   }
900 
901   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
902   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
903 
904   /* get new row pointers */
905   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
906   bi[0] = 0;
907   /* bdiag is location of diagonal in factor */
908   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
909   bdiag[0]  = 0;
910 
911   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
912   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
913 
914   /* create a linked list for storing column indices of the active row */
915   nlnk = n + 1;
916   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
917 
918   /* initial FreeSpace size is f*(ai[n]+1) */
919   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
920   current_space = free_space;
921   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
922   current_space_lvl = free_space_lvl;
923 
924   for (i=0; i<n; i++) {
925     nzi = 0;
926     /* copy current row into linked list */
927     nnz  = ai[r[i]+1] - ai[r[i]];
928     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
929     cols = aj + ai[r[i]];
930     lnk[i] = -1; /* marker to indicate if diagonal exists */
931     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
932     nzi += nlnk;
933 
934     /* make sure diagonal entry is included */
935     if (diagonal_fill && lnk[i] == -1) {
936       fm = n;
937       while (lnk[fm] < i) fm = lnk[fm];
938       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
939       lnk[fm]    = i;
940       lnk_lvl[i] = 0;
941       nzi++; dcount++;
942     }
943 
944     /* add pivot rows into the active row */
945     nzbd = 0;
946     prow = lnk[n];
947     while (prow < i) {
948       nnz      = bdiag[prow];
949       cols     = bj_ptr[prow] + nnz + 1;
950       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
951       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
952       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
953       nzi += nlnk;
954       prow = lnk[prow];
955       nzbd++;
956     }
957     bdiag[i] = nzbd;
958     bi[i+1]  = bi[i] + nzi;
959 
960     /* if free space is not available, make more free space */
961     if (current_space->local_remaining<nzi) {
962       nnz = nzi*(n - i); /* estimated and max additional space needed */
963       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
964       ierr = GetMoreSpace(nnz,&current_space_lvl);CHKERRQ(ierr);
965       reallocs++;
966     }
967 
968     /* copy data into free_space and free_space_lvl, then initialize lnk */
969     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
970     bj_ptr[i]    = current_space->array;
971     bjlvl_ptr[i] = current_space_lvl->array;
972 
973     /* make sure the active row i has diagonal entry */
974     if (*(bj_ptr[i]+bdiag[i]) != i) {
975       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
976     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
977     }
978 
979     current_space->array           += nzi;
980     current_space->local_used      += nzi;
981     current_space->local_remaining -= nzi;
982     current_space_lvl->array           += nzi;
983     current_space_lvl->local_used      += nzi;
984     current_space_lvl->local_remaining -= nzi;
985   }
986 
987   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
988   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
989 
990   /* destroy list of free space and other temporary arrays */
991   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
992   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
993   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
994   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
995   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
996 
997 #if defined(PETSC_USE_DEBUG)
998   {
999     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1000     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr);
1001     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr);
1002     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr);
1003     ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr);
1004     if (diagonal_fill) {
1005       ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr);
1006     }
1007   }
1008 #endif
1009 
1010   /* put together the new matrix */
1011   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1012   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1013   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1014   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1015   b = (Mat_SeqAIJ*)(*fact)->data;
1016   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1017   b->singlemalloc = PETSC_FALSE;
1018   len = (bi[n] )*sizeof(PetscScalar);
1019   /* the next line frees the default space generated by the Create() */
1020   ierr = PetscFree(b->a);CHKERRQ(ierr);
1021   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1022   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1023   b->j          = bj;
1024   b->i          = bi;
1025   for (i=0; i<n; i++) bdiag[i] += bi[i];
1026   b->diag       = bdiag;
1027   b->ilen       = 0;
1028   b->imax       = 0;
1029   b->row        = isrow;
1030   b->col        = iscol;
1031   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1032   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1033   b->icol       = isicol;
1034   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1035   /* In b structure:  Free imax, ilen, old a, old j.
1036      Allocate bdiag, solve_work, new a, new j */
1037   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1038   b->maxnz             = b->nz = bi[n] ;
1039   (*fact)->factor = FACTOR_LU;
1040   (*fact)->info.factor_mallocs    = reallocs;
1041   (*fact)->info.fill_ratio_given  = f;
1042   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1043 
1044   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1045   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1046 
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 #include "src/mat/impls/sbaij/seq/sbaij.h"
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1053 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1054 {
1055   Mat            C = *B;
1056   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1057   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1058   IS             ip=b->row;
1059   PetscErrorCode ierr;
1060   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1061   PetscInt       *ai=a->i,*aj=a->j;
1062   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1063   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1064   PetscReal      zeropivot,rs,shiftnz;
1065   PetscTruth     shiftpd;
1066   ChShift_Ctx    sctx;
1067   PetscInt       newshift;
1068 
1069   PetscFunctionBegin;
1070   shiftnz   = info->shiftnz;
1071   shiftpd   = info->shiftpd;
1072   zeropivot = info->zeropivot;
1073 
1074   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1075 
1076   /* initialization */
1077   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1078   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1079   jl   = il + mbs;
1080   rtmp = (MatScalar*)(jl + mbs);
1081 
1082   sctx.shift_amount = 0;
1083   sctx.nshift       = 0;
1084   do {
1085     sctx.chshift = PETSC_FALSE;
1086     for (i=0; i<mbs; i++) {
1087       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1088     }
1089 
1090     for (k = 0; k<mbs; k++){
1091       bval = ba + bi[k];
1092       /* initialize k-th row by the perm[k]-th row of A */
1093       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1094       for (j = jmin; j < jmax; j++){
1095         col = rip[aj[j]];
1096         if (col >= k){ /* only take upper triangular entry */
1097           rtmp[col] = aa[j];
1098           *bval++  = 0.0; /* for in-place factorization */
1099         }
1100       }
1101       /* shift the diagonal of the matrix */
1102       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1103 
1104       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1105       dk = rtmp[k];
1106       i = jl[k]; /* first row to be added to k_th row  */
1107 
1108       while (i < k){
1109         nexti = jl[i]; /* next row to be added to k_th row */
1110 
1111         /* compute multiplier, update diag(k) and U(i,k) */
1112         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1113         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1114         dk += uikdi*ba[ili];
1115         ba[ili] = uikdi; /* -U(i,k) */
1116 
1117         /* add multiple of row i to k-th row */
1118         jmin = ili + 1; jmax = bi[i+1];
1119         if (jmin < jmax){
1120           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1121           /* update il and jl for row i */
1122           il[i] = jmin;
1123           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1124         }
1125         i = nexti;
1126       }
1127 
1128       /* shift the diagonals when zero pivot is detected */
1129       /* compute rs=sum of abs(off-diagonal) */
1130       rs   = 0.0;
1131       jmin = bi[k]+1;
1132       nz   = bi[k+1] - jmin;
1133       if (nz){
1134         bcol = bj + jmin;
1135         while (nz--){
1136           rs += PetscAbsScalar(rtmp[*bcol]);
1137           bcol++;
1138         }
1139       }
1140 
1141       sctx.rs = rs;
1142       sctx.pv = dk;
1143       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1144       if (newshift == 1){
1145         break;    /* sctx.shift_amount is updated */
1146       } else if (newshift == -1){
1147         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1148       }
1149 
1150       /* copy data into U(k,:) */
1151       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1152       jmin = bi[k]+1; jmax = bi[k+1];
1153       if (jmin < jmax) {
1154         for (j=jmin; j<jmax; j++){
1155           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1156         }
1157         /* add the k-th row into il and jl */
1158         il[k] = jmin;
1159         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1160       }
1161     }
1162   } while (sctx.chshift);
1163   ierr = PetscFree(il);CHKERRQ(ierr);
1164 
1165   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1166   C->factor       = FACTOR_CHOLESKY;
1167   C->assembled    = PETSC_TRUE;
1168   C->preallocated = PETSC_TRUE;
1169   ierr = PetscLogFlops(C->m);CHKERRQ(ierr);
1170   if (sctx.nshift){
1171     if (shiftnz) {
1172       ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1173     } else if (shiftpd) {
1174       ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr);
1175     }
1176   }
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1182 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1183 {
1184   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1185   Mat_SeqSBAIJ   *b;
1186   Mat            B;
1187   PetscErrorCode ierr;
1188   PetscTruth     perm_identity;
1189   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1190   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1191   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1192   PetscInt       ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1193   PetscReal      fill=info->fill,levels=info->levels;
1194   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1195   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1196   PetscBT        lnkbt;
1197 
1198   PetscFunctionBegin;
1199   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1200   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1201 
1202   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1203   ui[0] = 0;
1204 
1205   /* special case that simply copies fill pattern */
1206   if (!levels && perm_identity) {
1207     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1208     for (i=0; i<am; i++) {
1209       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1210     }
1211     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1212     cols = uj;
1213     for (i=0; i<am; i++) {
1214       aj    = a->j + a->diag[i];
1215       ncols = ui[i+1] - ui[i];
1216       for (j=0; j<ncols; j++) *cols++ = *aj++;
1217     }
1218   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1219     /* initialization */
1220     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1221 
1222     /* jl: linked list for storing indices of the pivot rows
1223        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1224     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1225     il         = jl + am;
1226     uj_ptr     = (PetscInt**)(il + am);
1227     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1228     for (i=0; i<am; i++){
1229       jl[i] = am; il[i] = 0;
1230     }
1231 
1232     /* create and initialize a linked list for storing column indices of the active row k */
1233     nlnk = am + 1;
1234     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1235 
1236     /* initial FreeSpace size is fill*(ai[am]+1) */
1237     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1238     current_space = free_space;
1239     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1240     current_space_lvl = free_space_lvl;
1241 
1242     for (k=0; k<am; k++){  /* for each active row k */
1243       /* initialize lnk by the column indices of row rip[k] of A */
1244       nzk   = 0;
1245       ncols = ai[rip[k]+1] - ai[rip[k]];
1246       ncols_upper = 0;
1247       for (j=0; j<ncols; j++){
1248         i = *(aj + ai[rip[k]] + j);
1249         if (rip[i] >= k){ /* only take upper triangular entry */
1250           ajtmp[ncols_upper] = i;
1251           ncols_upper++;
1252         }
1253       }
1254       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1255       nzk += nlnk;
1256 
1257       /* update lnk by computing fill-in for each pivot row to be merged in */
1258       prow = jl[k]; /* 1st pivot row */
1259 
1260       while (prow < k){
1261         nextprow = jl[prow];
1262 
1263         /* merge prow into k-th row */
1264         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1265         jmax = ui[prow+1];
1266         ncols = jmax-jmin;
1267         i     = jmin - ui[prow];
1268         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1269         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1270         j     = *(uj - 1);
1271         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1272         nzk += nlnk;
1273 
1274         /* update il and jl for prow */
1275         if (jmin < jmax){
1276           il[prow] = jmin;
1277           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1278         }
1279         prow = nextprow;
1280       }
1281 
1282       /* if free space is not available, make more free space */
1283       if (current_space->local_remaining<nzk) {
1284         i = am - k + 1; /* num of unfactored rows */
1285         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1286         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1287         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1288         reallocs++;
1289       }
1290 
1291       /* copy data into free_space and free_space_lvl, then initialize lnk */
1292       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1293 
1294       /* add the k-th row into il and jl */
1295       if (nzk > 1){
1296         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1297         jl[k] = jl[i]; jl[i] = k;
1298         il[k] = ui[k] + 1;
1299       }
1300       uj_ptr[k]     = current_space->array;
1301       uj_lvl_ptr[k] = current_space_lvl->array;
1302 
1303       current_space->array           += nzk;
1304       current_space->local_used      += nzk;
1305       current_space->local_remaining -= nzk;
1306 
1307       current_space_lvl->array           += nzk;
1308       current_space_lvl->local_used      += nzk;
1309       current_space_lvl->local_remaining -= nzk;
1310 
1311       ui[k+1] = ui[k] + nzk;
1312     }
1313 
1314 #if defined(PETSC_USE_DEBUG)
1315     if (ai[am] != 0) {
1316       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1317       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1318       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1319       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1320     } else {
1321       ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1322     }
1323 #endif
1324 
1325     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1326     ierr = PetscFree(jl);CHKERRQ(ierr);
1327     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1328 
1329     /* destroy list of free space and other temporary array(s) */
1330     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1331     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1332     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1333     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1334 
1335   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1336 
1337   /* put together the new matrix in MATSEQSBAIJ format */
1338   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1339   B = *fact;
1340   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1341   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1342 
1343   b    = (Mat_SeqSBAIJ*)B->data;
1344   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1345   b->singlemalloc = PETSC_FALSE;
1346   /* the next line frees the default space generated by the Create() */
1347   ierr = PetscFree(b->a);CHKERRQ(ierr);
1348   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1349   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1350   b->j    = uj;
1351   b->i    = ui;
1352   b->diag = 0;
1353   b->ilen = 0;
1354   b->imax = 0;
1355   b->row  = perm;
1356   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1357   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1358   b->icol = perm;
1359   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1360   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1361   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1362   b->maxnz = b->nz = ui[am];
1363 
1364   B->factor                 = FACTOR_CHOLESKY;
1365   B->info.factor_mallocs    = reallocs;
1366   B->info.fill_ratio_given  = fill;
1367   if (ai[am] != 0) {
1368     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1369   } else {
1370     B->info.fill_ratio_needed = 0.0;
1371   }
1372   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1373   if (perm_identity){
1374     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1375     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1376   }
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1382 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1383 {
1384   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1385   Mat_SeqSBAIJ   *b;
1386   Mat            B;
1387   PetscErrorCode ierr;
1388   PetscTruth     perm_identity;
1389   PetscReal      fill = info->fill;
1390   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1391   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1392   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1393   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1394   PetscBT        lnkbt;
1395   IS             iperm;
1396 
1397   PetscFunctionBegin;
1398   /* check whether perm is the identity mapping */
1399   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1400   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1401 
1402   if (!perm_identity){
1403     /* check if perm is symmetric! */
1404     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1405     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1406     for (i=0; i<am; i++) {
1407       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1408     }
1409     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1410     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1411   }
1412 
1413   /* initialization */
1414   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1415   ui[0] = 0;
1416 
1417   /* jl: linked list for storing indices of the pivot rows
1418      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1419   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1420   il     = jl + am;
1421   cols   = il + am;
1422   ui_ptr = (PetscInt**)(cols + am);
1423   for (i=0; i<am; i++){
1424     jl[i] = am; il[i] = 0;
1425   }
1426 
1427   /* create and initialize a linked list for storing column indices of the active row k */
1428   nlnk = am + 1;
1429   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1430 
1431   /* initial FreeSpace size is fill*(ai[am]+1) */
1432   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1433   current_space = free_space;
1434 
1435   for (k=0; k<am; k++){  /* for each active row k */
1436     /* initialize lnk by the column indices of row rip[k] of A */
1437     nzk   = 0;
1438     ncols = ai[rip[k]+1] - ai[rip[k]];
1439     ncols_upper = 0;
1440     for (j=0; j<ncols; j++){
1441       i = rip[*(aj + ai[rip[k]] + j)];
1442       if (i >= k){ /* only take upper triangular entry */
1443         cols[ncols_upper] = i;
1444         ncols_upper++;
1445       }
1446     }
1447     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1448     nzk += nlnk;
1449 
1450     /* update lnk by computing fill-in for each pivot row to be merged in */
1451     prow = jl[k]; /* 1st pivot row */
1452 
1453     while (prow < k){
1454       nextprow = jl[prow];
1455       /* merge prow into k-th row */
1456       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1457       jmax = ui[prow+1];
1458       ncols = jmax-jmin;
1459       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1460       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1461       nzk += nlnk;
1462 
1463       /* update il and jl for prow */
1464       if (jmin < jmax){
1465         il[prow] = jmin;
1466         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1467       }
1468       prow = nextprow;
1469     }
1470 
1471     /* if free space is not available, make more free space */
1472     if (current_space->local_remaining<nzk) {
1473       i = am - k + 1; /* num of unfactored rows */
1474       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1475       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1476       reallocs++;
1477     }
1478 
1479     /* copy data into free space, then initialize lnk */
1480     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1481 
1482     /* add the k-th row into il and jl */
1483     if (nzk-1 > 0){
1484       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1485       jl[k] = jl[i]; jl[i] = k;
1486       il[k] = ui[k] + 1;
1487     }
1488     ui_ptr[k] = current_space->array;
1489     current_space->array           += nzk;
1490     current_space->local_used      += nzk;
1491     current_space->local_remaining -= nzk;
1492 
1493     ui[k+1] = ui[k] + nzk;
1494   }
1495 
1496 #if defined(PETSC_USE_DEBUG)
1497   if (ai[am] != 0) {
1498     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1499     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr);
1500     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr);
1501     ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr);
1502   } else {
1503      ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr);
1504   }
1505 #endif
1506 
1507   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1508   ierr = PetscFree(jl);CHKERRQ(ierr);
1509 
1510   /* destroy list of free space and other temporary array(s) */
1511   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1512   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1513   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1514 
1515   /* put together the new matrix in MATSEQSBAIJ format */
1516   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1517   B    = *fact;
1518   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1519   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1520 
1521   b = (Mat_SeqSBAIJ*)B->data;
1522   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1523   b->singlemalloc = PETSC_FALSE;
1524   /* the next line frees the default space generated by the Create() */
1525   ierr = PetscFree(b->a);CHKERRQ(ierr);
1526   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1527   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1528   b->j    = uj;
1529   b->i    = ui;
1530   b->diag = 0;
1531   b->ilen = 0;
1532   b->imax = 0;
1533   b->row  = perm;
1534   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1535   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1536   b->icol = perm;
1537   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1538   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1539   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1540   b->maxnz = b->nz = ui[am];
1541 
1542   B->factor                 = FACTOR_CHOLESKY;
1543   B->info.factor_mallocs    = reallocs;
1544   B->info.fill_ratio_given  = fill;
1545   if (ai[am] != 0) {
1546     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1547   } else {
1548     B->info.fill_ratio_needed = 0.0;
1549   }
1550   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1551   if (perm_identity){
1552     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1553     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1554   }
1555   PetscFunctionReturn(0);
1556 }
1557