xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 9ae221ffe77a9fbfac348b4ed5abed9f5e2e2e41)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 #include "petscbt.h"
6 #include "src/mat/utils/freespace.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
11 {
12   PetscFunctionBegin;
13 
14   SETERRQ(PETSC_ERR_SUP,"Code not written");
15 #if !defined(PETSC_USE_DEBUG)
16   PetscFunctionReturn(0);
17 #endif
18 }
19 
20 
21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
22 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
23 
24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   PetscInt       *c,*r,*ic,i,n = A->m;
64   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65   PetscInt       *ordcol,*iwk,*iperm,*jw;
66   PetscInt       jmax,lfill,job,*o_i,*o_j;
67   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68   PetscReal      af;
69 
70   PetscFunctionBegin;
71 
72   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
73   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
75   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
76   lfill   = (PetscInt)(info->dtcount/2.0);
77   jmax    = (PetscInt)(info->fill*a->nz);
78 
79 
80   /* ------------------------------------------------------------
81      If reorder=.TRUE., then the original matrix has to be
82      reordered to reflect the user selected ordering scheme, and
83      then de-reordered so it is in it's original format.
84      Because Saad's dperm() is NOT in place, we have to copy
85      the original matrix and allocate more storage. . .
86      ------------------------------------------------------------
87   */
88 
89   /* set reorder to true if either isrow or iscol is not identity */
90   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
91   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
92   reorder = PetscNot(reorder);
93 
94 
95   /* storage for ilu factor */
96   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
99   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
100 
101   /* ------------------------------------------------------------
102      Make sure that everything is Fortran formatted (1-Based)
103      ------------------------------------------------------------
104   */
105   for (i=old_i[0];i<old_i[n];i++) {
106     old_j[i]++;
107   }
108   for(i=0;i<n+1;i++) {
109     old_i[i]++;
110   };
111 
112 
113   if (reorder) {
114     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
115     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
116     for(i=0;i<n;i++) {
117       r[i]  = r[i]+1;
118       c[i]  = c[i]+1;
119     }
120     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
122     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
123     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124     for (i=0;i<n;i++) {
125       r[i]  = r[i]-1;
126       c[i]  = c[i]-1;
127     }
128     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
130     o_a = old_a2;
131     o_j = old_j2;
132     o_i = old_i2;
133   } else {
134     o_a = old_a;
135     o_j = old_j;
136     o_i = old_i;
137   }
138 
139   /* ------------------------------------------------------------
140      Call Saad's ilutp() routine to generate the factorization
141      ------------------------------------------------------------
142   */
143 
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
145   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
146   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
147 
148   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149   if (sierr) {
150     switch (sierr) {
151       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
152       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
153       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157     }
158   }
159 
160   ierr = PetscFree(w);CHKERRQ(ierr);
161   ierr = PetscFree(jw);CHKERRQ(ierr);
162 
163   /* ------------------------------------------------------------
164      Saad's routine gives the result in Modified Sparse Row (msr)
165      Convert to Compressed Sparse Row format (csr)
166      ------------------------------------------------------------
167   */
168 
169   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
170   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
171 
172   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173 
174   ierr = PetscFree(iwk);CHKERRQ(ierr);
175   ierr = PetscFree(wk);CHKERRQ(ierr);
176 
177   if (reorder) {
178     ierr = PetscFree(old_a2);CHKERRQ(ierr);
179     ierr = PetscFree(old_j2);CHKERRQ(ierr);
180     ierr = PetscFree(old_i2);CHKERRQ(ierr);
181   } else {
182     /* fix permutation of old_j that the factorization introduced */
183     for (i=old_i[0]; i<old_i[n]; i++) {
184       old_j[i-1] = iperm[old_j[i-1]-1];
185     }
186   }
187 
188   /* get rid of the shift to indices starting at 1 */
189   for (i=0; i<n+1; i++) {
190     old_i[i]--;
191   }
192   for (i=old_i[0];i<old_i[n];i++) {
193     old_j[i]--;
194   }
195 
196   /* Make the factored matrix 0-based */
197   for (i=0; i<n+1; i++) {
198     new_i[i]--;
199   }
200   for (i=new_i[0];i<new_i[n];i++) {
201     new_j[i]--;
202   }
203 
204   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205   /*-- permute the right-hand-side and solution vectors           --*/
206   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
207   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
208   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
209   for(i=0; i<n; i++) {
210     ordcol[i] = ic[iperm[i]-1];
211   };
212   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
213   ierr = ISDestroy(isicol);CHKERRQ(ierr);
214 
215   ierr = PetscFree(iperm);CHKERRQ(ierr);
216 
217   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
218   ierr = PetscFree(ordcol);CHKERRQ(ierr);
219 
220   /*----- put together the new matrix -----*/
221 
222   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   ierr = PetscFree(b->imax);CHKERRQ(ierr);
230   b->sorted        = PETSC_FALSE;
231   b->singlemalloc  = PETSC_FALSE;
232   /* the next line frees the default space generated by the MatCreate() */
233   ierr             = PetscFree(b->a);CHKERRQ(ierr);
234   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
235   b->a             = new_a;
236   b->j             = new_j;
237   b->i             = new_i;
238   b->ilen          = 0;
239   b->imax          = 0;
240   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241   b->row           = isirow;
242   b->col           = iscolf;
243   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
244   b->maxnz = b->nz = new_i[n];
245   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
246   (*fact)->info.factor_mallocs = 0;
247 
248   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
249 
250   /* check out for identical nodes. If found, use inode functions */
251   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
252 
253   af = ((double)b->nz)/((double)a->nz) + .001;
254   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
255   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
256   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
257   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
258 
259   PetscFunctionReturn(0);
260 #endif
261 }
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
265 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
266 {
267   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
268   IS             isicol;
269   PetscErrorCode ierr;
270   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
271   PetscInt       *bi,*bj,*ajtmp;
272   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
273   PetscReal      f;
274   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
275   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
276   PetscBT        lnkbt;
277 
278   PetscFunctionBegin;
279   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
280   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
281   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
282   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
283 
284   /* get new row pointers */
285   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
286   bi[0] = 0;
287 
288   /* bdiag is location of diagonal in factor */
289   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
290   bdiag[0] = 0;
291 
292   /* linked list for storing column indices of the active row */
293   nlnk = n + 1;
294   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
295 
296   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
297   im     = cols + n;
298   bi_ptr = (PetscInt**)(im + n);
299 
300   /* initial FreeSpace size is f*(ai[n]+1) */
301   f = info->fill;
302   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
303   current_space = free_space;
304 
305   for (i=0; i<n; i++) {
306     /* copy previous fill into linked list */
307     nzi = 0;
308     nnz = ai[r[i]+1] - ai[r[i]];
309     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
310     ajtmp = aj + ai[r[i]];
311     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
312     ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
313     nzi += nlnk;
314 
315     /* add pivot rows into linked list */
316     row = lnk[n];
317     while (row < i) {
318       nzbd    = bdiag[row] - bi[row] + 1;
319       ajtmp   = bi_ptr[row] + nzbd;
320       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i */
321       im[row] = nzbd;
322       ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr);
323       nzi     += nlnk;
324       im[row] += nzbd;  /* update im[row]: num of cols with index<=i */
325 
326       row = lnk[row];
327     }
328 
329     bi[i+1] = bi[i] + nzi;
330     im[i]   = nzi;
331 
332     /* mark bdiag */
333     nzbd = 0;
334     nnz  = nzi;
335     k    = lnk[n];
336     while (nnz-- && k < i){
337       nzbd++;
338       k = lnk[k];
339     }
340     bdiag[i] = bi[i] + nzbd;
341 
342     /* if free space is not available, make more free space */
343     if (current_space->local_remaining<nzi) {
344       nnz = (n - i)*nzi; /* estimated and max additional space needed */
345       printf("reallocate %d. Previous space %d\n",nnz,(int)(f*ai[n]));
346       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
347       reallocs++;
348     }
349 
350     /* copy data into free space, then initialize lnk */
351     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
352     bi_ptr[i] = current_space->array;
353     current_space->array           += nzi;
354     current_space->local_used      += nzi;
355     current_space->local_remaining -= nzi;
356   }
357   if (ai[n] != 0) {
358     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
359     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
360     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
361     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
362     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
363   } else {
364     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
365   }
366 
367   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
368   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
369 
370   /* destroy list of free space and other temporary array(s) */
371   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
372   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
373   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
374   ierr = PetscFree(cols);CHKERRQ(ierr);
375 
376   /* put together the new matrix */
377   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
378   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
379   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
380   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
381   b    = (Mat_SeqAIJ*)(*B)->data;
382   ierr = PetscFree(b->imax);CHKERRQ(ierr);
383   b->singlemalloc = PETSC_FALSE;
384   /* the next line frees the default space generated by the Create() */
385   ierr = PetscFree(b->a);CHKERRQ(ierr);
386   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
387   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
388   b->j          = bj;
389   b->i          = bi;
390   b->diag       = bdiag;
391   b->ilen       = 0;
392   b->imax       = 0;
393   b->row        = isrow;
394   b->col        = iscol;
395   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
396   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
397   b->icol       = isicol;
398   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
399 
400   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
401   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
402   b->maxnz = b->nz = bi[n] ;
403 
404   (*B)->factor                 =  FACTOR_LU;
405   (*B)->info.factor_mallocs    = reallocs;
406   (*B)->info.fill_ratio_given  = f;
407   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
408   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
409 
410   if (ai[n] != 0) {
411     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
412   } else {
413     (*B)->info.fill_ratio_needed = 0.0;
414   }
415   PetscFunctionReturn(0);
416 }
417 
418 /* ----------------------------------------------------------- */
419 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
423 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
424 {
425   Mat            C=*B;
426   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
427   IS             isrow = b->row,isicol = b->icol;
428   PetscErrorCode ierr;
429   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
430   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
431   PetscInt       *diag_offset = b->diag,diag,*pj;
432   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
433   PetscReal      zeropivot,rs,d,shift_nz;
434   PetscReal      row_shift,shift_top=0.;
435   PetscTruth     shift_pd;
436   LUShift_Ctx    sctx;
437   PetscInt       newshift;
438 
439   PetscFunctionBegin;
440   shift_nz  = info->shiftnz;
441   shift_pd  = info->shiftpd;
442   zeropivot = info->zeropivot;
443 
444   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
445   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
446   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
447   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
448   rtmps = rtmp; ics = ic;
449 
450   if (!a->diag) {
451     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
452   }
453   /* if both shift schemes are chosen by user, only use shift_pd */
454   if (shift_pd && shift_nz) shift_nz = 0.0;
455   if (shift_pd) { /* set shift_top=max{row_shift} */
456     PetscInt *aai = a->i,*ddiag = a->diag;
457     shift_top = 0;
458     for (i=0; i<n; i++) {
459       d = PetscAbsScalar((a->a)[ddiag[i]]);
460       /* calculate amt of shift needed for this row */
461       if (d<=0) {
462         row_shift = 0;
463       } else {
464         row_shift = -2*d;
465       }
466       v  = a->a+aai[i];
467       nz = aai[i+1] - aai[i];
468       for (j=0; j<nz; j++)
469 	row_shift += PetscAbsScalar(v[j]);
470       if (row_shift>shift_top) shift_top = row_shift;
471     }
472   }
473 
474   sctx.shift_amount = 0;
475   sctx.shift_top    = shift_top;
476   sctx.nshift       = 0;
477   sctx.nshift_max   = 5;
478   sctx.shift_lo     = 0.;
479   sctx.shift_hi     = 1.;
480   do {
481     sctx.lushift = PETSC_FALSE;
482     for (i=0; i<n; i++){
483       nz    = bi[i+1] - bi[i];
484       bjtmp = bj + bi[i];
485       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
486 
487       /* load in initial (unfactored row) */
488       nz    = a->i[r[i]+1] - a->i[r[i]];
489       ajtmp = a->j + a->i[r[i]];
490       v     = a->a + a->i[r[i]];
491       for (j=0; j<nz; j++) {
492         rtmp[ics[ajtmp[j]]] = v[j];
493       }
494       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
495 
496       row = *bjtmp++;
497       while  (row < i) {
498         pc = rtmp + row;
499         if (*pc != 0.0) {
500           pv         = b->a + diag_offset[row];
501           pj         = b->j + diag_offset[row] + 1;
502           multiplier = *pc / *pv++;
503           *pc        = multiplier;
504           nz         = bi[row+1] - diag_offset[row] - 1;
505           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
506           PetscLogFlops(2*nz);
507         }
508         row = *bjtmp++;
509       }
510       /* finished row so stick it into b->a */
511       pv   = b->a + bi[i] ;
512       pj   = b->j + bi[i] ;
513       nz   = bi[i+1] - bi[i];
514       diag = diag_offset[i] - bi[i];
515       rs   = 0.0;
516       for (j=0; j<nz; j++) {
517         pv[j] = rtmps[pj[j]];
518         if (j != diag) rs += PetscAbsScalar(pv[j]);
519       }
520 
521       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
522       sctx.rs  = rs;
523       sctx.pv  = pv[diag];
524       ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
525       if (newshift == 1){
526         break;    /* sctx.shift_amount is updated */
527       } else if (newshift == -1){
528         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
529       }
530     }
531 
532     if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
533       /*
534        * if no shift in this attempt & shifting & started shifting & can refine,
535        * then try lower shift
536        */
537       sctx.shift_hi       = info->shift_fraction;
538       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
539       sctx.shift_amount   = info->shift_fraction * sctx.shift_top;
540       sctx.lushift        = PETSC_TRUE;
541       sctx.nshift++;
542     }
543   } while (sctx.lushift);
544 
545   /* invert diagonal entries for simplier triangular solves */
546   for (i=0; i<n; i++) {
547     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
548   }
549 
550   ierr = PetscFree(rtmp);CHKERRQ(ierr);
551   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
552   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
553   C->factor = FACTOR_LU;
554   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
555   C->assembled = PETSC_TRUE;
556   PetscLogFlops(C->n);
557   if (sctx.nshift){
558     if (shift_nz) {
559       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
560     } else if (shift_pd) {
561       b->lu_shift_fraction = info->shift_fraction;
562       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift);
563     }
564   }
565   PetscFunctionReturn(0);
566 }
567 
568 #undef __FUNCT__
569 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
570 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
571 {
572   PetscFunctionBegin;
573   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
574   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
575   PetscFunctionReturn(0);
576 }
577 
578 
579 /* ----------------------------------------------------------- */
580 #undef __FUNCT__
581 #define __FUNCT__ "MatLUFactor_SeqAIJ"
582 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
583 {
584   PetscErrorCode ierr;
585   Mat            C;
586 
587   PetscFunctionBegin;
588   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
589   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
590   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
591   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
592   PetscFunctionReturn(0);
593 }
594 /* ----------------------------------------------------------- */
595 #undef __FUNCT__
596 #define __FUNCT__ "MatSolve_SeqAIJ"
597 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
598 {
599   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
600   IS             iscol = a->col,isrow = a->row;
601   PetscErrorCode ierr;
602   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
603   PetscInt       nz,*rout,*cout;
604   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
605 
606   PetscFunctionBegin;
607   if (!n) PetscFunctionReturn(0);
608 
609   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
610   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
611   tmp  = a->solve_work;
612 
613   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
614   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
615 
616   /* forward solve the lower triangular */
617   tmp[0] = b[*r++];
618   tmps   = tmp;
619   for (i=1; i<n; i++) {
620     v   = aa + ai[i] ;
621     vi  = aj + ai[i] ;
622     nz  = a->diag[i] - ai[i];
623     sum = b[*r++];
624     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
625     tmp[i] = sum;
626   }
627 
628   /* backward solve the upper triangular */
629   for (i=n-1; i>=0; i--){
630     v   = aa + a->diag[i] + 1;
631     vi  = aj + a->diag[i] + 1;
632     nz  = ai[i+1] - a->diag[i] - 1;
633     sum = tmp[i];
634     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
635     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
636   }
637 
638   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
639   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
640   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
641   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
642   PetscLogFlops(2*a->nz - A->n);
643   PetscFunctionReturn(0);
644 }
645 
646 /* ----------------------------------------------------------- */
647 #undef __FUNCT__
648 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
649 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
650 {
651   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
652   PetscErrorCode ierr;
653   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
654   PetscScalar    *x,*b,*aa = a->a;
655 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
656   PetscInt       adiag_i,i,*vi,nz,ai_i;
657   PetscScalar    *v,sum;
658 #endif
659 
660   PetscFunctionBegin;
661   if (!n) PetscFunctionReturn(0);
662 
663   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
664   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
665 
666 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
667   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
668 #else
669   /* forward solve the lower triangular */
670   x[0] = b[0];
671   for (i=1; i<n; i++) {
672     ai_i = ai[i];
673     v    = aa + ai_i;
674     vi   = aj + ai_i;
675     nz   = adiag[i] - ai_i;
676     sum  = b[i];
677     while (nz--) sum -= *v++ * x[*vi++];
678     x[i] = sum;
679   }
680 
681   /* backward solve the upper triangular */
682   for (i=n-1; i>=0; i--){
683     adiag_i = adiag[i];
684     v       = aa + adiag_i + 1;
685     vi      = aj + adiag_i + 1;
686     nz      = ai[i+1] - adiag_i - 1;
687     sum     = x[i];
688     while (nz--) sum -= *v++ * x[*vi++];
689     x[i]    = sum*aa[adiag_i];
690   }
691 #endif
692   PetscLogFlops(2*a->nz - A->n);
693   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
694   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
695   PetscFunctionReturn(0);
696 }
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
700 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
701 {
702   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
703   IS             iscol = a->col,isrow = a->row;
704   PetscErrorCode ierr;
705   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
706   PetscInt       nz,*rout,*cout;
707   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
708 
709   PetscFunctionBegin;
710   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
711 
712   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
713   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
714   tmp  = a->solve_work;
715 
716   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
717   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
718 
719   /* forward solve the lower triangular */
720   tmp[0] = b[*r++];
721   for (i=1; i<n; i++) {
722     v   = aa + ai[i] ;
723     vi  = aj + ai[i] ;
724     nz  = a->diag[i] - ai[i];
725     sum = b[*r++];
726     while (nz--) sum -= *v++ * tmp[*vi++ ];
727     tmp[i] = sum;
728   }
729 
730   /* backward solve the upper triangular */
731   for (i=n-1; i>=0; i--){
732     v   = aa + a->diag[i] + 1;
733     vi  = aj + a->diag[i] + 1;
734     nz  = ai[i+1] - a->diag[i] - 1;
735     sum = tmp[i];
736     while (nz--) sum -= *v++ * tmp[*vi++ ];
737     tmp[i] = sum*aa[a->diag[i]];
738     x[*c--] += tmp[i];
739   }
740 
741   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
742   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
743   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
744   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
745   PetscLogFlops(2*a->nz);
746 
747   PetscFunctionReturn(0);
748 }
749 /* -------------------------------------------------------------------*/
750 #undef __FUNCT__
751 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
752 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
753 {
754   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
755   IS             iscol = a->col,isrow = a->row;
756   PetscErrorCode ierr;
757   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
758   PetscInt       nz,*rout,*cout,*diag = a->diag;
759   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
760 
761   PetscFunctionBegin;
762   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
763   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
764   tmp  = a->solve_work;
765 
766   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
767   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
768 
769   /* copy the b into temp work space according to permutation */
770   for (i=0; i<n; i++) tmp[i] = b[c[i]];
771 
772   /* forward solve the U^T */
773   for (i=0; i<n; i++) {
774     v   = aa + diag[i] ;
775     vi  = aj + diag[i] + 1;
776     nz  = ai[i+1] - diag[i] - 1;
777     s1  = tmp[i];
778     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
779     while (nz--) {
780       tmp[*vi++ ] -= (*v++)*s1;
781     }
782     tmp[i] = s1;
783   }
784 
785   /* backward solve the L^T */
786   for (i=n-1; i>=0; i--){
787     v   = aa + diag[i] - 1 ;
788     vi  = aj + diag[i] - 1 ;
789     nz  = diag[i] - ai[i];
790     s1  = tmp[i];
791     while (nz--) {
792       tmp[*vi-- ] -= (*v--)*s1;
793     }
794   }
795 
796   /* copy tmp into x according to permutation */
797   for (i=0; i<n; i++) x[r[i]] = tmp[i];
798 
799   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
800   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
801   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
802   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
803 
804   PetscLogFlops(2*a->nz-A->n);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
810 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
811 {
812   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
813   IS             iscol = a->col,isrow = a->row;
814   PetscErrorCode ierr;
815   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
816   PetscInt       nz,*rout,*cout,*diag = a->diag;
817   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
818 
819   PetscFunctionBegin;
820   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
821 
822   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
823   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
824   tmp = a->solve_work;
825 
826   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
827   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
828 
829   /* copy the b into temp work space according to permutation */
830   for (i=0; i<n; i++) tmp[i] = b[c[i]];
831 
832   /* forward solve the U^T */
833   for (i=0; i<n; i++) {
834     v   = aa + diag[i] ;
835     vi  = aj + diag[i] + 1;
836     nz  = ai[i+1] - diag[i] - 1;
837     tmp[i] *= *v++;
838     while (nz--) {
839       tmp[*vi++ ] -= (*v++)*tmp[i];
840     }
841   }
842 
843   /* backward solve the L^T */
844   for (i=n-1; i>=0; i--){
845     v   = aa + diag[i] - 1 ;
846     vi  = aj + diag[i] - 1 ;
847     nz  = diag[i] - ai[i];
848     while (nz--) {
849       tmp[*vi-- ] -= (*v--)*tmp[i];
850     }
851   }
852 
853   /* copy tmp into x according to permutation */
854   for (i=0; i<n; i++) x[r[i]] += tmp[i];
855 
856   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
857   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
858   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
859   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
860 
861   PetscLogFlops(2*a->nz);
862   PetscFunctionReturn(0);
863 }
864 /* ----------------------------------------------------------------*/
865 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
869 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
870 {
871   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
872   IS             isicol;
873   PetscErrorCode ierr;
874   PetscInt       *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
875   PetscInt       *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
876   PetscInt       *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
877   PetscInt       incrlev,nnz,i,levels,diagonal_fill;
878   PetscTruth     col_identity,row_identity;
879   PetscReal      f;
880 
881   PetscFunctionBegin;
882   f             = info->fill;
883   levels        = (PetscInt)info->levels;
884   diagonal_fill = (PetscInt)info->diagonal_fill;
885   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
886 
887   /* special case that simply copies fill pattern */
888   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
889   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
890   if (!levels && row_identity && col_identity) {
891     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
892     (*fact)->factor = FACTOR_LU;
893     b               = (Mat_SeqAIJ*)(*fact)->data;
894     if (!b->diag) {
895       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
896     }
897     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
898     b->row              = isrow;
899     b->col              = iscol;
900     b->icol             = isicol;
901     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
902     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
903     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
904     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
905     PetscFunctionReturn(0);
906   }
907 
908   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
909   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
910 
911   /* get new row pointers */
912   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr);
913   ainew[0] = 0;
914   /* don't know how many column pointers are needed so estimate */
915   jmax = (PetscInt)(f*(ai[n]+1));
916   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr);
917   /* ajfill is level of fill for each fill entry */
918   ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr);
919   /* fill is a linked list of nonzeros in active row */
920   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr);
921   /* im is level for each filled value */
922   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr);
923   /* dloc is location of diagonal in factor */
924   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr);
925   dloc[0]  = 0;
926   for (prow=0; prow<n; prow++) {
927 
928     /* copy current row into linked list */
929     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
930     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
931     xi      = aj + ai[r[prow]];
932     fill[n] = n;
933     fill[prow] = -1; /* marker to indicate if diagonal exists */
934     while (nz--) {
935       fm  = n;
936       idx = ic[*xi++ ];
937       do {
938         m  = fm;
939         fm = fill[m];
940       } while (fm < idx);
941       fill[m]   = idx;
942       fill[idx] = fm;
943       im[idx]   = 0;
944     }
945 
946     /* make sure diagonal entry is included */
947     if (diagonal_fill && fill[prow] == -1) {
948       fm = n;
949       while (fill[fm] < prow) fm = fill[fm];
950       fill[prow] = fill[fm]; /* insert diagonal into linked list */
951       fill[fm]   = prow;
952       im[prow]   = 0;
953       nzf++;
954       dcount++;
955     }
956 
957     nzi = 0;
958     row = fill[n];
959     while (row < prow) {
960       incrlev = im[row] + 1;
961       nz      = dloc[row];
962       xi      = ajnew  + ainew[row]  + nz + 1;
963       flev    = ajfill + ainew[row]  + nz + 1;
964       nnz     = ainew[row+1] - ainew[row] - nz - 1;
965       fm      = row;
966       while (nnz-- > 0) {
967         idx = *xi++ ;
968         if (*flev + incrlev > levels) {
969           flev++;
970           continue;
971         }
972         do {
973           m  = fm;
974           fm = fill[m];
975         } while (fm < idx);
976         if (fm != idx) {
977           im[idx]   = *flev + incrlev;
978           fill[m]   = idx;
979           fill[idx] = fm;
980           fm        = idx;
981           nzf++;
982         } else {
983           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
984         }
985         flev++;
986       }
987       row = fill[row];
988       nzi++;
989     }
990     /* copy new filled row into permanent storage */
991     ainew[prow+1] = ainew[prow] + nzf;
992     if (ainew[prow+1] > jmax) {
993 
994       /* estimate how much additional space we will need */
995       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
996       /* just double the memory each time */
997       /*  maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
998       PetscInt maxadd = jmax;
999       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1000       jmax += maxadd;
1001 
1002       /* allocate a longer ajnew and ajfill */
1003       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1004       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1005       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1006       ajnew  = xi;
1007       ierr   = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr);
1008       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr);
1009       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1010       ajfill = xi;
1011       reallocs++; /* count how many times we realloc */
1012     }
1013     xi          = ajnew + ainew[prow] ;
1014     flev        = ajfill + ainew[prow] ;
1015     dloc[prow]  = nzi;
1016     fm          = fill[n];
1017     while (nzf--) {
1018       *xi++   = fm ;
1019       *flev++ = im[fm];
1020       fm      = fill[fm];
1021     }
1022     /* make sure row has diagonal entry */
1023     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1024       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1025     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1026     }
1027   }
1028   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1029   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1030   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1031   ierr = PetscFree(fill);CHKERRQ(ierr);
1032   ierr = PetscFree(im);CHKERRQ(ierr);
1033 
1034   {
1035     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1036     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1037     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1038     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1039     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1040     if (diagonal_fill) {
1041       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1042     }
1043   }
1044 
1045   /* put together the new matrix */
1046   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1047   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1048   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1049   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1050   b = (Mat_SeqAIJ*)(*fact)->data;
1051   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1052   b->singlemalloc = PETSC_FALSE;
1053   len = (ainew[n] )*sizeof(PetscScalar);
1054   /* the next line frees the default space generated by the Create() */
1055   ierr = PetscFree(b->a);CHKERRQ(ierr);
1056   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1057   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1058   b->j          = ajnew;
1059   b->i          = ainew;
1060   for (i=0; i<n; i++) dloc[i] += ainew[i];
1061   b->diag       = dloc;
1062   b->ilen       = 0;
1063   b->imax       = 0;
1064   b->row        = isrow;
1065   b->col        = iscol;
1066   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1067   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1068   b->icol       = isicol;
1069   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1070   /* In b structure:  Free imax, ilen, old a, old j.
1071      Allocate dloc, solve_work, new a, new j */
1072   ierr = PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1073   b->maxnz             = b->nz = ainew[n] ;
1074   (*fact)->factor = FACTOR_LU;
1075   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1076   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1077   (*fact)->info.factor_mallocs    = reallocs;
1078   (*fact)->info.fill_ratio_given  = f;
1079   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #include "src/mat/impls/sbaij/seq/sbaij.h"
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1086 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1087 {
1088   Mat            C = *B;
1089   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1090   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1091   IS             ip=b->row;
1092   PetscErrorCode ierr;
1093   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1094   PetscInt       *ai=a->i,*aj=a->j;
1095   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1096   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1097   PetscReal      zeropivot,rs,shiftnz;
1098   PetscTruth     shiftpd;
1099   ChShift_Ctx    sctx;
1100   PetscInt       newshift;
1101 
1102   PetscFunctionBegin;
1103   shiftnz   = info->shiftnz;
1104   shiftpd   = info->shiftpd;
1105   zeropivot = info->zeropivot;
1106 
1107   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1108 
1109   /* initialization */
1110   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1111   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1112   jl   = il + mbs;
1113   rtmp = (MatScalar*)(jl + mbs);
1114 
1115   sctx.shift_amount = 0;
1116   sctx.nshift       = 0;
1117   do {
1118     sctx.chshift = PETSC_FALSE;
1119     for (i=0; i<mbs; i++) {
1120       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1121     }
1122 
1123     for (k = 0; k<mbs; k++){
1124       bval = ba + bi[k];
1125       /* initialize k-th row by the perm[k]-th row of A */
1126       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1127       for (j = jmin; j < jmax; j++){
1128         col = rip[aj[j]];
1129         if (col >= k){ /* only take upper triangular entry */
1130           rtmp[col] = aa[j];
1131           *bval++  = 0.0; /* for in-place factorization */
1132         }
1133       }
1134       /* shift the diagonal of the matrix */
1135       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1136 
1137       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1138       dk = rtmp[k];
1139       i = jl[k]; /* first row to be added to k_th row  */
1140 
1141       while (i < k){
1142         nexti = jl[i]; /* next row to be added to k_th row */
1143 
1144         /* compute multiplier, update diag(k) and U(i,k) */
1145         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1146         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1147         dk += uikdi*ba[ili];
1148         ba[ili] = uikdi; /* -U(i,k) */
1149 
1150         /* add multiple of row i to k-th row */
1151         jmin = ili + 1; jmax = bi[i+1];
1152         if (jmin < jmax){
1153           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1154           /* update il and jl for row i */
1155           il[i] = jmin;
1156           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1157         }
1158         i = nexti;
1159       }
1160 
1161       /* shift the diagonals when zero pivot is detected */
1162       /* compute rs=sum of abs(off-diagonal) */
1163       rs   = 0.0;
1164       jmin = bi[k]+1;
1165       nz   = bi[k+1] - jmin;
1166       if (nz){
1167         bcol = bj + jmin;
1168         while (nz--){
1169           rs += PetscAbsScalar(rtmp[*bcol]);
1170           bcol++;
1171         }
1172       }
1173 
1174       sctx.rs = rs;
1175       sctx.pv = dk;
1176       ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr);
1177       if (newshift == 1){
1178         break;    /* sctx.shift_amount is updated */
1179       } else if (newshift == -1){
1180         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1181       }
1182 
1183       /* copy data into U(k,:) */
1184       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1185       jmin = bi[k]+1; jmax = bi[k+1];
1186       if (jmin < jmax) {
1187         for (j=jmin; j<jmax; j++){
1188           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1189         }
1190         /* add the k-th row into il and jl */
1191         il[k] = jmin;
1192         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1193       }
1194     }
1195   } while (sctx.chshift);
1196   ierr = PetscFree(il);CHKERRQ(ierr);
1197 
1198   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1199   C->factor       = FACTOR_CHOLESKY;
1200   C->assembled    = PETSC_TRUE;
1201   C->preallocated = PETSC_TRUE;
1202   PetscLogFlops(C->m);
1203   if (sctx.nshift){
1204     if (shiftnz) {
1205       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1206     } else if (shiftpd) {
1207       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1208     }
1209   }
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1215 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1216 {
1217   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1218   Mat_SeqSBAIJ   *b;
1219   Mat            B;
1220   PetscErrorCode ierr;
1221   PetscTruth     perm_identity;
1222   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1223   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1224   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1225   PetscInt       ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1226   PetscReal      fill=info->fill,levels=info->levels;
1227   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1228   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1229   PetscBT        lnkbt;
1230 
1231   PetscFunctionBegin;
1232   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1233   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1234 
1235   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1236   ui[0] = 0;
1237 
1238   /* special case that simply copies fill pattern */
1239   if (!levels && perm_identity) {
1240     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1241     for (i=0; i<am; i++) {
1242       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1243     }
1244     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1245     cols = uj;
1246     for (i=0; i<am; i++) {
1247       aj    = a->j + a->diag[i];
1248       ncols = ui[i+1] - ui[i];
1249       for (j=0; j<ncols; j++) *cols++ = *aj++;
1250     }
1251   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1252     /* initialization */
1253     ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1254 
1255     /* jl: linked list for storing indices of the pivot rows
1256        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1257     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1258     il         = jl + am;
1259     uj_ptr     = (PetscInt**)(il + am);
1260     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1261     for (i=0; i<am; i++){
1262       jl[i] = am; il[i] = 0;
1263     }
1264 
1265     /* create and initialize a linked list for storing column indices of the active row k */
1266     nlnk = am + 1;
1267     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1268 
1269     /* initial FreeSpace size is fill*(ai[am]+1) */
1270     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1271     current_space = free_space;
1272     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1273     current_space_lvl = free_space_lvl;
1274 
1275     for (k=0; k<am; k++){  /* for each active row k */
1276       /* initialize lnk by the column indices of row rip[k] of A */
1277       nzk   = 0;
1278       ncols = ai[rip[k]+1] - ai[rip[k]];
1279       ncols_upper = 0;
1280       cols        = cols_lvl + am;
1281       for (j=0; j<ncols; j++){
1282         i = rip[*(aj + ai[rip[k]] + j)];
1283         if (i >= k){ /* only take upper triangular entry */
1284           cols[ncols_upper] = i;
1285           cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
1286           ncols_upper++;
1287         }
1288       }
1289       ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1290       nzk += nlnk;
1291 
1292       /* update lnk by computing fill-in for each pivot row to be merged in */
1293       prow = jl[k]; /* 1st pivot row */
1294 
1295       while (prow < k){
1296         nextprow = jl[prow];
1297 
1298         /* merge prow into k-th row */
1299         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1300         jmax = ui[prow+1];
1301         ncols = jmax-jmin;
1302         i     = jmin - ui[prow];
1303         cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1304         for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1305         ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1306         nzk += nlnk;
1307 
1308         /* update il and jl for prow */
1309         if (jmin < jmax){
1310           il[prow] = jmin;
1311           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1312         }
1313         prow = nextprow;
1314       }
1315 
1316       /* if free space is not available, make more free space */
1317       if (current_space->local_remaining<nzk) {
1318         i = am - k + 1; /* num of unfactored rows */
1319         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1320         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1321         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1322         reallocs++;
1323       }
1324 
1325       /* copy data into free_space and free_space_lvl, then initialize lnk */
1326       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1327 
1328       /* add the k-th row into il and jl */
1329       if (nzk > 1){
1330         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1331         jl[k] = jl[i]; jl[i] = k;
1332         il[k] = ui[k] + 1;
1333       }
1334       uj_ptr[k]     = current_space->array;
1335       uj_lvl_ptr[k] = current_space_lvl->array;
1336 
1337       current_space->array           += nzk;
1338       current_space->local_used      += nzk;
1339       current_space->local_remaining -= nzk;
1340 
1341       current_space_lvl->array           += nzk;
1342       current_space_lvl->local_used      += nzk;
1343       current_space_lvl->local_remaining -= nzk;
1344 
1345       ui[k+1] = ui[k] + nzk;
1346     }
1347 
1348     if (ai[am] != 0) {
1349       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1350       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1351       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1352       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1353     } else {
1354       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1355     }
1356 
1357     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1358     ierr = PetscFree(jl);CHKERRQ(ierr);
1359     ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1360 
1361     /* destroy list of free space and other temporary array(s) */
1362     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1363     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1364     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1365     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1366 
1367   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1368 
1369   /* put together the new matrix in MATSEQSBAIJ format */
1370   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1371   B = *fact;
1372   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1373   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1374 
1375   b    = (Mat_SeqSBAIJ*)B->data;
1376   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1377   b->singlemalloc = PETSC_FALSE;
1378   /* the next line frees the default space generated by the Create() */
1379   ierr = PetscFree(b->a);CHKERRQ(ierr);
1380   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1381   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1382   b->j    = uj;
1383   b->i    = ui;
1384   b->diag = 0;
1385   b->ilen = 0;
1386   b->imax = 0;
1387   b->row  = perm;
1388   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1389   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1390   b->icol = perm;
1391   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1392   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1393   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1394   b->maxnz = b->nz = ui[am];
1395 
1396   B->factor                 = FACTOR_CHOLESKY;
1397   B->info.factor_mallocs    = reallocs;
1398   B->info.fill_ratio_given  = fill;
1399   if (ai[am] != 0) {
1400     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1401   } else {
1402     B->info.fill_ratio_needed = 0.0;
1403   }
1404   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1405   if (perm_identity){
1406     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1407     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1408   }
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1414 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1415 {
1416   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1417   Mat_SeqSBAIJ   *b;
1418   Mat            B;
1419   PetscErrorCode ierr;
1420   PetscTruth     perm_identity;
1421   PetscReal      fill = info->fill;
1422   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1423   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1424   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1425   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1426   PetscBT        lnkbt;
1427   IS             iperm;
1428 
1429   PetscFunctionBegin;
1430   /* check whether perm is the identity mapping */
1431   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1432   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1433 
1434   if (!perm_identity){
1435     /* check if perm is symmetric! */
1436     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1437     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1438     for (i=0; i<am; i++) {
1439       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1440     }
1441     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1442     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1443   }
1444 
1445   /* initialization */
1446   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1447   ui[0] = 0;
1448 
1449   /* jl: linked list for storing indices of the pivot rows
1450      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1451   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1452   il     = jl + am;
1453   cols   = il + am;
1454   ui_ptr = (PetscInt**)(cols + am);
1455   for (i=0; i<am; i++){
1456     jl[i] = am; il[i] = 0;
1457   }
1458 
1459   /* create and initialize a linked list for storing column indices of the active row k */
1460   nlnk = am + 1;
1461   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1462 
1463   /* initial FreeSpace size is fill*(ai[am]+1) */
1464   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1465   current_space = free_space;
1466 
1467   for (k=0; k<am; k++){  /* for each active row k */
1468     /* initialize lnk by the column indices of row rip[k] of A */
1469     nzk   = 0;
1470     ncols = ai[rip[k]+1] - ai[rip[k]];
1471     ncols_upper = 0;
1472     for (j=0; j<ncols; j++){
1473       i = rip[*(aj + ai[rip[k]] + j)];
1474       if (i >= k){ /* only take upper triangular entry */
1475         cols[ncols_upper] = i;
1476         ncols_upper++;
1477       }
1478     }
1479     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1480     nzk += nlnk;
1481 
1482     /* update lnk by computing fill-in for each pivot row to be merged in */
1483     prow = jl[k]; /* 1st pivot row */
1484 
1485     while (prow < k){
1486       nextprow = jl[prow];
1487       /* merge prow into k-th row */
1488       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1489       jmax = ui[prow+1];
1490       ncols = jmax-jmin;
1491       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1492       ierr = PetscLLAddSorted(ncols,uj_ptr,prow,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1493       nzk += nlnk;
1494 
1495       /* update il and jl for prow */
1496       if (jmin < jmax){
1497         il[prow] = jmin;
1498         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1499       }
1500       prow = nextprow;
1501     }
1502 
1503     /* if free space is not available, make more free space */
1504     if (current_space->local_remaining<nzk) {
1505       i = am - k + 1; /* num of unfactored rows */
1506       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1507       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1508       reallocs++;
1509     }
1510 
1511     /* copy data into free space, then initialize lnk */
1512     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1513 
1514     /* add the k-th row into il and jl */
1515     if (nzk-1 > 0){
1516       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1517       jl[k] = jl[i]; jl[i] = k;
1518       il[k] = ui[k] + 1;
1519     }
1520     ui_ptr[k] = current_space->array;
1521     current_space->array           += nzk;
1522     current_space->local_used      += nzk;
1523     current_space->local_remaining -= nzk;
1524 
1525     ui[k+1] = ui[k] + nzk;
1526   }
1527 
1528   if (ai[am] != 0) {
1529     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1530     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1531     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1532     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1533   } else {
1534      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1535   }
1536 
1537   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1538   ierr = PetscFree(jl);CHKERRQ(ierr);
1539 
1540   /* destroy list of free space and other temporary array(s) */
1541   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1542   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1543   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1544 
1545   /* put together the new matrix in MATSEQSBAIJ format */
1546   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1547   B    = *fact;
1548   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1549   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1550 
1551   b = (Mat_SeqSBAIJ*)B->data;
1552   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1553   b->singlemalloc = PETSC_FALSE;
1554   /* the next line frees the default space generated by the Create() */
1555   ierr = PetscFree(b->a);CHKERRQ(ierr);
1556   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1557   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1558   b->j    = uj;
1559   b->i    = ui;
1560   b->diag = 0;
1561   b->ilen = 0;
1562   b->imax = 0;
1563   b->row  = perm;
1564   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1565   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1566   b->icol = perm;
1567   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1568   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1569   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1570   b->maxnz = b->nz = ui[am];
1571 
1572   B->factor                 = FACTOR_CHOLESKY;
1573   B->info.factor_mallocs    = reallocs;
1574   B->info.fill_ratio_given  = fill;
1575   if (ai[am] != 0) {
1576     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1577   } else {
1578     B->info.fill_ratio_needed = 0.0;
1579   }
1580   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1581   if (perm_identity){
1582     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1583     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587