xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision a0a356effb918aab8bf03fb5d2f1228efdf7181a)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 #include "petscbt.h"
6 #include "src/mat/utils/freespace.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
11 {
12   PetscFunctionBegin;
13 
14   SETERRQ(PETSC_ERR_SUP,"Code not written");
15 #if !defined(PETSC_USE_DEBUG)
16   PetscFunctionReturn(0);
17 #endif
18 }
19 
20 
21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
22 
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26 
27 #undef __FUNCT__
28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
29   /* ------------------------------------------------------------
30 
31           This interface was contribed by Tony Caola
32 
33      This routine is an interface to the pivoting drop-tolerance
34      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
35      SPARSEKIT2.
36 
37      The SPARSEKIT2 routines used here are covered by the GNU
38      copyright; see the file gnu in this directory.
39 
40      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
41      help in getting this routine ironed out.
42 
43      The major drawback to this routine is that if info->fill is
44      not large enough it fails rather than allocating more space;
45      this can be fixed by hacking/improving the f2c version of
46      Yousef Saad's code.
47 
48      ------------------------------------------------------------
49 */
50 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
51 {
52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
53   PetscFunctionBegin;
54   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
55   You can obtain the drop tolerance routines by installing PETSc from\n\
56   www.mcs.anl.gov/petsc\n");
57 #else
58   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
59   IS             iscolf,isicol,isirow;
60   PetscTruth     reorder;
61   PetscErrorCode ierr,sierr;
62   PetscInt       *c,*r,*ic,i,n = A->m;
63   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
64   PetscInt       *ordcol,*iwk,*iperm,*jw;
65   PetscInt       jmax,lfill,job,*o_i,*o_j;
66   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
67   PetscReal      af;
68 
69   PetscFunctionBegin;
70 
71   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
72   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
73   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
74   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
75   lfill   = (PetscInt)(info->dtcount/2.0);
76   jmax    = (PetscInt)(info->fill*a->nz);
77 
78 
79   /* ------------------------------------------------------------
80      If reorder=.TRUE., then the original matrix has to be
81      reordered to reflect the user selected ordering scheme, and
82      then de-reordered so it is in it's original format.
83      Because Saad's dperm() is NOT in place, we have to copy
84      the original matrix and allocate more storage. . .
85      ------------------------------------------------------------
86   */
87 
88   /* set reorder to true if either isrow or iscol is not identity */
89   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
90   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
91   reorder = PetscNot(reorder);
92 
93 
94   /* storage for ilu factor */
95   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
98   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
99 
100   /* ------------------------------------------------------------
101      Make sure that everything is Fortran formatted (1-Based)
102      ------------------------------------------------------------
103   */
104   for (i=old_i[0];i<old_i[n];i++) {
105     old_j[i]++;
106   }
107   for(i=0;i<n+1;i++) {
108     old_i[i]++;
109   };
110 
111 
112   if (reorder) {
113     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
114     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
115     for(i=0;i<n;i++) {
116       r[i]  = r[i]+1;
117       c[i]  = c[i]+1;
118     }
119     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
122     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
123     for (i=0;i<n;i++) {
124       r[i]  = r[i]-1;
125       c[i]  = c[i]-1;
126     }
127     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
128     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
222   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
223   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
224   (*fact)->factor    = FACTOR_LU;
225   (*fact)->assembled = PETSC_TRUE;
226 
227   b = (Mat_SeqAIJ*)(*fact)->data;
228   ierr = PetscFree(b->imax);CHKERRQ(ierr);
229   b->sorted        = PETSC_FALSE;
230   b->singlemalloc  = PETSC_FALSE;
231   /* the next line frees the default space generated by the MatCreate() */
232   ierr             = PetscFree(b->a);CHKERRQ(ierr);
233   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
234   b->a             = new_a;
235   b->j             = new_j;
236   b->i             = new_i;
237   b->ilen          = 0;
238   b->imax          = 0;
239   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240   b->row           = isirow;
241   b->col           = iscolf;
242   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
243   b->maxnz = b->nz = new_i[n];
244   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
245   (*fact)->info.factor_mallocs = 0;
246 
247   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
248 
249   af = ((double)b->nz)/((double)a->nz) + .001;
250   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
251   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
254 
255   ierr = MatILUDTFactor_Inode(A,info,isrow,iscol,fact);CHKERRQ(ierr);
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 #undef __FUNCT__
262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
264 {
265   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
266   IS             isicol;
267   PetscErrorCode ierr;
268   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
269   PetscInt       *bi,*bj,*ajtmp;
270   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
271   PetscReal      f;
272   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
273   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
274   PetscBT        lnkbt;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
284   bi[0] = 0;
285 
286   /* bdiag is location of diagonal in factor */
287   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
288   bdiag[0] = 0;
289 
290   /* linked list for storing column indices of the active row */
291   nlnk = n + 1;
292   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
293 
294   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
295   im     = cols + n;
296   bi_ptr = (PetscInt**)(im + n);
297 
298   /* initial FreeSpace size is f*(ai[n]+1) */
299   f = info->fill;
300   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
301   current_space = free_space;
302 
303   for (i=0; i<n; i++) {
304     /* copy previous fill into linked list */
305     nzi = 0;
306     nnz = ai[r[i]+1] - ai[r[i]];
307     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
308     ajtmp = aj + ai[r[i]];
309     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
310     ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
311     nzi += nlnk;
312 
313     /* add pivot rows into linked list */
314     row = lnk[n];
315     while (row < i) {
316       nzbd    = bdiag[row] - bi[row] + 1;
317       ajtmp   = bi_ptr[row] + nzbd;
318       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i */
319       im[row] = nzbd;
320       ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr);
321       nzi     += nlnk;
322       im[row] += nzbd;  /* update im[row]: num of cols with index<=i */
323 
324       row = lnk[row];
325     }
326 
327     bi[i+1] = bi[i] + nzi;
328     im[i]   = nzi;
329 
330     /* mark bdiag */
331     nzbd = 0;
332     nnz  = nzi;
333     k    = lnk[n];
334     while (nnz-- && k < i){
335       nzbd++;
336       k = lnk[k];
337     }
338     bdiag[i] = bi[i] + nzbd;
339 
340     /* if free space is not available, make more free space */
341     if (current_space->local_remaining<nzi) {
342       nnz = (n - i)*nzi; /* estimated and max additional space needed */
343       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
344       reallocs++;
345     }
346 
347     /* copy data into free space, then initialize lnk */
348     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
349     bi_ptr[i] = current_space->array;
350     current_space->array           += nzi;
351     current_space->local_used      += nzi;
352     current_space->local_remaining -= nzi;
353   }
354   if (ai[n] != 0) {
355     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
356     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
357     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
358     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
359     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
360   } else {
361     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
362   }
363 
364   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
365   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
366 
367   /* destroy list of free space and other temporary array(s) */
368   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
369   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
370   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
371   ierr = PetscFree(cols);CHKERRQ(ierr);
372 
373   /* put together the new matrix */
374   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
375   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
376   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
377   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
378   b    = (Mat_SeqAIJ*)(*B)->data;
379   ierr = PetscFree(b->imax);CHKERRQ(ierr);
380   b->singlemalloc = PETSC_FALSE;
381   /* the next line frees the default space generated by the Create() */
382   ierr = PetscFree(b->a);CHKERRQ(ierr);
383   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
384   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
385   b->j          = bj;
386   b->i          = bi;
387   b->diag       = bdiag;
388   b->ilen       = 0;
389   b->imax       = 0;
390   b->row        = isrow;
391   b->col        = iscol;
392   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
393   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
394   b->icol       = isicol;
395   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
396 
397   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
398   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
399   b->maxnz = b->nz = bi[n] ;
400 
401   (*B)->factor                 =  FACTOR_LU;
402   (*B)->info.factor_mallocs    = reallocs;
403   (*B)->info.fill_ratio_given  = f;
404 
405   if (ai[n] != 0) {
406     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
407   } else {
408     (*B)->info.fill_ratio_needed = 0.0;
409   }
410   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
411   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
412   PetscFunctionReturn(0);
413 }
414 
415 /* ----------------------------------------------------------- */
416 #undef __FUNCT__
417 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
418 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
419 {
420   Mat            C=*B;
421   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
422   IS             isrow = b->row,isicol = b->icol;
423   PetscErrorCode ierr;
424   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
425   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
426   PetscInt       *diag_offset = b->diag,diag,*pj;
427   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
428   PetscReal      zeropivot,rs,d,shift_nz;
429   PetscReal      row_shift,shift_top=0.;
430   PetscTruth     shift_pd;
431   LUShift_Ctx    sctx;
432   PetscInt       newshift;
433 
434   PetscFunctionBegin;
435   shift_nz  = info->shiftnz;
436   shift_pd  = info->shiftpd;
437   zeropivot = info->zeropivot;
438 
439   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
440   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
441   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
442   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
443   rtmps = rtmp; ics = ic;
444 
445   if (!a->diag) {
446     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
447   }
448   /* if both shift schemes are chosen by user, only use shift_pd */
449   if (shift_pd && shift_nz) shift_nz = 0.0;
450   if (shift_pd) { /* set shift_top=max{row_shift} */
451     PetscInt *aai = a->i,*ddiag = a->diag;
452     shift_top = 0;
453     for (i=0; i<n; i++) {
454       d = PetscRealPart((a->a)[ddiag[i]]);
455       /* calculate amt of shift needed for this row */
456       if (d<=0) {
457         row_shift = 0;
458       } else {
459         row_shift = -2*d;
460       }
461       v  = a->a+aai[i];
462       nz = aai[i+1] - aai[i];
463       for (j=0; j<nz; j++)
464 	row_shift += PetscAbsScalar(v[j]);
465       if (row_shift>shift_top) shift_top = row_shift;
466     }
467     if (shift_top == 0.0) shift_top += 1.e-12;
468     sctx.shift_top    = shift_top;
469     sctx.nshift_max   = 5;
470     sctx.shift_lo     = 0.;
471     sctx.shift_hi     = 1.;
472   }
473 
474   sctx.shift_amount = 0;
475   sctx.nshift       = 0;
476   do {
477     sctx.lushift = PETSC_FALSE;
478     for (i=0; i<n; i++){
479       nz    = bi[i+1] - bi[i];
480       bjtmp = bj + bi[i];
481       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
482 
483       /* load in initial (unfactored row) */
484       nz    = a->i[r[i]+1] - a->i[r[i]];
485       ajtmp = a->j + a->i[r[i]];
486       v     = a->a + a->i[r[i]];
487       for (j=0; j<nz; j++) {
488         rtmp[ics[ajtmp[j]]] = v[j];
489       }
490       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
491 
492       row = *bjtmp++;
493       while  (row < i) {
494         pc = rtmp + row;
495         if (*pc != 0.0) {
496           pv         = b->a + diag_offset[row];
497           pj         = b->j + diag_offset[row] + 1;
498           multiplier = *pc / *pv++;
499           *pc        = multiplier;
500           nz         = bi[row+1] - diag_offset[row] - 1;
501           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
502           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
503         }
504         row = *bjtmp++;
505       }
506       /* finished row so stick it into b->a */
507       pv   = b->a + bi[i] ;
508       pj   = b->j + bi[i] ;
509       nz   = bi[i+1] - bi[i];
510       diag = diag_offset[i] - bi[i];
511       rs   = 0.0;
512       for (j=0; j<nz; j++) {
513         pv[j] = rtmps[pj[j]];
514         if (j != diag) rs += PetscAbsScalar(pv[j]);
515       }
516 
517       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
518       sctx.rs  = rs;
519       sctx.pv  = pv[diag];
520       ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
521       if (newshift == 1){
522         break;    /* sctx.shift_amount is updated */
523       } else if (newshift == -1){
524         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
525       }
526     }
527 
528     if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
529       /*
530        * if no shift in this attempt & shifting & started shifting & can refine,
531        * then try lower shift
532        */
533       sctx.shift_hi        = info->shift_fraction;
534       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
535       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
536       sctx.lushift         = PETSC_TRUE;
537       sctx.nshift++;
538     }
539   } while (sctx.lushift);
540 
541   /* invert diagonal entries for simplier triangular solves */
542   for (i=0; i<n; i++) {
543     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
544   }
545 
546   ierr = PetscFree(rtmp);CHKERRQ(ierr);
547   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
548   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
549   C->factor = FACTOR_LU;
550   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
551   C->assembled = PETSC_TRUE;
552   ierr = PetscLogFlops(C->n);CHKERRQ(ierr);
553   if (sctx.nshift){
554     if (shift_nz) {
555       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
556     } else if (shift_pd) {
557       b->lu_shift_fraction = info->shift_fraction;
558       PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,shift_top);
559     }
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
566 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
567 {
568   PetscFunctionBegin;
569   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
570   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
571   PetscFunctionReturn(0);
572 }
573 
574 
575 /* ----------------------------------------------------------- */
576 #undef __FUNCT__
577 #define __FUNCT__ "MatLUFactor_SeqAIJ"
578 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
579 {
580   PetscErrorCode ierr;
581   Mat            C;
582 
583   PetscFunctionBegin;
584   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
585   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
586   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
587   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 /* ----------------------------------------------------------- */
591 #undef __FUNCT__
592 #define __FUNCT__ "MatSolve_SeqAIJ"
593 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
594 {
595   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
596   IS             iscol = a->col,isrow = a->row;
597   PetscErrorCode ierr;
598   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
599   PetscInt       nz,*rout,*cout;
600   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
601 
602   PetscFunctionBegin;
603   if (!n) PetscFunctionReturn(0);
604 
605   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
606   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
607   tmp  = a->solve_work;
608 
609   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
610   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
611 
612   /* forward solve the lower triangular */
613   tmp[0] = b[*r++];
614   tmps   = tmp;
615   for (i=1; i<n; i++) {
616     v   = aa + ai[i] ;
617     vi  = aj + ai[i] ;
618     nz  = a->diag[i] - ai[i];
619     sum = b[*r++];
620     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
621     tmp[i] = sum;
622   }
623 
624   /* backward solve the upper triangular */
625   for (i=n-1; i>=0; i--){
626     v   = aa + a->diag[i] + 1;
627     vi  = aj + a->diag[i] + 1;
628     nz  = ai[i+1] - a->diag[i] - 1;
629     sum = tmp[i];
630     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
631     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
632   }
633 
634   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
635   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
636   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
637   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
638   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
639   PetscFunctionReturn(0);
640 }
641 
642 /* ----------------------------------------------------------- */
643 #undef __FUNCT__
644 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
645 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
646 {
647   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
648   PetscErrorCode ierr;
649   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
650   PetscScalar    *x,*b,*aa = a->a;
651 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
652   PetscInt       adiag_i,i,*vi,nz,ai_i;
653   PetscScalar    *v,sum;
654 #endif
655 
656   PetscFunctionBegin;
657   if (!n) PetscFunctionReturn(0);
658 
659   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
660   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
661 
662 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
663   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
664 #else
665   /* forward solve the lower triangular */
666   x[0] = b[0];
667   for (i=1; i<n; i++) {
668     ai_i = ai[i];
669     v    = aa + ai_i;
670     vi   = aj + ai_i;
671     nz   = adiag[i] - ai_i;
672     sum  = b[i];
673     while (nz--) sum -= *v++ * x[*vi++];
674     x[i] = sum;
675   }
676 
677   /* backward solve the upper triangular */
678   for (i=n-1; i>=0; i--){
679     adiag_i = adiag[i];
680     v       = aa + adiag_i + 1;
681     vi      = aj + adiag_i + 1;
682     nz      = ai[i+1] - adiag_i - 1;
683     sum     = x[i];
684     while (nz--) sum -= *v++ * x[*vi++];
685     x[i]    = sum*aa[adiag_i];
686   }
687 #endif
688   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
689   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
690   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
696 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
697 {
698   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
699   IS             iscol = a->col,isrow = a->row;
700   PetscErrorCode ierr;
701   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
702   PetscInt       nz,*rout,*cout;
703   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
704 
705   PetscFunctionBegin;
706   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
707 
708   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
709   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
710   tmp  = a->solve_work;
711 
712   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
713   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
714 
715   /* forward solve the lower triangular */
716   tmp[0] = b[*r++];
717   for (i=1; i<n; i++) {
718     v   = aa + ai[i] ;
719     vi  = aj + ai[i] ;
720     nz  = a->diag[i] - ai[i];
721     sum = b[*r++];
722     while (nz--) sum -= *v++ * tmp[*vi++ ];
723     tmp[i] = sum;
724   }
725 
726   /* backward solve the upper triangular */
727   for (i=n-1; i>=0; i--){
728     v   = aa + a->diag[i] + 1;
729     vi  = aj + a->diag[i] + 1;
730     nz  = ai[i+1] - a->diag[i] - 1;
731     sum = tmp[i];
732     while (nz--) sum -= *v++ * tmp[*vi++ ];
733     tmp[i] = sum*aa[a->diag[i]];
734     x[*c--] += tmp[i];
735   }
736 
737   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
738   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
739   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
740   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
741   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
742 
743   PetscFunctionReturn(0);
744 }
745 /* -------------------------------------------------------------------*/
746 #undef __FUNCT__
747 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
748 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
749 {
750   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
751   IS             iscol = a->col,isrow = a->row;
752   PetscErrorCode ierr;
753   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
754   PetscInt       nz,*rout,*cout,*diag = a->diag;
755   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
756 
757   PetscFunctionBegin;
758   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
759   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
760   tmp  = a->solve_work;
761 
762   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
763   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
764 
765   /* copy the b into temp work space according to permutation */
766   for (i=0; i<n; i++) tmp[i] = b[c[i]];
767 
768   /* forward solve the U^T */
769   for (i=0; i<n; i++) {
770     v   = aa + diag[i] ;
771     vi  = aj + diag[i] + 1;
772     nz  = ai[i+1] - diag[i] - 1;
773     s1  = tmp[i];
774     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
775     while (nz--) {
776       tmp[*vi++ ] -= (*v++)*s1;
777     }
778     tmp[i] = s1;
779   }
780 
781   /* backward solve the L^T */
782   for (i=n-1; i>=0; i--){
783     v   = aa + diag[i] - 1 ;
784     vi  = aj + diag[i] - 1 ;
785     nz  = diag[i] - ai[i];
786     s1  = tmp[i];
787     while (nz--) {
788       tmp[*vi-- ] -= (*v--)*s1;
789     }
790   }
791 
792   /* copy tmp into x according to permutation */
793   for (i=0; i<n; i++) x[r[i]] = tmp[i];
794 
795   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
796   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
797   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
798   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
799 
800   ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
806 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
807 {
808   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
809   IS             iscol = a->col,isrow = a->row;
810   PetscErrorCode ierr;
811   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
812   PetscInt       nz,*rout,*cout,*diag = a->diag;
813   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
814 
815   PetscFunctionBegin;
816   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
817 
818   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820   tmp = a->solve_work;
821 
822   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
823   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
824 
825   /* copy the b into temp work space according to permutation */
826   for (i=0; i<n; i++) tmp[i] = b[c[i]];
827 
828   /* forward solve the U^T */
829   for (i=0; i<n; i++) {
830     v   = aa + diag[i] ;
831     vi  = aj + diag[i] + 1;
832     nz  = ai[i+1] - diag[i] - 1;
833     tmp[i] *= *v++;
834     while (nz--) {
835       tmp[*vi++ ] -= (*v++)*tmp[i];
836     }
837   }
838 
839   /* backward solve the L^T */
840   for (i=n-1; i>=0; i--){
841     v   = aa + diag[i] - 1 ;
842     vi  = aj + diag[i] - 1 ;
843     nz  = diag[i] - ai[i];
844     while (nz--) {
845       tmp[*vi-- ] -= (*v--)*tmp[i];
846     }
847   }
848 
849   /* copy tmp into x according to permutation */
850   for (i=0; i<n; i++) x[r[i]] += tmp[i];
851 
852   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
853   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
854   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
855   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 /* ----------------------------------------------------------------*/
861 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
862 
863 #undef __FUNCT__
864 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
865 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
866 {
867   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
868   IS             isicol;
869   PetscErrorCode ierr;
870   PetscInt       *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
871   PetscInt       *bi,*cols,nnz,*cols_lvl;
872   PetscInt       *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
873   PetscInt       i,levels,diagonal_fill;
874   PetscTruth     col_identity,row_identity;
875   PetscReal      f;
876   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
877   PetscBT        lnkbt;
878   PetscInt       nzi,*bj,**bj_ptr,**bjlvl_ptr;
879   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
880   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
881 
882   PetscFunctionBegin;
883   f             = info->fill;
884   levels        = (PetscInt)info->levels;
885   diagonal_fill = (PetscInt)info->diagonal_fill;
886   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
887 
888   /* special case that simply copies fill pattern */
889   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
890   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
891   if (!levels && row_identity && col_identity) {
892     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
893     (*fact)->factor = FACTOR_LU;
894     b               = (Mat_SeqAIJ*)(*fact)->data;
895     if (!b->diag) {
896       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
897     }
898     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
899     b->row              = isrow;
900     b->col              = iscol;
901     b->icol             = isicol;
902     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
903     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
904     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
905     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
906     PetscFunctionReturn(0);
907   }
908 
909   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
910   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
911 
912   /* get new row pointers */
913   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
914   bi[0] = 0;
915   /* bdiag is location of diagonal in factor */
916   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
917   bdiag[0]  = 0;
918 
919   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
920   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
921 
922   /* create a linked list for storing column indices of the active row */
923   nlnk = n + 1;
924   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
925 
926   /* initial FreeSpace size is f*(ai[n]+1) */
927   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
928   current_space = free_space;
929   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
930   current_space_lvl = free_space_lvl;
931 
932   for (i=0; i<n; i++) {
933     nzi = 0;
934     /* copy current row into linked list */
935     nnz  = ai[r[i]+1] - ai[r[i]];
936     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
937     cols = aj + ai[r[i]];
938     lnk[i] = -1; /* marker to indicate if diagonal exists */
939     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
940     nzi += nlnk;
941 
942     /* make sure diagonal entry is included */
943     if (diagonal_fill && lnk[i] == -1) {
944       fm = n;
945       while (lnk[fm] < i) fm = lnk[fm];
946       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
947       lnk[fm]    = i;
948       lnk_lvl[i] = 0;
949       nzi++; dcount++;
950     }
951 
952     /* add pivot rows into the active row */
953     nzbd = 0;
954     prow = lnk[n];
955     while (prow < i) {
956       nnz      = bdiag[prow];
957       cols     = bj_ptr[prow] + nnz + 1;
958       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
959       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
960       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
961       nzi += nlnk;
962       prow = lnk[prow];
963       nzbd++;
964     }
965     bdiag[i] = nzbd;
966     bi[i+1]  = bi[i] + nzi;
967 
968     /* if free space is not available, make more free space */
969     if (current_space->local_remaining<nzi) {
970       nnz = nzi*(n - i); /* estimated and max additional space needed */
971       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
972       ierr = GetMoreSpace(nnz,&current_space_lvl);CHKERRQ(ierr);
973       reallocs++;
974     }
975 
976     /* copy data into free_space and free_space_lvl, then initialize lnk */
977     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
978     bj_ptr[i]    = current_space->array;
979     bjlvl_ptr[i] = current_space_lvl->array;
980 
981     /* make sure the active row i has diagonal entry */
982     if (*(bj_ptr[i]+bdiag[i]) != i) {
983       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
984     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
985     }
986 
987     current_space->array           += nzi;
988     current_space->local_used      += nzi;
989     current_space->local_remaining -= nzi;
990     current_space_lvl->array           += nzi;
991     current_space_lvl->local_used      += nzi;
992     current_space_lvl->local_remaining -= nzi;
993   }
994 
995   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
996   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
997 
998   /* destroy list of free space and other temporary arrays */
999   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1000   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1001   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1002   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1003   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1004 
1005   {
1006     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1007     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1008     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1009     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1010     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1011     if (diagonal_fill) {
1012       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1013     }
1014   }
1015 
1016   /* put together the new matrix */
1017   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1018   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1019   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1020   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1021   b = (Mat_SeqAIJ*)(*fact)->data;
1022   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1023   b->singlemalloc = PETSC_FALSE;
1024   len = (bi[n] )*sizeof(PetscScalar);
1025   /* the next line frees the default space generated by the Create() */
1026   ierr = PetscFree(b->a);CHKERRQ(ierr);
1027   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1028   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1029   b->j          = bj;
1030   b->i          = bi;
1031   for (i=0; i<n; i++) bdiag[i] += bi[i];
1032   b->diag       = bdiag;
1033   b->ilen       = 0;
1034   b->imax       = 0;
1035   b->row        = isrow;
1036   b->col        = iscol;
1037   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1038   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1039   b->icol       = isicol;
1040   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1041   /* In b structure:  Free imax, ilen, old a, old j.
1042      Allocate bdiag, solve_work, new a, new j */
1043   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1044   b->maxnz             = b->nz = bi[n] ;
1045   (*fact)->factor = FACTOR_LU;
1046   (*fact)->info.factor_mallocs    = reallocs;
1047   (*fact)->info.fill_ratio_given  = f;
1048   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1049 
1050   ierr = MatILUFactorSymbolic_Inode(A,info,isrow,iscol,fact);CHKERRQ(ierr);
1051   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1052 
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #include "src/mat/impls/sbaij/seq/sbaij.h"
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1059 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1060 {
1061   Mat            C = *B;
1062   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1063   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1064   IS             ip=b->row;
1065   PetscErrorCode ierr;
1066   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1067   PetscInt       *ai=a->i,*aj=a->j;
1068   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1069   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1070   PetscReal      zeropivot,rs,shiftnz;
1071   PetscTruth     shiftpd;
1072   ChShift_Ctx    sctx;
1073   PetscInt       newshift;
1074 
1075   PetscFunctionBegin;
1076   shiftnz   = info->shiftnz;
1077   shiftpd   = info->shiftpd;
1078   zeropivot = info->zeropivot;
1079 
1080   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1081 
1082   /* initialization */
1083   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1084   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1085   jl   = il + mbs;
1086   rtmp = (MatScalar*)(jl + mbs);
1087 
1088   sctx.shift_amount = 0;
1089   sctx.nshift       = 0;
1090   do {
1091     sctx.chshift = PETSC_FALSE;
1092     for (i=0; i<mbs; i++) {
1093       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1094     }
1095 
1096     for (k = 0; k<mbs; k++){
1097       bval = ba + bi[k];
1098       /* initialize k-th row by the perm[k]-th row of A */
1099       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1100       for (j = jmin; j < jmax; j++){
1101         col = rip[aj[j]];
1102         if (col >= k){ /* only take upper triangular entry */
1103           rtmp[col] = aa[j];
1104           *bval++  = 0.0; /* for in-place factorization */
1105         }
1106       }
1107       /* shift the diagonal of the matrix */
1108       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1109 
1110       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1111       dk = rtmp[k];
1112       i = jl[k]; /* first row to be added to k_th row  */
1113 
1114       while (i < k){
1115         nexti = jl[i]; /* next row to be added to k_th row */
1116 
1117         /* compute multiplier, update diag(k) and U(i,k) */
1118         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1119         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1120         dk += uikdi*ba[ili];
1121         ba[ili] = uikdi; /* -U(i,k) */
1122 
1123         /* add multiple of row i to k-th row */
1124         jmin = ili + 1; jmax = bi[i+1];
1125         if (jmin < jmax){
1126           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1127           /* update il and jl for row i */
1128           il[i] = jmin;
1129           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1130         }
1131         i = nexti;
1132       }
1133 
1134       /* shift the diagonals when zero pivot is detected */
1135       /* compute rs=sum of abs(off-diagonal) */
1136       rs   = 0.0;
1137       jmin = bi[k]+1;
1138       nz   = bi[k+1] - jmin;
1139       if (nz){
1140         bcol = bj + jmin;
1141         while (nz--){
1142           rs += PetscAbsScalar(rtmp[*bcol]);
1143           bcol++;
1144         }
1145       }
1146 
1147       sctx.rs = rs;
1148       sctx.pv = dk;
1149       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1150       if (newshift == 1){
1151         break;    /* sctx.shift_amount is updated */
1152       } else if (newshift == -1){
1153         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1154       }
1155 
1156       /* copy data into U(k,:) */
1157       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1158       jmin = bi[k]+1; jmax = bi[k+1];
1159       if (jmin < jmax) {
1160         for (j=jmin; j<jmax; j++){
1161           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1162         }
1163         /* add the k-th row into il and jl */
1164         il[k] = jmin;
1165         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1166       }
1167     }
1168   } while (sctx.chshift);
1169   ierr = PetscFree(il);CHKERRQ(ierr);
1170 
1171   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1172   C->factor       = FACTOR_CHOLESKY;
1173   C->assembled    = PETSC_TRUE;
1174   C->preallocated = PETSC_TRUE;
1175   ierr = PetscLogFlops(C->m);CHKERRQ(ierr);
1176   if (sctx.nshift){
1177     if (shiftnz) {
1178       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1179     } else if (shiftpd) {
1180       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1181     }
1182   }
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1188 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1189 {
1190   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1191   Mat_SeqSBAIJ   *b;
1192   Mat            B;
1193   PetscErrorCode ierr;
1194   PetscTruth     perm_identity;
1195   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1196   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1197   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1198   PetscInt       ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1199   PetscReal      fill=info->fill,levels=info->levels;
1200   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1201   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1202   PetscBT        lnkbt;
1203 
1204   PetscFunctionBegin;
1205   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1206   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1207 
1208   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1209   ui[0] = 0;
1210 
1211   /* special case that simply copies fill pattern */
1212   if (!levels && perm_identity) {
1213     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1214     for (i=0; i<am; i++) {
1215       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1216     }
1217     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1218     cols = uj;
1219     for (i=0; i<am; i++) {
1220       aj    = a->j + a->diag[i];
1221       ncols = ui[i+1] - ui[i];
1222       for (j=0; j<ncols; j++) *cols++ = *aj++;
1223     }
1224   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1225     /* initialization */
1226     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1227 
1228     /* jl: linked list for storing indices of the pivot rows
1229        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1230     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1231     il         = jl + am;
1232     uj_ptr     = (PetscInt**)(il + am);
1233     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1234     for (i=0; i<am; i++){
1235       jl[i] = am; il[i] = 0;
1236     }
1237 
1238     /* create and initialize a linked list for storing column indices of the active row k */
1239     nlnk = am + 1;
1240     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1241 
1242     /* initial FreeSpace size is fill*(ai[am]+1) */
1243     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1244     current_space = free_space;
1245     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1246     current_space_lvl = free_space_lvl;
1247 
1248     for (k=0; k<am; k++){  /* for each active row k */
1249       /* initialize lnk by the column indices of row rip[k] of A */
1250       nzk   = 0;
1251       ncols = ai[rip[k]+1] - ai[rip[k]];
1252       ncols_upper = 0;
1253       for (j=0; j<ncols; j++){
1254         i = *(aj + ai[rip[k]] + j);
1255         if (rip[i] >= k){ /* only take upper triangular entry */
1256           ajtmp[ncols_upper] = i;
1257           ncols_upper++;
1258         }
1259       }
1260       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1261       nzk += nlnk;
1262 
1263       /* update lnk by computing fill-in for each pivot row to be merged in */
1264       prow = jl[k]; /* 1st pivot row */
1265 
1266       while (prow < k){
1267         nextprow = jl[prow];
1268 
1269         /* merge prow into k-th row */
1270         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1271         jmax = ui[prow+1];
1272         ncols = jmax-jmin;
1273         i     = jmin - ui[prow];
1274         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1275         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1276         j     = *(uj - 1);
1277         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1278         nzk += nlnk;
1279 
1280         /* update il and jl for prow */
1281         if (jmin < jmax){
1282           il[prow] = jmin;
1283           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1284         }
1285         prow = nextprow;
1286       }
1287 
1288       /* if free space is not available, make more free space */
1289       if (current_space->local_remaining<nzk) {
1290         i = am - k + 1; /* num of unfactored rows */
1291         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1292         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1293         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1294         reallocs++;
1295       }
1296 
1297       /* copy data into free_space and free_space_lvl, then initialize lnk */
1298       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1299 
1300       /* add the k-th row into il and jl */
1301       if (nzk > 1){
1302         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1303         jl[k] = jl[i]; jl[i] = k;
1304         il[k] = ui[k] + 1;
1305       }
1306       uj_ptr[k]     = current_space->array;
1307       uj_lvl_ptr[k] = current_space_lvl->array;
1308 
1309       current_space->array           += nzk;
1310       current_space->local_used      += nzk;
1311       current_space->local_remaining -= nzk;
1312 
1313       current_space_lvl->array           += nzk;
1314       current_space_lvl->local_used      += nzk;
1315       current_space_lvl->local_remaining -= nzk;
1316 
1317       ui[k+1] = ui[k] + nzk;
1318     }
1319 
1320     if (ai[am] != 0) {
1321       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1322       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1323       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1324       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1325     } else {
1326       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1327     }
1328 
1329     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1330     ierr = PetscFree(jl);CHKERRQ(ierr);
1331     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1332 
1333     /* destroy list of free space and other temporary array(s) */
1334     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1335     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1336     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1337     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1338 
1339   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1340 
1341   /* put together the new matrix in MATSEQSBAIJ format */
1342   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1343   B = *fact;
1344   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1345   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1346 
1347   b    = (Mat_SeqSBAIJ*)B->data;
1348   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1349   b->singlemalloc = PETSC_FALSE;
1350   /* the next line frees the default space generated by the Create() */
1351   ierr = PetscFree(b->a);CHKERRQ(ierr);
1352   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1353   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1354   b->j    = uj;
1355   b->i    = ui;
1356   b->diag = 0;
1357   b->ilen = 0;
1358   b->imax = 0;
1359   b->row  = perm;
1360   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1361   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1362   b->icol = perm;
1363   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1364   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1365   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1366   b->maxnz = b->nz = ui[am];
1367 
1368   B->factor                 = FACTOR_CHOLESKY;
1369   B->info.factor_mallocs    = reallocs;
1370   B->info.fill_ratio_given  = fill;
1371   if (ai[am] != 0) {
1372     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1373   } else {
1374     B->info.fill_ratio_needed = 0.0;
1375   }
1376   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1377   if (perm_identity){
1378     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1379     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1380   }
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1386 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1387 {
1388   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1389   Mat_SeqSBAIJ   *b;
1390   Mat            B;
1391   PetscErrorCode ierr;
1392   PetscTruth     perm_identity;
1393   PetscReal      fill = info->fill;
1394   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1395   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1396   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1397   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1398   PetscBT        lnkbt;
1399   IS             iperm;
1400 
1401   PetscFunctionBegin;
1402   /* check whether perm is the identity mapping */
1403   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1404   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1405 
1406   if (!perm_identity){
1407     /* check if perm is symmetric! */
1408     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1409     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1410     for (i=0; i<am; i++) {
1411       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1412     }
1413     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1414     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1415   }
1416 
1417   /* initialization */
1418   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1419   ui[0] = 0;
1420 
1421   /* jl: linked list for storing indices of the pivot rows
1422      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1423   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1424   il     = jl + am;
1425   cols   = il + am;
1426   ui_ptr = (PetscInt**)(cols + am);
1427   for (i=0; i<am; i++){
1428     jl[i] = am; il[i] = 0;
1429   }
1430 
1431   /* create and initialize a linked list for storing column indices of the active row k */
1432   nlnk = am + 1;
1433   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1434 
1435   /* initial FreeSpace size is fill*(ai[am]+1) */
1436   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1437   current_space = free_space;
1438 
1439   for (k=0; k<am; k++){  /* for each active row k */
1440     /* initialize lnk by the column indices of row rip[k] of A */
1441     nzk   = 0;
1442     ncols = ai[rip[k]+1] - ai[rip[k]];
1443     ncols_upper = 0;
1444     for (j=0; j<ncols; j++){
1445       i = rip[*(aj + ai[rip[k]] + j)];
1446       if (i >= k){ /* only take upper triangular entry */
1447         cols[ncols_upper] = i;
1448         ncols_upper++;
1449       }
1450     }
1451     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1452     nzk += nlnk;
1453 
1454     /* update lnk by computing fill-in for each pivot row to be merged in */
1455     prow = jl[k]; /* 1st pivot row */
1456 
1457     while (prow < k){
1458       nextprow = jl[prow];
1459       /* merge prow into k-th row */
1460       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1461       jmax = ui[prow+1];
1462       ncols = jmax-jmin;
1463       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1464       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1465       nzk += nlnk;
1466 
1467       /* update il and jl for prow */
1468       if (jmin < jmax){
1469         il[prow] = jmin;
1470         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1471       }
1472       prow = nextprow;
1473     }
1474 
1475     /* if free space is not available, make more free space */
1476     if (current_space->local_remaining<nzk) {
1477       i = am - k + 1; /* num of unfactored rows */
1478       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1479       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1480       reallocs++;
1481     }
1482 
1483     /* copy data into free space, then initialize lnk */
1484     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1485 
1486     /* add the k-th row into il and jl */
1487     if (nzk-1 > 0){
1488       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1489       jl[k] = jl[i]; jl[i] = k;
1490       il[k] = ui[k] + 1;
1491     }
1492     ui_ptr[k] = current_space->array;
1493     current_space->array           += nzk;
1494     current_space->local_used      += nzk;
1495     current_space->local_remaining -= nzk;
1496 
1497     ui[k+1] = ui[k] + nzk;
1498   }
1499 
1500   if (ai[am] != 0) {
1501     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1502     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1503     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1504     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1505   } else {
1506      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1507   }
1508 
1509   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1510   ierr = PetscFree(jl);CHKERRQ(ierr);
1511 
1512   /* destroy list of free space and other temporary array(s) */
1513   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1514   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1515   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1516 
1517   /* put together the new matrix in MATSEQSBAIJ format */
1518   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1519   B    = *fact;
1520   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1521   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1522 
1523   b = (Mat_SeqSBAIJ*)B->data;
1524   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1525   b->singlemalloc = PETSC_FALSE;
1526   /* the next line frees the default space generated by the Create() */
1527   ierr = PetscFree(b->a);CHKERRQ(ierr);
1528   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1529   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1530   b->j    = uj;
1531   b->i    = ui;
1532   b->diag = 0;
1533   b->ilen = 0;
1534   b->imax = 0;
1535   b->row  = perm;
1536   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1537   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1538   b->icol = perm;
1539   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1540   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1541   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1542   b->maxnz = b->nz = ui[am];
1543 
1544   B->factor                 = FACTOR_CHOLESKY;
1545   B->info.factor_mallocs    = reallocs;
1546   B->info.fill_ratio_given  = fill;
1547   if (ai[am] != 0) {
1548     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1549   } else {
1550     B->info.fill_ratio_needed = 0.0;
1551   }
1552   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1553   if (perm_identity){
1554     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1555     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1556   }
1557   PetscFunctionReturn(0);
1558 }
1559