xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision da7a1d00dfe608d858874203c214eae652b1dd2d)
1 
2 #include "src/mat/impls/aij/seq/aij.h"
3 #include "src/inline/dot.h"
4 #include "src/inline/spops.h"
5 #include "petscbt.h"
6 #include "src/mat/utils/freespace.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
11 {
12   PetscFunctionBegin;
13 
14   SETERRQ(PETSC_ERR_SUP,"Code not written");
15 #if !defined(PETSC_USE_DEBUG)
16   PetscFunctionReturn(0);
17 #endif
18 }
19 
20 
21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
22 
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26 
27 #undef __FUNCT__
28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
29   /* ------------------------------------------------------------
30 
31           This interface was contribed by Tony Caola
32 
33      This routine is an interface to the pivoting drop-tolerance
34      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
35      SPARSEKIT2.
36 
37      The SPARSEKIT2 routines used here are covered by the GNU
38      copyright; see the file gnu in this directory.
39 
40      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
41      help in getting this routine ironed out.
42 
43      The major drawback to this routine is that if info->fill is
44      not large enough it fails rather than allocating more space;
45      this can be fixed by hacking/improving the f2c version of
46      Yousef Saad's code.
47 
48      ------------------------------------------------------------
49 */
50 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
51 {
52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
53   PetscFunctionBegin;
54   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
55   You can obtain the drop tolerance routines by installing PETSc from\n\
56   www.mcs.anl.gov/petsc\n");
57 #else
58   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
59   IS             iscolf,isicol,isirow;
60   PetscTruth     reorder;
61   PetscErrorCode ierr,sierr;
62   PetscInt       *c,*r,*ic,i,n = A->m;
63   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
64   PetscInt       *ordcol,*iwk,*iperm,*jw;
65   PetscInt       jmax,lfill,job,*o_i,*o_j;
66   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
67   PetscReal      af;
68 
69   PetscFunctionBegin;
70 
71   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
72   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
73   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
74   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
75   lfill   = (PetscInt)(info->dtcount/2.0);
76   jmax    = (PetscInt)(info->fill*a->nz);
77 
78 
79   /* ------------------------------------------------------------
80      If reorder=.TRUE., then the original matrix has to be
81      reordered to reflect the user selected ordering scheme, and
82      then de-reordered so it is in it's original format.
83      Because Saad's dperm() is NOT in place, we have to copy
84      the original matrix and allocate more storage. . .
85      ------------------------------------------------------------
86   */
87 
88   /* set reorder to true if either isrow or iscol is not identity */
89   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
90   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
91   reorder = PetscNot(reorder);
92 
93 
94   /* storage for ilu factor */
95   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
96   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
98   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
99 
100   /* ------------------------------------------------------------
101      Make sure that everything is Fortran formatted (1-Based)
102      ------------------------------------------------------------
103   */
104   for (i=old_i[0];i<old_i[n];i++) {
105     old_j[i]++;
106   }
107   for(i=0;i<n+1;i++) {
108     old_i[i]++;
109   };
110 
111 
112   if (reorder) {
113     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
114     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
115     for(i=0;i<n;i++) {
116       r[i]  = r[i]+1;
117       c[i]  = c[i]+1;
118     }
119     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
120     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
122     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
123     for (i=0;i<n;i++) {
124       r[i]  = r[i]-1;
125       c[i]  = c[i]-1;
126     }
127     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
128     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
222   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
223   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
224   (*fact)->factor    = FACTOR_LU;
225   (*fact)->assembled = PETSC_TRUE;
226 
227   b = (Mat_SeqAIJ*)(*fact)->data;
228   ierr = PetscFree(b->imax);CHKERRQ(ierr);
229   b->sorted        = PETSC_FALSE;
230   b->singlemalloc  = PETSC_FALSE;
231   /* the next line frees the default space generated by the MatCreate() */
232   ierr             = PetscFree(b->a);CHKERRQ(ierr);
233   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
234   b->a             = new_a;
235   b->j             = new_j;
236   b->i             = new_i;
237   b->ilen          = 0;
238   b->imax          = 0;
239   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240   b->row           = isirow;
241   b->col           = iscolf;
242   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
243   b->maxnz = b->nz = new_i[n];
244   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
245   (*fact)->info.factor_mallocs = 0;
246 
247   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
248 
249   af = ((double)b->nz)/((double)a->nz) + .001;
250   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
251   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
252   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
253   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
254 
255   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
256 
257   PetscFunctionReturn(0);
258 #endif
259 }
260 
261 #undef __FUNCT__
262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
264 {
265   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
266   IS             isicol;
267   PetscErrorCode ierr;
268   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
269   PetscInt       *bi,*bj,*ajtmp;
270   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
271   PetscReal      f;
272   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
273   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
274   PetscBT        lnkbt;
275 
276   PetscFunctionBegin;
277   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
279   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
280   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
281 
282   /* get new row pointers */
283   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
284   bi[0] = 0;
285 
286   /* bdiag is location of diagonal in factor */
287   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
288   bdiag[0] = 0;
289 
290   /* linked list for storing column indices of the active row */
291   nlnk = n + 1;
292   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
293 
294   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr);
295   im     = cols + n;
296   bi_ptr = (PetscInt**)(im + n);
297 
298   /* initial FreeSpace size is f*(ai[n]+1) */
299   f = info->fill;
300   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
301   current_space = free_space;
302 
303   for (i=0; i<n; i++) {
304     /* copy previous fill into linked list */
305     nzi = 0;
306     nnz = ai[r[i]+1] - ai[r[i]];
307     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
308     ajtmp = aj + ai[r[i]];
309     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
310     ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
311     nzi += nlnk;
312 
313     /* add pivot rows into linked list */
314     row = lnk[n];
315     while (row < i) {
316       nzbd    = bdiag[row] - bi[row] + 1;
317       ajtmp   = bi_ptr[row] + nzbd;
318       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i */
319       im[row] = nzbd;
320       ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr);
321       nzi     += nlnk;
322       im[row] += nzbd;  /* update im[row]: num of cols with index<=i */
323 
324       row = lnk[row];
325     }
326 
327     bi[i+1] = bi[i] + nzi;
328     im[i]   = nzi;
329 
330     /* mark bdiag */
331     nzbd = 0;
332     nnz  = nzi;
333     k    = lnk[n];
334     while (nnz-- && k < i){
335       nzbd++;
336       k = lnk[k];
337     }
338     bdiag[i] = bi[i] + nzbd;
339 
340     /* if free space is not available, make more free space */
341     if (current_space->local_remaining<nzi) {
342       nnz = (n - i)*nzi; /* estimated and max additional space needed */
343       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
344       reallocs++;
345     }
346 
347     /* copy data into free space, then initialize lnk */
348     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
349     bi_ptr[i] = current_space->array;
350     current_space->array           += nzi;
351     current_space->local_used      += nzi;
352     current_space->local_remaining -= nzi;
353   }
354   if (ai[n] != 0) {
355     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
356     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
357     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
358     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
359     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
360   } else {
361     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
362   }
363 
364   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
365   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
366 
367   /* destroy list of free space and other temporary array(s) */
368   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
369   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
370   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
371   ierr = PetscFree(cols);CHKERRQ(ierr);
372 
373   /* put together the new matrix */
374   ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr);
375   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
376   ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr);
377   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
378   b    = (Mat_SeqAIJ*)(*B)->data;
379   ierr = PetscFree(b->imax);CHKERRQ(ierr);
380   b->singlemalloc = PETSC_FALSE;
381   /* the next line frees the default space generated by the Create() */
382   ierr = PetscFree(b->a);CHKERRQ(ierr);
383   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
384   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
385   b->j          = bj;
386   b->i          = bi;
387   b->diag       = bdiag;
388   b->ilen       = 0;
389   b->imax       = 0;
390   b->row        = isrow;
391   b->col        = iscol;
392   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
393   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
394   b->icol       = isicol;
395   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
396 
397   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
398   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
399   b->maxnz = b->nz = bi[n] ;
400 
401   (*B)->factor                 =  FACTOR_LU;
402   (*B)->info.factor_mallocs    = reallocs;
403   (*B)->info.fill_ratio_given  = f;
404 
405   if (ai[n] != 0) {
406     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
407   } else {
408     (*B)->info.fill_ratio_needed = 0.0;
409   }
410   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
411   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
412   PetscFunctionReturn(0);
413 }
414 
415 /* ----------------------------------------------------------- */
416 #undef __FUNCT__
417 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
418 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
419 {
420   Mat            C=*B;
421   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
422   IS             isrow = b->row,isicol = b->icol;
423   PetscErrorCode ierr;
424   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
425   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
426   PetscInt       *diag_offset = b->diag,diag,*pj;
427   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
428   PetscReal      rs,d;
429   LUShift_Ctx    sctx;
430   PetscInt       newshift;
431 
432   PetscFunctionBegin;
433   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
434   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
435   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
436   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
437   rtmps = rtmp; ics = ic;
438 
439   if (!a->diag) {
440     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
441   }
442   /* if both shift schemes are chosen by user, only use info->shiftpd */
443   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
444   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
445     PetscInt *aai = a->i,*ddiag = a->diag;
446     sctx.shift_top = 0;
447     for (i=0; i<n; i++) {
448       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
449       d  = (a->a)[ddiag[i]];
450       rs = -PetscAbsScalar(d) - PetscRealPart(d);
451       v  = a->a+aai[i];
452       nz = aai[i+1] - aai[i];
453       for (j=0; j<nz; j++)
454 	rs += PetscAbsScalar(v[j]);
455       if (rs>sctx.shift_top) sctx.shift_top = rs;
456     }
457     if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
458     sctx.shift_top    *= 1.1;
459     sctx.nshift_max   = 5;
460     sctx.shift_lo     = 0.;
461     sctx.shift_hi     = 1.;
462   }
463 
464   sctx.shift_amount = 0;
465   sctx.nshift       = 0;
466   do {
467     sctx.lushift = PETSC_FALSE;
468     for (i=0; i<n; i++){
469       nz    = bi[i+1] - bi[i];
470       bjtmp = bj + bi[i];
471       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
472 
473       /* load in initial (unfactored row) */
474       nz    = a->i[r[i]+1] - a->i[r[i]];
475       ajtmp = a->j + a->i[r[i]];
476       v     = a->a + a->i[r[i]];
477       for (j=0; j<nz; j++) {
478         rtmp[ics[ajtmp[j]]] = v[j];
479       }
480       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
481 
482       row = *bjtmp++;
483       while  (row < i) {
484         pc = rtmp + row;
485         if (*pc != 0.0) {
486           pv         = b->a + diag_offset[row];
487           pj         = b->j + diag_offset[row] + 1;
488           multiplier = *pc / *pv++;
489           *pc        = multiplier;
490           nz         = bi[row+1] - diag_offset[row] - 1;
491           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
492           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
493         }
494         row = *bjtmp++;
495       }
496       /* finished row so stick it into b->a */
497       pv   = b->a + bi[i] ;
498       pj   = b->j + bi[i] ;
499       nz   = bi[i+1] - bi[i];
500       diag = diag_offset[i] - bi[i];
501       rs   = 0.0;
502       for (j=0; j<nz; j++) {
503         pv[j] = rtmps[pj[j]];
504         if (j != diag) rs += PetscAbsScalar(pv[j]);
505       }
506 
507       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
508       sctx.rs  = rs;
509       sctx.pv  = pv[diag];
510       ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
511       if (newshift == 1){
512         break;    /* sctx.shift_amount is updated */
513       } else if (newshift == -1){
514         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
515       }
516     }
517 
518     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
519       /*
520        * if no shift in this attempt & shifting & started shifting & can refine,
521        * then try lower shift
522        */
523       sctx.shift_hi        = info->shift_fraction;
524       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
525       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
526       sctx.lushift         = PETSC_TRUE;
527       sctx.nshift++;
528     }
529   } while (sctx.lushift);
530 
531   /* invert diagonal entries for simplier triangular solves */
532   for (i=0; i<n; i++) {
533     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
534   }
535 
536   ierr = PetscFree(rtmp);CHKERRQ(ierr);
537   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
538   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
539   C->factor = FACTOR_LU;
540   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
541   C->assembled = PETSC_TRUE;
542   ierr = PetscLogFlops(C->n);CHKERRQ(ierr);
543   if (sctx.nshift){
544     if (info->shiftnz) {
545       PetscLogInfo(0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
546     } else if (info->shiftpd) {
547       PetscLogInfo(0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
548     }
549   }
550   PetscFunctionReturn(0);
551 }
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
555 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
556 {
557   PetscFunctionBegin;
558   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
559   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
560   PetscFunctionReturn(0);
561 }
562 
563 
564 /* ----------------------------------------------------------- */
565 #undef __FUNCT__
566 #define __FUNCT__ "MatLUFactor_SeqAIJ"
567 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
568 {
569   PetscErrorCode ierr;
570   Mat            C;
571 
572   PetscFunctionBegin;
573   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
574   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
575   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
576   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 }
579 /* ----------------------------------------------------------- */
580 #undef __FUNCT__
581 #define __FUNCT__ "MatSolve_SeqAIJ"
582 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
583 {
584   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
585   IS             iscol = a->col,isrow = a->row;
586   PetscErrorCode ierr;
587   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
588   PetscInt       nz,*rout,*cout;
589   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
590 
591   PetscFunctionBegin;
592   if (!n) PetscFunctionReturn(0);
593 
594   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
595   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
596   tmp  = a->solve_work;
597 
598   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
599   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
600 
601   /* forward solve the lower triangular */
602   tmp[0] = b[*r++];
603   tmps   = tmp;
604   for (i=1; i<n; i++) {
605     v   = aa + ai[i] ;
606     vi  = aj + ai[i] ;
607     nz  = a->diag[i] - ai[i];
608     sum = b[*r++];
609     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
610     tmp[i] = sum;
611   }
612 
613   /* backward solve the upper triangular */
614   for (i=n-1; i>=0; i--){
615     v   = aa + a->diag[i] + 1;
616     vi  = aj + a->diag[i] + 1;
617     nz  = ai[i+1] - a->diag[i] - 1;
618     sum = tmp[i];
619     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
620     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
621   }
622 
623   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
624   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
625   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
626   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
627   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 /* ----------------------------------------------------------- */
632 #undef __FUNCT__
633 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
634 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
635 {
636   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
637   PetscErrorCode ierr;
638   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
639   PetscScalar    *x,*b,*aa = a->a;
640 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
641   PetscInt       adiag_i,i,*vi,nz,ai_i;
642   PetscScalar    *v,sum;
643 #endif
644 
645   PetscFunctionBegin;
646   if (!n) PetscFunctionReturn(0);
647 
648   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
649   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
650 
651 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
652   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
653 #else
654   /* forward solve the lower triangular */
655   x[0] = b[0];
656   for (i=1; i<n; i++) {
657     ai_i = ai[i];
658     v    = aa + ai_i;
659     vi   = aj + ai_i;
660     nz   = adiag[i] - ai_i;
661     sum  = b[i];
662     while (nz--) sum -= *v++ * x[*vi++];
663     x[i] = sum;
664   }
665 
666   /* backward solve the upper triangular */
667   for (i=n-1; i>=0; i--){
668     adiag_i = adiag[i];
669     v       = aa + adiag_i + 1;
670     vi      = aj + adiag_i + 1;
671     nz      = ai[i+1] - adiag_i - 1;
672     sum     = x[i];
673     while (nz--) sum -= *v++ * x[*vi++];
674     x[i]    = sum*aa[adiag_i];
675   }
676 #endif
677   ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr);
678   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
679   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
680   PetscFunctionReturn(0);
681 }
682 
683 #undef __FUNCT__
684 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
685 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
686 {
687   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
688   IS             iscol = a->col,isrow = a->row;
689   PetscErrorCode ierr;
690   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
691   PetscInt       nz,*rout,*cout;
692   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
693 
694   PetscFunctionBegin;
695   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
696 
697   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
698   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
699   tmp  = a->solve_work;
700 
701   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
702   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
703 
704   /* forward solve the lower triangular */
705   tmp[0] = b[*r++];
706   for (i=1; i<n; i++) {
707     v   = aa + ai[i] ;
708     vi  = aj + ai[i] ;
709     nz  = a->diag[i] - ai[i];
710     sum = b[*r++];
711     while (nz--) sum -= *v++ * tmp[*vi++ ];
712     tmp[i] = sum;
713   }
714 
715   /* backward solve the upper triangular */
716   for (i=n-1; i>=0; i--){
717     v   = aa + a->diag[i] + 1;
718     vi  = aj + a->diag[i] + 1;
719     nz  = ai[i+1] - a->diag[i] - 1;
720     sum = tmp[i];
721     while (nz--) sum -= *v++ * tmp[*vi++ ];
722     tmp[i] = sum*aa[a->diag[i]];
723     x[*c--] += tmp[i];
724   }
725 
726   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
727   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
728   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
729   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
730   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
731 
732   PetscFunctionReturn(0);
733 }
734 /* -------------------------------------------------------------------*/
735 #undef __FUNCT__
736 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
737 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
738 {
739   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
740   IS             iscol = a->col,isrow = a->row;
741   PetscErrorCode ierr;
742   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
743   PetscInt       nz,*rout,*cout,*diag = a->diag;
744   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
745 
746   PetscFunctionBegin;
747   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
748   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
749   tmp  = a->solve_work;
750 
751   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
752   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
753 
754   /* copy the b into temp work space according to permutation */
755   for (i=0; i<n; i++) tmp[i] = b[c[i]];
756 
757   /* forward solve the U^T */
758   for (i=0; i<n; i++) {
759     v   = aa + diag[i] ;
760     vi  = aj + diag[i] + 1;
761     nz  = ai[i+1] - diag[i] - 1;
762     s1  = tmp[i];
763     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
764     while (nz--) {
765       tmp[*vi++ ] -= (*v++)*s1;
766     }
767     tmp[i] = s1;
768   }
769 
770   /* backward solve the L^T */
771   for (i=n-1; i>=0; i--){
772     v   = aa + diag[i] - 1 ;
773     vi  = aj + diag[i] - 1 ;
774     nz  = diag[i] - ai[i];
775     s1  = tmp[i];
776     while (nz--) {
777       tmp[*vi-- ] -= (*v--)*s1;
778     }
779   }
780 
781   /* copy tmp into x according to permutation */
782   for (i=0; i<n; i++) x[r[i]] = tmp[i];
783 
784   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
785   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
786   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
787   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
788 
789   ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr);
790   PetscFunctionReturn(0);
791 }
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
795 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
796 {
797   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
798   IS             iscol = a->col,isrow = a->row;
799   PetscErrorCode ierr;
800   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
801   PetscInt       nz,*rout,*cout,*diag = a->diag;
802   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
803 
804   PetscFunctionBegin;
805   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
806 
807   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
808   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
809   tmp = a->solve_work;
810 
811   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
812   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
813 
814   /* copy the b into temp work space according to permutation */
815   for (i=0; i<n; i++) tmp[i] = b[c[i]];
816 
817   /* forward solve the U^T */
818   for (i=0; i<n; i++) {
819     v   = aa + diag[i] ;
820     vi  = aj + diag[i] + 1;
821     nz  = ai[i+1] - diag[i] - 1;
822     tmp[i] *= *v++;
823     while (nz--) {
824       tmp[*vi++ ] -= (*v++)*tmp[i];
825     }
826   }
827 
828   /* backward solve the L^T */
829   for (i=n-1; i>=0; i--){
830     v   = aa + diag[i] - 1 ;
831     vi  = aj + diag[i] - 1 ;
832     nz  = diag[i] - ai[i];
833     while (nz--) {
834       tmp[*vi-- ] -= (*v--)*tmp[i];
835     }
836   }
837 
838   /* copy tmp into x according to permutation */
839   for (i=0; i<n; i++) x[r[i]] += tmp[i];
840 
841   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
842   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
843   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
844   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
845 
846   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
847   PetscFunctionReturn(0);
848 }
849 /* ----------------------------------------------------------------*/
850 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
851 
852 #undef __FUNCT__
853 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
854 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
855 {
856   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
857   IS             isicol;
858   PetscErrorCode ierr;
859   PetscInt       *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
860   PetscInt       *bi,*cols,nnz,*cols_lvl;
861   PetscInt       *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
862   PetscInt       i,levels,diagonal_fill;
863   PetscTruth     col_identity,row_identity;
864   PetscReal      f;
865   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
866   PetscBT        lnkbt;
867   PetscInt       nzi,*bj,**bj_ptr,**bjlvl_ptr;
868   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
869   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
870 
871   PetscFunctionBegin;
872   f             = info->fill;
873   levels        = (PetscInt)info->levels;
874   diagonal_fill = (PetscInt)info->diagonal_fill;
875   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
876 
877   /* special case that simply copies fill pattern */
878   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
879   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
880   if (!levels && row_identity && col_identity) {
881     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
882     (*fact)->factor = FACTOR_LU;
883     b               = (Mat_SeqAIJ*)(*fact)->data;
884     if (!b->diag) {
885       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
886     }
887     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
888     b->row              = isrow;
889     b->col              = iscol;
890     b->icol             = isicol;
891     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
892     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
893     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
894     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
895     PetscFunctionReturn(0);
896   }
897 
898   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
899   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
900 
901   /* get new row pointers */
902   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
903   bi[0] = 0;
904   /* bdiag is location of diagonal in factor */
905   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
906   bdiag[0]  = 0;
907 
908   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
909   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
910 
911   /* create a linked list for storing column indices of the active row */
912   nlnk = n + 1;
913   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
914 
915   /* initial FreeSpace size is f*(ai[n]+1) */
916   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
917   current_space = free_space;
918   ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
919   current_space_lvl = free_space_lvl;
920 
921   for (i=0; i<n; i++) {
922     nzi = 0;
923     /* copy current row into linked list */
924     nnz  = ai[r[i]+1] - ai[r[i]];
925     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
926     cols = aj + ai[r[i]];
927     lnk[i] = -1; /* marker to indicate if diagonal exists */
928     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
929     nzi += nlnk;
930 
931     /* make sure diagonal entry is included */
932     if (diagonal_fill && lnk[i] == -1) {
933       fm = n;
934       while (lnk[fm] < i) fm = lnk[fm];
935       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
936       lnk[fm]    = i;
937       lnk_lvl[i] = 0;
938       nzi++; dcount++;
939     }
940 
941     /* add pivot rows into the active row */
942     nzbd = 0;
943     prow = lnk[n];
944     while (prow < i) {
945       nnz      = bdiag[prow];
946       cols     = bj_ptr[prow] + nnz + 1;
947       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
948       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
949       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
950       nzi += nlnk;
951       prow = lnk[prow];
952       nzbd++;
953     }
954     bdiag[i] = nzbd;
955     bi[i+1]  = bi[i] + nzi;
956 
957     /* if free space is not available, make more free space */
958     if (current_space->local_remaining<nzi) {
959       nnz = nzi*(n - i); /* estimated and max additional space needed */
960       ierr = GetMoreSpace(nnz,&current_space);CHKERRQ(ierr);
961       ierr = GetMoreSpace(nnz,&current_space_lvl);CHKERRQ(ierr);
962       reallocs++;
963     }
964 
965     /* copy data into free_space and free_space_lvl, then initialize lnk */
966     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
967     bj_ptr[i]    = current_space->array;
968     bjlvl_ptr[i] = current_space_lvl->array;
969 
970     /* make sure the active row i has diagonal entry */
971     if (*(bj_ptr[i]+bdiag[i]) != i) {
972       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
973     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
974     }
975 
976     current_space->array           += nzi;
977     current_space->local_used      += nzi;
978     current_space->local_remaining -= nzi;
979     current_space_lvl->array           += nzi;
980     current_space_lvl->local_used      += nzi;
981     current_space_lvl->local_remaining -= nzi;
982   }
983 
984   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
985   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
986 
987   /* destroy list of free space and other temporary arrays */
988   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
989   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
990   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
991   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
992   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
993 
994   {
995     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
996     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
997     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
998     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
999     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1000     if (diagonal_fill) {
1001       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1002     }
1003   }
1004 
1005   /* put together the new matrix */
1006   ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr);
1007   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1008   ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr);
1009   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1010   b = (Mat_SeqAIJ*)(*fact)->data;
1011   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1012   b->singlemalloc = PETSC_FALSE;
1013   len = (bi[n] )*sizeof(PetscScalar);
1014   /* the next line frees the default space generated by the Create() */
1015   ierr = PetscFree(b->a);CHKERRQ(ierr);
1016   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1017   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1018   b->j          = bj;
1019   b->i          = bi;
1020   for (i=0; i<n; i++) bdiag[i] += bi[i];
1021   b->diag       = bdiag;
1022   b->ilen       = 0;
1023   b->imax       = 0;
1024   b->row        = isrow;
1025   b->col        = iscol;
1026   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1027   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1028   b->icol       = isicol;
1029   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1030   /* In b structure:  Free imax, ilen, old a, old j.
1031      Allocate bdiag, solve_work, new a, new j */
1032   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1033   b->maxnz             = b->nz = bi[n] ;
1034   (*fact)->factor = FACTOR_LU;
1035   (*fact)->info.factor_mallocs    = reallocs;
1036   (*fact)->info.fill_ratio_given  = f;
1037   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1038 
1039   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1040   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1041 
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 #include "src/mat/impls/sbaij/seq/sbaij.h"
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1048 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1049 {
1050   Mat            C = *B;
1051   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1052   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1053   IS             ip=b->row;
1054   PetscErrorCode ierr;
1055   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1056   PetscInt       *ai=a->i,*aj=a->j;
1057   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1058   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1059   PetscReal      zeropivot,rs,shiftnz;
1060   PetscTruth     shiftpd;
1061   ChShift_Ctx    sctx;
1062   PetscInt       newshift;
1063 
1064   PetscFunctionBegin;
1065   shiftnz   = info->shiftnz;
1066   shiftpd   = info->shiftpd;
1067   zeropivot = info->zeropivot;
1068 
1069   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1070 
1071   /* initialization */
1072   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1073   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1074   jl   = il + mbs;
1075   rtmp = (MatScalar*)(jl + mbs);
1076 
1077   sctx.shift_amount = 0;
1078   sctx.nshift       = 0;
1079   do {
1080     sctx.chshift = PETSC_FALSE;
1081     for (i=0; i<mbs; i++) {
1082       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1083     }
1084 
1085     for (k = 0; k<mbs; k++){
1086       bval = ba + bi[k];
1087       /* initialize k-th row by the perm[k]-th row of A */
1088       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1089       for (j = jmin; j < jmax; j++){
1090         col = rip[aj[j]];
1091         if (col >= k){ /* only take upper triangular entry */
1092           rtmp[col] = aa[j];
1093           *bval++  = 0.0; /* for in-place factorization */
1094         }
1095       }
1096       /* shift the diagonal of the matrix */
1097       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1098 
1099       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1100       dk = rtmp[k];
1101       i = jl[k]; /* first row to be added to k_th row  */
1102 
1103       while (i < k){
1104         nexti = jl[i]; /* next row to be added to k_th row */
1105 
1106         /* compute multiplier, update diag(k) and U(i,k) */
1107         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1108         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1109         dk += uikdi*ba[ili];
1110         ba[ili] = uikdi; /* -U(i,k) */
1111 
1112         /* add multiple of row i to k-th row */
1113         jmin = ili + 1; jmax = bi[i+1];
1114         if (jmin < jmax){
1115           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1116           /* update il and jl for row i */
1117           il[i] = jmin;
1118           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1119         }
1120         i = nexti;
1121       }
1122 
1123       /* shift the diagonals when zero pivot is detected */
1124       /* compute rs=sum of abs(off-diagonal) */
1125       rs   = 0.0;
1126       jmin = bi[k]+1;
1127       nz   = bi[k+1] - jmin;
1128       if (nz){
1129         bcol = bj + jmin;
1130         while (nz--){
1131           rs += PetscAbsScalar(rtmp[*bcol]);
1132           bcol++;
1133         }
1134       }
1135 
1136       sctx.rs = rs;
1137       sctx.pv = dk;
1138       ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr);
1139       if (newshift == 1){
1140         break;    /* sctx.shift_amount is updated */
1141       } else if (newshift == -1){
1142         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1143       }
1144 
1145       /* copy data into U(k,:) */
1146       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1147       jmin = bi[k]+1; jmax = bi[k+1];
1148       if (jmin < jmax) {
1149         for (j=jmin; j<jmax; j++){
1150           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1151         }
1152         /* add the k-th row into il and jl */
1153         il[k] = jmin;
1154         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1155       }
1156     }
1157   } while (sctx.chshift);
1158   ierr = PetscFree(il);CHKERRQ(ierr);
1159 
1160   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1161   C->factor       = FACTOR_CHOLESKY;
1162   C->assembled    = PETSC_TRUE;
1163   C->preallocated = PETSC_TRUE;
1164   ierr = PetscLogFlops(C->m);CHKERRQ(ierr);
1165   if (sctx.nshift){
1166     if (shiftnz) {
1167       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1168     } else if (shiftpd) {
1169       PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount);
1170     }
1171   }
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1177 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1178 {
1179   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1180   Mat_SeqSBAIJ   *b;
1181   Mat            B;
1182   PetscErrorCode ierr;
1183   PetscTruth     perm_identity;
1184   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1185   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1186   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1187   PetscInt       ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1188   PetscReal      fill=info->fill,levels=info->levels;
1189   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1190   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1191   PetscBT        lnkbt;
1192 
1193   PetscFunctionBegin;
1194   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1195   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1196 
1197   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1198   ui[0] = 0;
1199 
1200   /* special case that simply copies fill pattern */
1201   if (!levels && perm_identity) {
1202     ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
1203     for (i=0; i<am; i++) {
1204       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1205     }
1206     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1207     cols = uj;
1208     for (i=0; i<am; i++) {
1209       aj    = a->j + a->diag[i];
1210       ncols = ui[i+1] - ui[i];
1211       for (j=0; j<ncols; j++) *cols++ = *aj++;
1212     }
1213   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1214     /* initialization */
1215     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1216 
1217     /* jl: linked list for storing indices of the pivot rows
1218        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1219     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1220     il         = jl + am;
1221     uj_ptr     = (PetscInt**)(il + am);
1222     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1223     for (i=0; i<am; i++){
1224       jl[i] = am; il[i] = 0;
1225     }
1226 
1227     /* create and initialize a linked list for storing column indices of the active row k */
1228     nlnk = am + 1;
1229     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1230 
1231     /* initial FreeSpace size is fill*(ai[am]+1) */
1232     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1233     current_space = free_space;
1234     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1235     current_space_lvl = free_space_lvl;
1236 
1237     for (k=0; k<am; k++){  /* for each active row k */
1238       /* initialize lnk by the column indices of row rip[k] of A */
1239       nzk   = 0;
1240       ncols = ai[rip[k]+1] - ai[rip[k]];
1241       ncols_upper = 0;
1242       for (j=0; j<ncols; j++){
1243         i = *(aj + ai[rip[k]] + j);
1244         if (rip[i] >= k){ /* only take upper triangular entry */
1245           ajtmp[ncols_upper] = i;
1246           ncols_upper++;
1247         }
1248       }
1249       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1250       nzk += nlnk;
1251 
1252       /* update lnk by computing fill-in for each pivot row to be merged in */
1253       prow = jl[k]; /* 1st pivot row */
1254 
1255       while (prow < k){
1256         nextprow = jl[prow];
1257 
1258         /* merge prow into k-th row */
1259         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1260         jmax = ui[prow+1];
1261         ncols = jmax-jmin;
1262         i     = jmin - ui[prow];
1263         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1264         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1265         j     = *(uj - 1);
1266         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1267         nzk += nlnk;
1268 
1269         /* update il and jl for prow */
1270         if (jmin < jmax){
1271           il[prow] = jmin;
1272           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1273         }
1274         prow = nextprow;
1275       }
1276 
1277       /* if free space is not available, make more free space */
1278       if (current_space->local_remaining<nzk) {
1279         i = am - k + 1; /* num of unfactored rows */
1280         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1281         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1282         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1283         reallocs++;
1284       }
1285 
1286       /* copy data into free_space and free_space_lvl, then initialize lnk */
1287       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1288 
1289       /* add the k-th row into il and jl */
1290       if (nzk > 1){
1291         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1292         jl[k] = jl[i]; jl[i] = k;
1293         il[k] = ui[k] + 1;
1294       }
1295       uj_ptr[k]     = current_space->array;
1296       uj_lvl_ptr[k] = current_space_lvl->array;
1297 
1298       current_space->array           += nzk;
1299       current_space->local_used      += nzk;
1300       current_space->local_remaining -= nzk;
1301 
1302       current_space_lvl->array           += nzk;
1303       current_space_lvl->local_used      += nzk;
1304       current_space_lvl->local_remaining -= nzk;
1305 
1306       ui[k+1] = ui[k] + nzk;
1307     }
1308 
1309     if (ai[am] != 0) {
1310       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1311       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1312       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1313       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1314     } else {
1315       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1316     }
1317 
1318     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1319     ierr = PetscFree(jl);CHKERRQ(ierr);
1320     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1321 
1322     /* destroy list of free space and other temporary array(s) */
1323     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1324     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1325     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1326     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1327 
1328   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1329 
1330   /* put together the new matrix in MATSEQSBAIJ format */
1331   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1332   B = *fact;
1333   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1334   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1335 
1336   b    = (Mat_SeqSBAIJ*)B->data;
1337   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1338   b->singlemalloc = PETSC_FALSE;
1339   /* the next line frees the default space generated by the Create() */
1340   ierr = PetscFree(b->a);CHKERRQ(ierr);
1341   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1342   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1343   b->j    = uj;
1344   b->i    = ui;
1345   b->diag = 0;
1346   b->ilen = 0;
1347   b->imax = 0;
1348   b->row  = perm;
1349   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1350   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1351   b->icol = perm;
1352   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1353   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1354   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1355   b->maxnz = b->nz = ui[am];
1356 
1357   B->factor                 = FACTOR_CHOLESKY;
1358   B->info.factor_mallocs    = reallocs;
1359   B->info.fill_ratio_given  = fill;
1360   if (ai[am] != 0) {
1361     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1362   } else {
1363     B->info.fill_ratio_needed = 0.0;
1364   }
1365   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1366   if (perm_identity){
1367     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1368     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1369   }
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1375 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1376 {
1377   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1378   Mat_SeqSBAIJ   *b;
1379   Mat            B;
1380   PetscErrorCode ierr;
1381   PetscTruth     perm_identity;
1382   PetscReal      fill = info->fill;
1383   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1384   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1385   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1386   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1387   PetscBT        lnkbt;
1388   IS             iperm;
1389 
1390   PetscFunctionBegin;
1391   /* check whether perm is the identity mapping */
1392   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1393   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1394 
1395   if (!perm_identity){
1396     /* check if perm is symmetric! */
1397     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1398     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1399     for (i=0; i<am; i++) {
1400       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1401     }
1402     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1403     ierr = ISDestroy(iperm);CHKERRQ(ierr);
1404   }
1405 
1406   /* initialization */
1407   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1408   ui[0] = 0;
1409 
1410   /* jl: linked list for storing indices of the pivot rows
1411      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1412   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1413   il     = jl + am;
1414   cols   = il + am;
1415   ui_ptr = (PetscInt**)(cols + am);
1416   for (i=0; i<am; i++){
1417     jl[i] = am; il[i] = 0;
1418   }
1419 
1420   /* create and initialize a linked list for storing column indices of the active row k */
1421   nlnk = am + 1;
1422   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1423 
1424   /* initial FreeSpace size is fill*(ai[am]+1) */
1425   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1426   current_space = free_space;
1427 
1428   for (k=0; k<am; k++){  /* for each active row k */
1429     /* initialize lnk by the column indices of row rip[k] of A */
1430     nzk   = 0;
1431     ncols = ai[rip[k]+1] - ai[rip[k]];
1432     ncols_upper = 0;
1433     for (j=0; j<ncols; j++){
1434       i = rip[*(aj + ai[rip[k]] + j)];
1435       if (i >= k){ /* only take upper triangular entry */
1436         cols[ncols_upper] = i;
1437         ncols_upper++;
1438       }
1439     }
1440     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1441     nzk += nlnk;
1442 
1443     /* update lnk by computing fill-in for each pivot row to be merged in */
1444     prow = jl[k]; /* 1st pivot row */
1445 
1446     while (prow < k){
1447       nextprow = jl[prow];
1448       /* merge prow into k-th row */
1449       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1450       jmax = ui[prow+1];
1451       ncols = jmax-jmin;
1452       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1453       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1454       nzk += nlnk;
1455 
1456       /* update il and jl for prow */
1457       if (jmin < jmax){
1458         il[prow] = jmin;
1459         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1460       }
1461       prow = nextprow;
1462     }
1463 
1464     /* if free space is not available, make more free space */
1465     if (current_space->local_remaining<nzk) {
1466       i = am - k + 1; /* num of unfactored rows */
1467       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1468       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1469       reallocs++;
1470     }
1471 
1472     /* copy data into free space, then initialize lnk */
1473     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1474 
1475     /* add the k-th row into il and jl */
1476     if (nzk-1 > 0){
1477       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1478       jl[k] = jl[i]; jl[i] = k;
1479       il[k] = ui[k] + 1;
1480     }
1481     ui_ptr[k] = current_space->array;
1482     current_space->array           += nzk;
1483     current_space->local_used      += nzk;
1484     current_space->local_remaining -= nzk;
1485 
1486     ui[k+1] = ui[k] + nzk;
1487   }
1488 
1489   if (ai[am] != 0) {
1490     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1491     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1492     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1493     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1494   } else {
1495      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n");
1496   }
1497 
1498   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1499   ierr = PetscFree(jl);CHKERRQ(ierr);
1500 
1501   /* destroy list of free space and other temporary array(s) */
1502   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1503   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1504   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1505 
1506   /* put together the new matrix in MATSEQSBAIJ format */
1507   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1508   B    = *fact;
1509   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1510   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
1511 
1512   b = (Mat_SeqSBAIJ*)B->data;
1513   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1514   b->singlemalloc = PETSC_FALSE;
1515   /* the next line frees the default space generated by the Create() */
1516   ierr = PetscFree(b->a);CHKERRQ(ierr);
1517   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1518   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1519   b->j    = uj;
1520   b->i    = ui;
1521   b->diag = 0;
1522   b->ilen = 0;
1523   b->imax = 0;
1524   b->row  = perm;
1525   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1526   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1527   b->icol = perm;
1528   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1529   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1530   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1531   b->maxnz = b->nz = ui[am];
1532 
1533   B->factor                 = FACTOR_CHOLESKY;
1534   B->info.factor_mallocs    = reallocs;
1535   B->info.fill_ratio_given  = fill;
1536   if (ai[am] != 0) {
1537     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1538   } else {
1539     B->info.fill_ratio_needed = 0.0;
1540   }
1541   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1542   if (perm_identity){
1543     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1544     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1545   }
1546   PetscFunctionReturn(0);
1547 }
1548