xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision e738e422d369b78394e381558b62ef7dbfd55e60)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   const PetscInt *c,*r,*ic;
64   PetscInt       i,n = A->rmap->n,*cc,*cr;
65   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
66   PetscInt       *ordcol,*iwk,*iperm,*jw;
67   PetscInt       jmax,lfill,job,*o_i,*o_j;
68   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
69   PetscReal      af;
70 
71   PetscFunctionBegin;
72 
73   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
74   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
75   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
76   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
77   lfill   = (PetscInt)(info->dtcount/2.0);
78   jmax    = (PetscInt)(info->fill*a->nz);
79 
80 
81   /* ------------------------------------------------------------
82      If reorder=.TRUE., then the original matrix has to be
83      reordered to reflect the user selected ordering scheme, and
84      then de-reordered so it is in it's original format.
85      Because Saad's dperm() is NOT in place, we have to copy
86      the original matrix and allocate more storage. . .
87      ------------------------------------------------------------
88   */
89 
90   /* set reorder to true if either isrow or iscol is not identity */
91   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
92   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
93   reorder = PetscNot(reorder);
94 
95 
96   /* storage for ilu factor */
97   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
99   ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr);
100   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
101 
102   /* ------------------------------------------------------------
103      Make sure that everything is Fortran formatted (1-Based)
104      ------------------------------------------------------------
105   */
106   for (i=old_i[0];i<old_i[n];i++) {
107     old_j[i]++;
108   }
109   for(i=0;i<n+1;i++) {
110     old_i[i]++;
111   };
112 
113 
114   if (reorder) {
115     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
116     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
117     ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr);
118     for(i=0;i<n;i++) {
119       cr[i]  = r[i]+1;
120       cc[i]  = c[i]+1;
121     }
122     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
123     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
124     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
125     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
126     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr);
127     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job);
128     ierr = PetscFree2(cc,cr);CHKERRQ(ierr);
129     o_a = old_a2;
130     o_j = old_j2;
131     o_i = old_i2;
132   } else {
133     o_a = old_a;
134     o_j = old_j;
135     o_i = old_i;
136   }
137 
138   /* ------------------------------------------------------------
139      Call Saad's ilutp() routine to generate the factorization
140      ------------------------------------------------------------
141   */
142 
143   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
145   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
146 
147   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
148   if (sierr) {
149     switch (sierr) {
150       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
151       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
153       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
154       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
155       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
156     }
157   }
158 
159   ierr = PetscFree(w);CHKERRQ(ierr);
160   ierr = PetscFree(jw);CHKERRQ(ierr);
161 
162   /* ------------------------------------------------------------
163      Saad's routine gives the result in Modified Sparse Row (msr)
164      Convert to Compressed Sparse Row format (csr)
165      ------------------------------------------------------------
166   */
167 
168   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
170 
171   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172 
173   ierr = PetscFree(iwk);CHKERRQ(ierr);
174   ierr = PetscFree(wk);CHKERRQ(ierr);
175 
176   if (reorder) {
177     ierr = PetscFree(old_a2);CHKERRQ(ierr);
178     ierr = PetscFree(old_j2);CHKERRQ(ierr);
179     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180   } else {
181     /* fix permutation of old_j that the factorization introduced */
182     for (i=old_i[0]; i<old_i[n]; i++) {
183       old_j[i-1] = iperm[old_j[i-1]-1];
184     }
185   }
186 
187   /* get rid of the shift to indices starting at 1 */
188   for (i=0; i<n+1; i++) {
189     old_i[i]--;
190   }
191   for (i=old_i[0];i<old_i[n];i++) {
192     old_j[i]--;
193   }
194 
195   /* Make the factored matrix 0-based */
196   for (i=0; i<n+1; i++) {
197     new_i[i]--;
198   }
199   for (i=new_i[0];i<new_i[n];i++) {
200     new_j[i]--;
201   }
202 
203   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
204   /*-- permute the right-hand-side and solution vectors           --*/
205   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
206   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
208   for(i=0; i<n; i++) {
209     ordcol[i] = ic[iperm[i]-1];
210   };
211   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
212   ierr = ISDestroy(isicol);CHKERRQ(ierr);
213 
214   ierr = PetscFree(iperm);CHKERRQ(ierr);
215 
216   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
217   ierr = PetscFree(ordcol);CHKERRQ(ierr);
218 
219   /*----- put together the new matrix -----*/
220 
221   ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
222   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
223   ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
224   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
225   (*fact)->factor    = MAT_FACTOR_LU;
226   (*fact)->assembled = PETSC_TRUE;
227 
228   b = (Mat_SeqAIJ*)(*fact)->data;
229   b->free_a        = PETSC_TRUE;
230   b->free_ij       = PETSC_TRUE;
231   b->singlemalloc  = PETSC_FALSE;
232   b->a             = new_a;
233   b->j             = new_j;
234   b->i             = new_i;
235   b->ilen          = 0;
236   b->imax          = 0;
237   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238   b->row           = isirow;
239   b->col           = iscolf;
240   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
241   b->maxnz = b->nz = new_i[n];
242   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
243   (*fact)->info.factor_mallocs = 0;
244 
245   af = ((double)b->nz)/((double)a->nz) + .001;
246   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr);
247   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
248   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
249   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
250 
251   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
252 
253   PetscFunctionReturn(0);
254 #endif
255 }
256 
257 EXTERN_C_BEGIN
258 #undef __FUNCT__
259 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
260 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
261 {
262   PetscFunctionBegin;
263   *flg = PETSC_TRUE;
264   PetscFunctionReturn(0);
265 }
266 EXTERN_C_END
267 
268 EXTERN_C_BEGIN
269 #undef __FUNCT__
270 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
271 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
272 {
273   PetscInt           n = A->rmap->n;
274   PetscErrorCode     ierr;
275 
276   PetscFunctionBegin;
277   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
278   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
279   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
280     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
281     (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
282     (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ;
283   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
284     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
285     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
286     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
287     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
288   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
289   (*B)->factor = ftype;
290   PetscFunctionReturn(0);
291 }
292 EXTERN_C_END
293 
294 #undef __FUNCT__
295 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
296 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,MatFactorInfo *info)
297 {
298   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
299   IS                 isicol;
300   PetscErrorCode     ierr;
301   const PetscInt     *r,*ic;
302   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
303   PetscInt           *bi,*bj,*ajtmp;
304   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
305   PetscReal          f;
306   PetscInt           nlnk,*lnk,k,**bi_ptr;
307   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
308   PetscBT            lnkbt;
309 
310   PetscFunctionBegin;
311   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
312   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
313   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
314   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
315 
316   /* get new row pointers */
317   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
318   bi[0] = 0;
319 
320   /* bdiag is location of diagonal in factor */
321   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
322   bdiag[0] = 0;
323 
324   /* linked list for storing column indices of the active row */
325   nlnk = n + 1;
326   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
327 
328   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
329 
330   /* initial FreeSpace size is f*(ai[n]+1) */
331   f = info->fill;
332   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
333   current_space = free_space;
334 
335   for (i=0; i<n; i++) {
336     /* copy previous fill into linked list */
337     nzi = 0;
338     nnz = ai[r[i]+1] - ai[r[i]];
339     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
340     ajtmp = aj + ai[r[i]];
341     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
342     nzi += nlnk;
343 
344     /* add pivot rows into linked list */
345     row = lnk[n];
346     while (row < i) {
347       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
348       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
349       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
350       nzi += nlnk;
351       row  = lnk[row];
352     }
353     bi[i+1] = bi[i] + nzi;
354     im[i]   = nzi;
355 
356     /* mark bdiag */
357     nzbd = 0;
358     nnz  = nzi;
359     k    = lnk[n];
360     while (nnz-- && k < i){
361       nzbd++;
362       k = lnk[k];
363     }
364     bdiag[i] = bi[i] + nzbd;
365 
366     /* if free space is not available, make more free space */
367     if (current_space->local_remaining<nzi) {
368       nnz = (n - i)*nzi; /* estimated and max additional space needed */
369       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
370       reallocs++;
371     }
372 
373     /* copy data into free space, then initialize lnk */
374     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
375     bi_ptr[i] = current_space->array;
376     current_space->array           += nzi;
377     current_space->local_used      += nzi;
378     current_space->local_remaining -= nzi;
379   }
380 #if defined(PETSC_USE_INFO)
381   if (ai[n] != 0) {
382     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
383     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
384     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
385     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
386     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
387   } else {
388     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
389   }
390 #endif
391 
392   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
393   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
394 
395   /* destroy list of free space and other temporary array(s) */
396   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
397   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
398   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
399   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
400 
401   /* put together the new matrix */
402   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
403   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
404   b    = (Mat_SeqAIJ*)(B)->data;
405   b->free_a       = PETSC_TRUE;
406   b->free_ij      = PETSC_TRUE;
407   b->singlemalloc = PETSC_FALSE;
408   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
409   b->j          = bj;
410   b->i          = bi;
411   b->diag       = bdiag;
412   b->ilen       = 0;
413   b->imax       = 0;
414   b->row        = isrow;
415   b->col        = iscol;
416   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
417   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
418   b->icol       = isicol;
419   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
420 
421   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
422   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
423   b->maxnz = b->nz = bi[n] ;
424 
425   (B)->factor                 =  MAT_FACTOR_LU;
426   (B)->info.factor_mallocs    = reallocs;
427   (B)->info.fill_ratio_given  = f;
428 
429   if (ai[n] != 0) {
430     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
431   } else {
432     (B)->info.fill_ratio_needed = 0.0;
433   }
434   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
435   (B)->ops->solve            = MatSolve_SeqAIJ;
436   (B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
437   /* switch to inodes if appropriate */
438   ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 /*
443     Trouble in factorization, should we dump the original matrix?
444 */
445 #undef __FUNCT__
446 #define __FUNCT__ "MatFactorDumpMatrix"
447 PetscErrorCode MatFactorDumpMatrix(Mat A)
448 {
449   PetscErrorCode ierr;
450   PetscTruth     flg;
451 
452   PetscFunctionBegin;
453   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
454   if (flg) {
455     PetscViewer viewer;
456     char        filename[PETSC_MAX_PATH_LEN];
457 
458     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
459     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
460     ierr = MatView(A,viewer);CHKERRQ(ierr);
461     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
462   }
463   PetscFunctionReturn(0);
464 }
465 
466 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
467 
468 /* ----------------------------------------------------------- */
469 #undef __FUNCT__
470 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
471 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,MatFactorInfo *info)
472 {
473   Mat            C=B;
474   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
475   IS             isrow = b->row,isicol = b->icol;
476   PetscErrorCode ierr;
477   const PetscInt  *r,*ic,*ics;
478   PetscInt       i,j,n=A->rmap->n,*bi=b->i,*bj=b->j;
479   PetscInt       *ajtmp,*bjtmp,nz,row;
480   PetscInt       *diag_offset = b->diag,diag,*pj;
481   PetscScalar    *rtmp,*pc,multiplier,*rtmps;
482   MatScalar      *v,*pv;
483   PetscScalar    d;
484   PetscReal      rs;
485   LUShift_Ctx    sctx;
486   PetscInt       newshift,*ddiag;
487 
488   PetscFunctionBegin;
489   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
490   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
491   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
492   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
493   rtmps = rtmp; ics = ic;
494 
495   sctx.shift_top  = 0;
496   sctx.nshift_max = 0;
497   sctx.shift_lo   = 0;
498   sctx.shift_hi   = 0;
499 
500   /* if both shift schemes are chosen by user, only use info->shiftpd */
501   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
502     PetscInt *aai = a->i;
503     ddiag          = a->diag;
504     sctx.shift_top = 0;
505     for (i=0; i<n; i++) {
506       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
507       d  = (a->a)[ddiag[i]];
508       rs = -PetscAbsScalar(d) - PetscRealPart(d);
509       v  = a->a+aai[i];
510       nz = aai[i+1] - aai[i];
511       for (j=0; j<nz; j++)
512 	rs += PetscAbsScalar(v[j]);
513       if (rs>sctx.shift_top) sctx.shift_top = rs;
514     }
515     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
516     sctx.shift_top    *= 1.1;
517     sctx.nshift_max   = 5;
518     sctx.shift_lo     = 0.;
519     sctx.shift_hi     = 1.;
520   }
521 
522   sctx.shift_amount = 0;
523   sctx.nshift       = 0;
524   do {
525     sctx.lushift = PETSC_FALSE;
526     for (i=0; i<n; i++){
527       nz    = bi[i+1] - bi[i];
528       bjtmp = bj + bi[i];
529       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
530 
531       /* load in initial (unfactored row) */
532       nz    = a->i[r[i]+1] - a->i[r[i]];
533       ajtmp = a->j + a->i[r[i]];
534       v     = a->a + a->i[r[i]];
535       for (j=0; j<nz; j++) {
536         rtmp[ics[ajtmp[j]]] = v[j];
537       }
538       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
539 
540       row = *bjtmp++;
541       while  (row < i) {
542         pc = rtmp + row;
543         if (*pc != 0.0) {
544           pv         = b->a + diag_offset[row];
545           pj         = b->j + diag_offset[row] + 1;
546           multiplier = *pc / *pv++;
547           *pc        = multiplier;
548           nz         = bi[row+1] - diag_offset[row] - 1;
549           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
550           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
551         }
552         row = *bjtmp++;
553       }
554       /* finished row so stick it into b->a */
555       pv   = b->a + bi[i] ;
556       pj   = b->j + bi[i] ;
557       nz   = bi[i+1] - bi[i];
558       diag = diag_offset[i] - bi[i];
559       rs   = 0.0;
560       for (j=0; j<nz; j++) {
561         pv[j] = rtmps[pj[j]];
562         if (j != diag) rs += PetscAbsScalar(pv[j]);
563       }
564 
565       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
566       sctx.rs  = rs;
567       sctx.pv  = pv[diag];
568       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
569       if (newshift == 1) break;
570     }
571 
572     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
573       /*
574        * if no shift in this attempt & shifting & started shifting & can refine,
575        * then try lower shift
576        */
577       sctx.shift_hi        = info->shift_fraction;
578       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
579       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
580       sctx.lushift         = PETSC_TRUE;
581       sctx.nshift++;
582     }
583   } while (sctx.lushift);
584 
585   /* invert diagonal entries for simplier triangular solves */
586   for (i=0; i<n; i++) {
587     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
588   }
589   ierr = PetscFree(rtmp);CHKERRQ(ierr);
590   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
591   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
592   if (b->inode.use) {
593     C->ops->solve   = MatSolve_Inode;
594   } else {
595     C->ops->solve   = MatSolve_SeqAIJ;
596   }
597   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
598   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
599   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
600   C->ops->matsolve           = MatMatSolve_SeqAIJ;
601   C->assembled    = PETSC_TRUE;
602   C->preallocated = PETSC_TRUE;
603   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
604   if (sctx.nshift){
605      if (info->shiftpd) {
606       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
607     } else if (info->shiftnz) {
608       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
609     }
610   }
611   PetscFunctionReturn(0);
612 }
613 
614 /*
615    This routine implements inplace ILU(0) with row or/and column permutations.
616    Input:
617      A - original matrix
618    Output;
619      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
620          a->j (col index) is permuted by the inverse of colperm, then sorted
621          a->a reordered accordingly with a->j
622          a->diag (ptr to diagonal elements) is updated.
623 */
624 #undef __FUNCT__
625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,MatFactorInfo *info)
627 {
628   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
629   IS             isrow = a->row,isicol = a->icol;
630   PetscErrorCode ierr;
631   const PetscInt *r,*ic,*ics;
632   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
633   PetscInt       *ajtmp,nz,row;
634   PetscInt       *diag = a->diag,nbdiag,*pj;
635   PetscScalar    *rtmp,*pc,multiplier,d;
636   MatScalar      *v,*pv;
637   PetscReal      rs;
638   LUShift_Ctx    sctx;
639   PetscInt       newshift;
640 
641   PetscFunctionBegin;
642   if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
643   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
644   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
645   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
646   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
647   ics = ic;
648 
649   sctx.shift_top  = 0;
650   sctx.nshift_max = 0;
651   sctx.shift_lo   = 0;
652   sctx.shift_hi   = 0;
653 
654   /* if both shift schemes are chosen by user, only use info->shiftpd */
655   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
656     sctx.shift_top = 0;
657     for (i=0; i<n; i++) {
658       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
659       d  = (a->a)[diag[i]];
660       rs = -PetscAbsScalar(d) - PetscRealPart(d);
661       v  = a->a+ai[i];
662       nz = ai[i+1] - ai[i];
663       for (j=0; j<nz; j++)
664 	rs += PetscAbsScalar(v[j]);
665       if (rs>sctx.shift_top) sctx.shift_top = rs;
666     }
667     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
668     sctx.shift_top    *= 1.1;
669     sctx.nshift_max   = 5;
670     sctx.shift_lo     = 0.;
671     sctx.shift_hi     = 1.;
672   }
673 
674   sctx.shift_amount = 0;
675   sctx.nshift       = 0;
676   do {
677     sctx.lushift = PETSC_FALSE;
678     for (i=0; i<n; i++){
679       /* load in initial unfactored row */
680       nz    = ai[r[i]+1] - ai[r[i]];
681       ajtmp = aj + ai[r[i]];
682       v     = a->a + ai[r[i]];
683       /* sort permuted ajtmp and values v accordingly */
684       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
685       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
686 
687       diag[r[i]] = ai[r[i]];
688       for (j=0; j<nz; j++) {
689         rtmp[ajtmp[j]] = v[j];
690         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
691       }
692       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
693 
694       row = *ajtmp++;
695       while  (row < i) {
696         pc = rtmp + row;
697         if (*pc != 0.0) {
698           pv         = a->a + diag[r[row]];
699           pj         = aj + diag[r[row]] + 1;
700 
701           multiplier = *pc / *pv++;
702           *pc        = multiplier;
703           nz         = ai[r[row]+1] - diag[r[row]] - 1;
704           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
705           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
706         }
707         row = *ajtmp++;
708       }
709       /* finished row so overwrite it onto a->a */
710       pv   = a->a + ai[r[i]] ;
711       pj   = aj + ai[r[i]] ;
712       nz   = ai[r[i]+1] - ai[r[i]];
713       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
714 
715       rs   = 0.0;
716       for (j=0; j<nz; j++) {
717         pv[j] = rtmp[pj[j]];
718         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
719       }
720 
721       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
722       sctx.rs  = rs;
723       sctx.pv  = pv[nbdiag];
724       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
725       if (newshift == 1) break;
726     }
727 
728     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
729       /*
730        * if no shift in this attempt & shifting & started shifting & can refine,
731        * then try lower shift
732        */
733       sctx.shift_hi        = info->shift_fraction;
734       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
735       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
736       sctx.lushift         = PETSC_TRUE;
737       sctx.nshift++;
738     }
739   } while (sctx.lushift);
740 
741   /* invert diagonal entries for simplier triangular solves */
742   for (i=0; i<n; i++) {
743     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
744   }
745 
746   ierr = PetscFree(rtmp);CHKERRQ(ierr);
747   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
748   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
749   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
750   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
751   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
752   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
753   A->assembled = PETSC_TRUE;
754   A->preallocated = PETSC_TRUE;
755   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
756   if (sctx.nshift){
757     if (info->shiftpd) {
758       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
759     } else if (info->shiftnz) {
760       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
761     }
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 /* ----------------------------------------------------------- */
767 #undef __FUNCT__
768 #define __FUNCT__ "MatLUFactor_SeqAIJ"
769 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
770 {
771   PetscErrorCode ierr;
772   Mat            C;
773 
774   PetscFunctionBegin;
775   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
776   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
777   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
778   A->ops->solve            = C->ops->solve;
779   A->ops->solvetranspose   = C->ops->solvetranspose;
780   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
781   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
782   PetscFunctionReturn(0);
783 }
784 /* ----------------------------------------------------------- */
785 #undef __FUNCT__
786 #define __FUNCT__ "MatSolve_SeqAIJ"
787 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
788 {
789   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
790   IS                iscol = a->col,isrow = a->row;
791   PetscErrorCode    ierr;
792   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
793   PetscInt          nz;
794   const PetscInt    *rout,*cout,*r,*c;
795   PetscScalar       *x,*tmp,*tmps,sum;
796   const PetscScalar *b;
797   const MatScalar   *aa = a->a,*v;
798 
799   PetscFunctionBegin;
800   if (!n) PetscFunctionReturn(0);
801 
802   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
803   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
804   tmp  = a->solve_work;
805 
806   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
807   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
808 
809   /* forward solve the lower triangular */
810   tmp[0] = b[*r++];
811   tmps   = tmp;
812   for (i=1; i<n; i++) {
813     v   = aa + ai[i] ;
814     vi  = aj + ai[i] ;
815     nz  = a->diag[i] - ai[i];
816     sum = b[*r++];
817     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
818     tmp[i] = sum;
819   }
820 
821   /* backward solve the upper triangular */
822   for (i=n-1; i>=0; i--){
823     v   = aa + a->diag[i] + 1;
824     vi  = aj + a->diag[i] + 1;
825     nz  = ai[i+1] - a->diag[i] - 1;
826     sum = tmp[i];
827     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
828     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
829   }
830 
831   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
832   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
833   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
834   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
835   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
836   PetscFunctionReturn(0);
837 }
838 
839 #undef __FUNCT__
840 #define __FUNCT__ "MatMatSolve_SeqAIJ"
841 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
842 {
843   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
844   IS              iscol = a->col,isrow = a->row;
845   PetscErrorCode  ierr;
846   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
847   PetscInt        nz,neq;
848   const PetscInt  *rout,*cout,*r,*c;
849   PetscScalar     *x,*b,*tmp,*tmps,sum;
850   const MatScalar *aa = a->a,*v;
851   PetscTruth      bisdense,xisdense;
852 
853   PetscFunctionBegin;
854   if (!n) PetscFunctionReturn(0);
855 
856   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
857   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
858   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
859   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
860 
861   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
862   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
863 
864   tmp  = a->solve_work;
865   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
866   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
867 
868   for (neq=0; neq<B->cmap->n; neq++){
869     /* forward solve the lower triangular */
870     tmp[0] = b[r[0]];
871     tmps   = tmp;
872     for (i=1; i<n; i++) {
873       v   = aa + ai[i] ;
874       vi  = aj + ai[i] ;
875       nz  = a->diag[i] - ai[i];
876       sum = b[r[i]];
877       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
878       tmp[i] = sum;
879     }
880     /* backward solve the upper triangular */
881     for (i=n-1; i>=0; i--){
882       v   = aa + a->diag[i] + 1;
883       vi  = aj + a->diag[i] + 1;
884       nz  = ai[i+1] - a->diag[i] - 1;
885       sum = tmp[i];
886       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
887       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
888     }
889 
890     b += n;
891     x += n;
892   }
893   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
894   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
895   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
896   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
897   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
898   PetscFunctionReturn(0);
899 }
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
903 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
904 {
905   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
906   IS              iscol = a->col,isrow = a->row;
907   PetscErrorCode  ierr;
908   const PetscInt  *r,*c,*rout,*cout;
909   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
910   PetscInt        nz,row;
911   PetscScalar     *x,*b,*tmp,*tmps,sum;
912   const MatScalar *aa = a->a,*v;
913 
914   PetscFunctionBegin;
915   if (!n) PetscFunctionReturn(0);
916 
917   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
918   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
919   tmp  = a->solve_work;
920 
921   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
922   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
923 
924   /* forward solve the lower triangular */
925   tmp[0] = b[*r++];
926   tmps   = tmp;
927   for (row=1; row<n; row++) {
928     i   = rout[row]; /* permuted row */
929     v   = aa + ai[i] ;
930     vi  = aj + ai[i] ;
931     nz  = a->diag[i] - ai[i];
932     sum = b[*r++];
933     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
934     tmp[row] = sum;
935   }
936 
937   /* backward solve the upper triangular */
938   for (row=n-1; row>=0; row--){
939     i   = rout[row]; /* permuted row */
940     v   = aa + a->diag[i] + 1;
941     vi  = aj + a->diag[i] + 1;
942     nz  = ai[i+1] - a->diag[i] - 1;
943     sum = tmp[row];
944     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
945     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
946   }
947 
948   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
949   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
950   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
951   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
952   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 /* ----------------------------------------------------------- */
957 #undef __FUNCT__
958 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
959 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
960 {
961   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
962   PetscErrorCode    ierr;
963   PetscInt          n = A->rmap->n;
964   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
965   PetscScalar       *x;
966   const PetscScalar *b;
967   const MatScalar   *aa = a->a;
968 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
969   PetscInt          adiag_i,i,nz,ai_i;
970   const MatScalar   *v;
971   PetscScalar       sum;
972 #endif
973 
974   PetscFunctionBegin;
975   if (!n) PetscFunctionReturn(0);
976 
977   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
978   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
979 
980 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
981   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
982 #else
983   /* forward solve the lower triangular */
984   x[0] = b[0];
985   for (i=1; i<n; i++) {
986     ai_i = ai[i];
987     v    = aa + ai_i;
988     vi   = aj + ai_i;
989     nz   = adiag[i] - ai_i;
990     sum  = b[i];
991     while (nz--) sum -= *v++ * x[*vi++];
992     x[i] = sum;
993   }
994 
995   /* backward solve the upper triangular */
996   for (i=n-1; i>=0; i--){
997     adiag_i = adiag[i];
998     v       = aa + adiag_i + 1;
999     vi      = aj + adiag_i + 1;
1000     nz      = ai[i+1] - adiag_i - 1;
1001     sum     = x[i];
1002     while (nz--) sum -= *v++ * x[*vi++];
1003     x[i]    = sum*aa[adiag_i];
1004   }
1005 #endif
1006   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
1007   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1008   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1014 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1015 {
1016   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1017   IS              iscol = a->col,isrow = a->row;
1018   PetscErrorCode  ierr;
1019   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1020   PetscInt        nz;
1021   const PetscInt  *rout,*cout,*r,*c;
1022   PetscScalar     *x,*b,*tmp,sum;
1023   const MatScalar *aa = a->a,*v;
1024 
1025   PetscFunctionBegin;
1026   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1027 
1028   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1029   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1030   tmp  = a->solve_work;
1031 
1032   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1033   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1034 
1035   /* forward solve the lower triangular */
1036   tmp[0] = b[*r++];
1037   for (i=1; i<n; i++) {
1038     v   = aa + ai[i] ;
1039     vi  = aj + ai[i] ;
1040     nz  = a->diag[i] - ai[i];
1041     sum = b[*r++];
1042     while (nz--) sum -= *v++ * tmp[*vi++ ];
1043     tmp[i] = sum;
1044   }
1045 
1046   /* backward solve the upper triangular */
1047   for (i=n-1; i>=0; i--){
1048     v   = aa + a->diag[i] + 1;
1049     vi  = aj + a->diag[i] + 1;
1050     nz  = ai[i+1] - a->diag[i] - 1;
1051     sum = tmp[i];
1052     while (nz--) sum -= *v++ * tmp[*vi++ ];
1053     tmp[i] = sum*aa[a->diag[i]];
1054     x[*c--] += tmp[i];
1055   }
1056 
1057   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1058   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1059   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1060   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1061   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1062 
1063   PetscFunctionReturn(0);
1064 }
1065 /* -------------------------------------------------------------------*/
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1068 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1069 {
1070   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1071   IS              iscol = a->col,isrow = a->row;
1072   PetscErrorCode  ierr;
1073   const PetscInt  *rout,*cout,*r,*c;
1074   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1075   PetscInt        nz,*diag = a->diag;
1076   PetscScalar     *x,*b,*tmp,s1;
1077   const MatScalar *aa = a->a,*v;
1078 
1079   PetscFunctionBegin;
1080   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1081   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1082   tmp  = a->solve_work;
1083 
1084   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1085   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1086 
1087   /* copy the b into temp work space according to permutation */
1088   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1089 
1090   /* forward solve the U^T */
1091   for (i=0; i<n; i++) {
1092     v   = aa + diag[i] ;
1093     vi  = aj + diag[i] + 1;
1094     nz  = ai[i+1] - diag[i] - 1;
1095     s1  = tmp[i];
1096     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1097     while (nz--) {
1098       tmp[*vi++ ] -= (*v++)*s1;
1099     }
1100     tmp[i] = s1;
1101   }
1102 
1103   /* backward solve the L^T */
1104   for (i=n-1; i>=0; i--){
1105     v   = aa + diag[i] - 1 ;
1106     vi  = aj + diag[i] - 1 ;
1107     nz  = diag[i] - ai[i];
1108     s1  = tmp[i];
1109     while (nz--) {
1110       tmp[*vi-- ] -= (*v--)*s1;
1111     }
1112   }
1113 
1114   /* copy tmp into x according to permutation */
1115   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1116 
1117   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1118   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1119   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1120   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1121 
1122   ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1128 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1129 {
1130   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1131   IS              iscol = a->col,isrow = a->row;
1132   PetscErrorCode  ierr;
1133   const PetscInt  *r,*c,*rout,*cout;
1134   PetscInt        i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1135   PetscInt        nz,*diag = a->diag;
1136   PetscScalar     *x,*b,*tmp;
1137   const MatScalar *aa = a->a,*v;
1138 
1139   PetscFunctionBegin;
1140   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1141 
1142   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1143   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1144   tmp = a->solve_work;
1145 
1146   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1147   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1148 
1149   /* copy the b into temp work space according to permutation */
1150   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1151 
1152   /* forward solve the U^T */
1153   for (i=0; i<n; i++) {
1154     v   = aa + diag[i] ;
1155     vi  = aj + diag[i] + 1;
1156     nz  = ai[i+1] - diag[i] - 1;
1157     tmp[i] *= *v++;
1158     while (nz--) {
1159       tmp[*vi++ ] -= (*v++)*tmp[i];
1160     }
1161   }
1162 
1163   /* backward solve the L^T */
1164   for (i=n-1; i>=0; i--){
1165     v   = aa + diag[i] - 1 ;
1166     vi  = aj + diag[i] - 1 ;
1167     nz  = diag[i] - ai[i];
1168     while (nz--) {
1169       tmp[*vi-- ] -= (*v--)*tmp[i];
1170     }
1171   }
1172 
1173   /* copy tmp into x according to permutation */
1174   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1175 
1176   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1177   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1178   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1180 
1181   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1182   PetscFunctionReturn(0);
1183 }
1184 /* ----------------------------------------------------------------*/
1185 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1186 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption);
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1190 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,MatFactorInfo *info)
1191 {
1192   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1193   IS                 isicol;
1194   PetscErrorCode     ierr;
1195   const PetscInt     *r,*ic;
1196   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1197   PetscInt           *bi,*cols,nnz,*cols_lvl;
1198   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1199   PetscInt           i,levels,diagonal_fill;
1200   PetscTruth         col_identity,row_identity;
1201   PetscReal          f;
1202   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1203   PetscBT            lnkbt;
1204   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1205   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1206   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1207   PetscTruth         missing;
1208 
1209   PetscFunctionBegin;
1210   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1211   f             = info->fill;
1212   levels        = (PetscInt)info->levels;
1213   diagonal_fill = (PetscInt)info->diagonal_fill;
1214   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1215 
1216   /* special case that simply copies fill pattern */
1217   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1218   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1219   if (!levels && row_identity && col_identity) {
1220     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr);
1221     fact->factor = MAT_FACTOR_ILU;
1222     (fact)->info.factor_mallocs    = 0;
1223     (fact)->info.fill_ratio_given  = info->fill;
1224     (fact)->info.fill_ratio_needed = 1.0;
1225     b               = (Mat_SeqAIJ*)(fact)->data;
1226     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1227     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1228     b->row              = isrow;
1229     b->col              = iscol;
1230     b->icol             = isicol;
1231     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1232     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1233     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1234     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1235     ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1236     PetscFunctionReturn(0);
1237   }
1238 
1239   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1240   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1241 
1242   /* get new row pointers */
1243   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1244   bi[0] = 0;
1245   /* bdiag is location of diagonal in factor */
1246   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1247   bdiag[0]  = 0;
1248 
1249   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1250   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1251 
1252   /* create a linked list for storing column indices of the active row */
1253   nlnk = n + 1;
1254   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1255 
1256   /* initial FreeSpace size is f*(ai[n]+1) */
1257   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1258   current_space = free_space;
1259   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1260   current_space_lvl = free_space_lvl;
1261 
1262   for (i=0; i<n; i++) {
1263     nzi = 0;
1264     /* copy current row into linked list */
1265     nnz  = ai[r[i]+1] - ai[r[i]];
1266     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1267     cols = aj + ai[r[i]];
1268     lnk[i] = -1; /* marker to indicate if diagonal exists */
1269     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1270     nzi += nlnk;
1271 
1272     /* make sure diagonal entry is included */
1273     if (diagonal_fill && lnk[i] == -1) {
1274       fm = n;
1275       while (lnk[fm] < i) fm = lnk[fm];
1276       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1277       lnk[fm]    = i;
1278       lnk_lvl[i] = 0;
1279       nzi++; dcount++;
1280     }
1281 
1282     /* add pivot rows into the active row */
1283     nzbd = 0;
1284     prow = lnk[n];
1285     while (prow < i) {
1286       nnz      = bdiag[prow];
1287       cols     = bj_ptr[prow] + nnz + 1;
1288       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1289       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1290       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1291       nzi += nlnk;
1292       prow = lnk[prow];
1293       nzbd++;
1294     }
1295     bdiag[i] = nzbd;
1296     bi[i+1]  = bi[i] + nzi;
1297 
1298     /* if free space is not available, make more free space */
1299     if (current_space->local_remaining<nzi) {
1300       nnz = nzi*(n - i); /* estimated and max additional space needed */
1301       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1302       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1303       reallocs++;
1304     }
1305 
1306     /* copy data into free_space and free_space_lvl, then initialize lnk */
1307     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1308     bj_ptr[i]    = current_space->array;
1309     bjlvl_ptr[i] = current_space_lvl->array;
1310 
1311     /* make sure the active row i has diagonal entry */
1312     if (*(bj_ptr[i]+bdiag[i]) != i) {
1313       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1314     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1315     }
1316 
1317     current_space->array           += nzi;
1318     current_space->local_used      += nzi;
1319     current_space->local_remaining -= nzi;
1320     current_space_lvl->array           += nzi;
1321     current_space_lvl->local_used      += nzi;
1322     current_space_lvl->local_remaining -= nzi;
1323   }
1324 
1325   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1326   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1327 
1328   /* destroy list of free space and other temporary arrays */
1329   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1330   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1331   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1332   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1333   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1334 
1335 #if defined(PETSC_USE_INFO)
1336   {
1337     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1338     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1339     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1340     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1341     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1342     if (diagonal_fill) {
1343       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1344     }
1345   }
1346 #endif
1347 
1348   /* put together the new matrix */
1349   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1350   b = (Mat_SeqAIJ*)(fact)->data;
1351   b->free_a       = PETSC_TRUE;
1352   b->free_ij      = PETSC_TRUE;
1353   b->singlemalloc = PETSC_FALSE;
1354   len = (bi[n] )*sizeof(PetscScalar);
1355   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1356   b->j          = bj;
1357   b->i          = bi;
1358   for (i=0; i<n; i++) bdiag[i] += bi[i];
1359   b->diag       = bdiag;
1360   b->ilen       = 0;
1361   b->imax       = 0;
1362   b->row        = isrow;
1363   b->col        = iscol;
1364   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1365   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1366   b->icol       = isicol;
1367   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1368   /* In b structure:  Free imax, ilen, old a, old j.
1369      Allocate bdiag, solve_work, new a, new j */
1370   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1371   b->maxnz             = b->nz = bi[n] ;
1372   (fact)->info.factor_mallocs    = reallocs;
1373   (fact)->info.fill_ratio_given  = f;
1374   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1375   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1376   ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #include "src/mat/impls/sbaij/seq/sbaij.h"
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1383 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,MatFactorInfo *info)
1384 {
1385   Mat            C = B;
1386   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1387   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1388   IS             ip=b->row,iip = b->icol;
1389   PetscErrorCode ierr;
1390   const PetscInt *rip,*riip;
1391   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1392   PetscInt       *ai=a->i,*aj=a->j;
1393   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1394   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1395   PetscReal      zeropivot,rs,shiftnz;
1396   PetscReal      shiftpd;
1397   ChShift_Ctx    sctx;
1398   PetscInt       newshift;
1399   PetscTruth     perm_identity;
1400 
1401   PetscFunctionBegin;
1402 
1403   shiftnz   = info->shiftnz;
1404   shiftpd   = info->shiftpd;
1405   zeropivot = info->zeropivot;
1406 
1407   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1408   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1409 
1410   /* initialization */
1411   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1412   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1413   jl   = il + mbs;
1414   rtmp = (MatScalar*)(jl + mbs);
1415 
1416   sctx.shift_amount = 0;
1417   sctx.nshift       = 0;
1418   do {
1419     sctx.chshift = PETSC_FALSE;
1420     for (i=0; i<mbs; i++) {
1421       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1422     }
1423 
1424     for (k = 0; k<mbs; k++){
1425       bval = ba + bi[k];
1426       /* initialize k-th row by the perm[k]-th row of A */
1427       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1428       for (j = jmin; j < jmax; j++){
1429         col = riip[aj[j]];
1430         if (col >= k){ /* only take upper triangular entry */
1431           rtmp[col] = aa[j];
1432           *bval++  = 0.0; /* for in-place factorization */
1433         }
1434       }
1435       /* shift the diagonal of the matrix */
1436       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1437 
1438       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1439       dk = rtmp[k];
1440       i = jl[k]; /* first row to be added to k_th row  */
1441 
1442       while (i < k){
1443         nexti = jl[i]; /* next row to be added to k_th row */
1444 
1445         /* compute multiplier, update diag(k) and U(i,k) */
1446         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1447         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1448         dk += uikdi*ba[ili];
1449         ba[ili] = uikdi; /* -U(i,k) */
1450 
1451         /* add multiple of row i to k-th row */
1452         jmin = ili + 1; jmax = bi[i+1];
1453         if (jmin < jmax){
1454           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1455           /* update il and jl for row i */
1456           il[i] = jmin;
1457           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1458         }
1459         i = nexti;
1460       }
1461 
1462       /* shift the diagonals when zero pivot is detected */
1463       /* compute rs=sum of abs(off-diagonal) */
1464       rs   = 0.0;
1465       jmin = bi[k]+1;
1466       nz   = bi[k+1] - jmin;
1467       bcol = bj + jmin;
1468       while (nz--){
1469         rs += PetscAbsScalar(rtmp[*bcol]);
1470         bcol++;
1471       }
1472 
1473       sctx.rs = rs;
1474       sctx.pv = dk;
1475       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1476 
1477       if (newshift == 1) {
1478         if (!sctx.shift_amount) {
1479           sctx.shift_amount = 1e-5;
1480         }
1481         break;
1482       }
1483 
1484       /* copy data into U(k,:) */
1485       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1486       jmin = bi[k]+1; jmax = bi[k+1];
1487       if (jmin < jmax) {
1488         for (j=jmin; j<jmax; j++){
1489           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1490         }
1491         /* add the k-th row into il and jl */
1492         il[k] = jmin;
1493         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1494       }
1495     }
1496   } while (sctx.chshift);
1497   ierr = PetscFree(il);CHKERRQ(ierr);
1498 
1499   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1500   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1501 
1502   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1503   if (perm_identity){
1504     (B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1505     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1506     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1507     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1508   } else {
1509     (B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1510     (B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1511     (B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1512     (B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1513   }
1514 
1515   C->assembled    = PETSC_TRUE;
1516   C->preallocated = PETSC_TRUE;
1517   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1518   if (sctx.nshift){
1519     if (shiftnz) {
1520       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1521     } else if (shiftpd) {
1522       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1523     }
1524   }
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1530 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,MatFactorInfo *info)
1531 {
1532   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1533   Mat_SeqSBAIJ       *b;
1534   PetscErrorCode     ierr;
1535   PetscTruth         perm_identity,missing;
1536   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1537   const PetscInt     *rip,*riip;
1538   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1539   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1540   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1541   PetscReal          fill=info->fill,levels=info->levels;
1542   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1543   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1544   PetscBT            lnkbt;
1545   IS                 iperm;
1546 
1547   PetscFunctionBegin;
1548   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1549   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1550   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1551   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1552   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1553 
1554   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1555   ui[0] = 0;
1556 
1557   /* ICC(0) without matrix ordering: simply copies fill pattern */
1558   if (!levels && perm_identity) {
1559 
1560     for (i=0; i<am; i++) {
1561       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1562     }
1563     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1564     cols = uj;
1565     for (i=0; i<am; i++) {
1566       aj    = a->j + a->diag[i];
1567       ncols = ui[i+1] - ui[i];
1568       for (j=0; j<ncols; j++) *cols++ = *aj++;
1569     }
1570   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1571     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1572     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1573 
1574     /* initialization */
1575     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1576 
1577     /* jl: linked list for storing indices of the pivot rows
1578        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1579     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1580     il         = jl + am;
1581     uj_ptr     = (PetscInt**)(il + am);
1582     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1583     for (i=0; i<am; i++){
1584       jl[i] = am; il[i] = 0;
1585     }
1586 
1587     /* create and initialize a linked list for storing column indices of the active row k */
1588     nlnk = am + 1;
1589     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1590 
1591     /* initial FreeSpace size is fill*(ai[am]+1) */
1592     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1593     current_space = free_space;
1594     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1595     current_space_lvl = free_space_lvl;
1596 
1597     for (k=0; k<am; k++){  /* for each active row k */
1598       /* initialize lnk by the column indices of row rip[k] of A */
1599       nzk   = 0;
1600       ncols = ai[rip[k]+1] - ai[rip[k]];
1601       if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1602       ncols_upper = 0;
1603       for (j=0; j<ncols; j++){
1604         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1605         if (riip[i] >= k){ /* only take upper triangular entry */
1606           ajtmp[ncols_upper] = i;
1607           ncols_upper++;
1608         }
1609       }
1610       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1611       nzk += nlnk;
1612 
1613       /* update lnk by computing fill-in for each pivot row to be merged in */
1614       prow = jl[k]; /* 1st pivot row */
1615 
1616       while (prow < k){
1617         nextprow = jl[prow];
1618 
1619         /* merge prow into k-th row */
1620         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1621         jmax = ui[prow+1];
1622         ncols = jmax-jmin;
1623         i     = jmin - ui[prow];
1624         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1625         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1626         j     = *(uj - 1);
1627         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1628         nzk += nlnk;
1629 
1630         /* update il and jl for prow */
1631         if (jmin < jmax){
1632           il[prow] = jmin;
1633           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1634         }
1635         prow = nextprow;
1636       }
1637 
1638       /* if free space is not available, make more free space */
1639       if (current_space->local_remaining<nzk) {
1640         i = am - k + 1; /* num of unfactored rows */
1641         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1642         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1643         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1644         reallocs++;
1645       }
1646 
1647       /* copy data into free_space and free_space_lvl, then initialize lnk */
1648       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1649       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1650 
1651       /* add the k-th row into il and jl */
1652       if (nzk > 1){
1653         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1654         jl[k] = jl[i]; jl[i] = k;
1655         il[k] = ui[k] + 1;
1656       }
1657       uj_ptr[k]     = current_space->array;
1658       uj_lvl_ptr[k] = current_space_lvl->array;
1659 
1660       current_space->array           += nzk;
1661       current_space->local_used      += nzk;
1662       current_space->local_remaining -= nzk;
1663 
1664       current_space_lvl->array           += nzk;
1665       current_space_lvl->local_used      += nzk;
1666       current_space_lvl->local_remaining -= nzk;
1667 
1668       ui[k+1] = ui[k] + nzk;
1669     }
1670 
1671 #if defined(PETSC_USE_INFO)
1672     if (ai[am] != 0) {
1673       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1674       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1675       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1676       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1677     } else {
1678       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1679     }
1680 #endif
1681 
1682     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1683     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1684     ierr = PetscFree(jl);CHKERRQ(ierr);
1685     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1686 
1687     /* destroy list of free space and other temporary array(s) */
1688     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1689     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1690     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1691     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1692 
1693   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1694 
1695   /* put together the new matrix in MATSEQSBAIJ format */
1696 
1697   b    = (Mat_SeqSBAIJ*)(fact)->data;
1698   b->singlemalloc = PETSC_FALSE;
1699   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1700   b->j    = uj;
1701   b->i    = ui;
1702   b->diag = 0;
1703   b->ilen = 0;
1704   b->imax = 0;
1705   b->row  = perm;
1706   b->col  = perm;
1707   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1708   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1709   b->icol = iperm;
1710   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1711   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1712   ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1713   b->maxnz   = b->nz = ui[am];
1714   b->free_a  = PETSC_TRUE;
1715   b->free_ij = PETSC_TRUE;
1716 
1717   (fact)->info.factor_mallocs    = reallocs;
1718   (fact)->info.fill_ratio_given  = fill;
1719   if (ai[am] != 0) {
1720     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1721   } else {
1722     (fact)->info.fill_ratio_needed = 0.0;
1723   }
1724   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1730 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,MatFactorInfo *info)
1731 {
1732   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1733   Mat_SeqSBAIJ       *b;
1734   PetscErrorCode     ierr;
1735   PetscTruth         perm_identity;
1736   PetscReal          fill = info->fill;
1737   const PetscInt     *rip,*riip;
1738   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1739   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1740   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1741   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1742   PetscBT            lnkbt;
1743   IS                 iperm;
1744 
1745   PetscFunctionBegin;
1746   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1747   /* check whether perm is the identity mapping */
1748   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1749   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1750   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1751   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1752 
1753   /* initialization */
1754   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1755   ui[0] = 0;
1756 
1757   /* jl: linked list for storing indices of the pivot rows
1758      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1759   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1760   il     = jl + am;
1761   cols   = il + am;
1762   ui_ptr = (PetscInt**)(cols + am);
1763   for (i=0; i<am; i++){
1764     jl[i] = am; il[i] = 0;
1765   }
1766 
1767   /* create and initialize a linked list for storing column indices of the active row k */
1768   nlnk = am + 1;
1769   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1770 
1771   /* initial FreeSpace size is fill*(ai[am]+1) */
1772   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1773   current_space = free_space;
1774 
1775   for (k=0; k<am; k++){  /* for each active row k */
1776     /* initialize lnk by the column indices of row rip[k] of A */
1777     nzk   = 0;
1778     ncols = ai[rip[k]+1] - ai[rip[k]];
1779     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1780     ncols_upper = 0;
1781     for (j=0; j<ncols; j++){
1782       i = riip[*(aj + ai[rip[k]] + j)];
1783       if (i >= k){ /* only take upper triangular entry */
1784         cols[ncols_upper] = i;
1785         ncols_upper++;
1786       }
1787     }
1788     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1789     nzk += nlnk;
1790 
1791     /* update lnk by computing fill-in for each pivot row to be merged in */
1792     prow = jl[k]; /* 1st pivot row */
1793 
1794     while (prow < k){
1795       nextprow = jl[prow];
1796       /* merge prow into k-th row */
1797       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1798       jmax = ui[prow+1];
1799       ncols = jmax-jmin;
1800       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1801       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1802       nzk += nlnk;
1803 
1804       /* update il and jl for prow */
1805       if (jmin < jmax){
1806         il[prow] = jmin;
1807         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1808       }
1809       prow = nextprow;
1810     }
1811 
1812     /* if free space is not available, make more free space */
1813     if (current_space->local_remaining<nzk) {
1814       i = am - k + 1; /* num of unfactored rows */
1815       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1816       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1817       reallocs++;
1818     }
1819 
1820     /* copy data into free space, then initialize lnk */
1821     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1822 
1823     /* add the k-th row into il and jl */
1824     if (nzk-1 > 0){
1825       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1826       jl[k] = jl[i]; jl[i] = k;
1827       il[k] = ui[k] + 1;
1828     }
1829     ui_ptr[k] = current_space->array;
1830     current_space->array           += nzk;
1831     current_space->local_used      += nzk;
1832     current_space->local_remaining -= nzk;
1833 
1834     ui[k+1] = ui[k] + nzk;
1835   }
1836 
1837 #if defined(PETSC_USE_INFO)
1838   if (ai[am] != 0) {
1839     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1840     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1841     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1842     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1843   } else {
1844      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1845   }
1846 #endif
1847 
1848   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1849   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1850   ierr = PetscFree(jl);CHKERRQ(ierr);
1851 
1852   /* destroy list of free space and other temporary array(s) */
1853   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1854   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1855   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1856 
1857   /* put together the new matrix in MATSEQSBAIJ format */
1858 
1859   b = (Mat_SeqSBAIJ*)(fact)->data;
1860   b->singlemalloc = PETSC_FALSE;
1861   b->free_a       = PETSC_TRUE;
1862   b->free_ij      = PETSC_TRUE;
1863   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1864   b->j    = uj;
1865   b->i    = ui;
1866   b->diag = 0;
1867   b->ilen = 0;
1868   b->imax = 0;
1869   b->row  = perm;
1870   b->col  = perm;
1871   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1872   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1873   b->icol = iperm;
1874   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1875   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1876   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1877   b->maxnz = b->nz = ui[am];
1878 
1879   (fact)->info.factor_mallocs    = reallocs;
1880   (fact)->info.fill_ratio_given  = fill;
1881   if (ai[am] != 0) {
1882     (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1883   } else {
1884     (fact)->info.fill_ratio_needed = 0.0;
1885   }
1886   (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1887   PetscFunctionReturn(0);
1888 }
1889