xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision c2586ae5925c3d89f53dfea860a3965c702e6fcb)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   PetscInt       *c,*r,*ic,i,n = A->rmap->n;
64   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65   PetscInt       *ordcol,*iwk,*iperm,*jw;
66   PetscInt       jmax,lfill,job,*o_i,*o_j;
67   MatScalar      *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68   PetscReal      af;
69 
70   PetscFunctionBegin;
71 
72   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
73   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
75   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
76   lfill   = (PetscInt)(info->dtcount/2.0);
77   jmax    = (PetscInt)(info->fill*a->nz);
78 
79 
80   /* ------------------------------------------------------------
81      If reorder=.TRUE., then the original matrix has to be
82      reordered to reflect the user selected ordering scheme, and
83      then de-reordered so it is in it's original format.
84      Because Saad's dperm() is NOT in place, we have to copy
85      the original matrix and allocate more storage. . .
86      ------------------------------------------------------------
87   */
88 
89   /* set reorder to true if either isrow or iscol is not identity */
90   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
91   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
92   reorder = PetscNot(reorder);
93 
94 
95   /* storage for ilu factor */
96   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr);
99   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
100 
101   /* ------------------------------------------------------------
102      Make sure that everything is Fortran formatted (1-Based)
103      ------------------------------------------------------------
104   */
105   for (i=old_i[0];i<old_i[n];i++) {
106     old_j[i]++;
107   }
108   for(i=0;i<n+1;i++) {
109     old_i[i]++;
110   };
111 
112 
113   if (reorder) {
114     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
115     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
116     for(i=0;i<n;i++) {
117       r[i]  = r[i]+1;
118       c[i]  = c[i]+1;
119     }
120     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
122     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr);
123     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124     for (i=0;i<n;i++) {
125       r[i]  = r[i]-1;
126       c[i]  = c[i]-1;
127     }
128     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
130     o_a = old_a2;
131     o_j = old_j2;
132     o_i = old_i2;
133   } else {
134     o_a = old_a;
135     o_j = old_j;
136     o_i = old_i;
137   }
138 
139   /* ------------------------------------------------------------
140      Call Saad's ilutp() routine to generate the factorization
141      ------------------------------------------------------------
142   */
143 
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
145   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
146   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
147 
148   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149   if (sierr) {
150     switch (sierr) {
151       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157     }
158   }
159 
160   ierr = PetscFree(w);CHKERRQ(ierr);
161   ierr = PetscFree(jw);CHKERRQ(ierr);
162 
163   /* ------------------------------------------------------------
164      Saad's routine gives the result in Modified Sparse Row (msr)
165      Convert to Compressed Sparse Row format (csr)
166      ------------------------------------------------------------
167   */
168 
169   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
170   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
171 
172   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173 
174   ierr = PetscFree(iwk);CHKERRQ(ierr);
175   ierr = PetscFree(wk);CHKERRQ(ierr);
176 
177   if (reorder) {
178     ierr = PetscFree(old_a2);CHKERRQ(ierr);
179     ierr = PetscFree(old_j2);CHKERRQ(ierr);
180     ierr = PetscFree(old_i2);CHKERRQ(ierr);
181   } else {
182     /* fix permutation of old_j that the factorization introduced */
183     for (i=old_i[0]; i<old_i[n]; i++) {
184       old_j[i-1] = iperm[old_j[i-1]-1];
185     }
186   }
187 
188   /* get rid of the shift to indices starting at 1 */
189   for (i=0; i<n+1; i++) {
190     old_i[i]--;
191   }
192   for (i=old_i[0];i<old_i[n];i++) {
193     old_j[i]--;
194   }
195 
196   /* Make the factored matrix 0-based */
197   for (i=0; i<n+1; i++) {
198     new_i[i]--;
199   }
200   for (i=new_i[0];i<new_i[n];i++) {
201     new_j[i]--;
202   }
203 
204   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205   /*-- permute the right-hand-side and solution vectors           --*/
206   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
207   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
208   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
209   for(i=0; i<n; i++) {
210     ordcol[i] = ic[iperm[i]-1];
211   };
212   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
213   ierr = ISDestroy(isicol);CHKERRQ(ierr);
214 
215   ierr = PetscFree(iperm);CHKERRQ(ierr);
216 
217   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
218   ierr = PetscFree(ordcol);CHKERRQ(ierr);
219 
220   /*----- put together the new matrix -----*/
221 
222   ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
223   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
224   ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
225   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
226   (*fact)->factor    = MAT_FACTOR_LU;
227   (*fact)->assembled = PETSC_TRUE;
228 
229   b = (Mat_SeqAIJ*)(*fact)->data;
230   b->free_a        = PETSC_TRUE;
231   b->free_ij       = PETSC_TRUE;
232   b->singlemalloc  = PETSC_FALSE;
233   b->a             = new_a;
234   b->j             = new_j;
235   b->i             = new_i;
236   b->ilen          = 0;
237   b->imax          = 0;
238   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239   b->row           = isirow;
240   b->col           = iscolf;
241   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
242   b->maxnz = b->nz = new_i[n];
243   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
244   (*fact)->info.factor_mallocs = 0;
245 
246   af = ((double)b->nz)/((double)a->nz) + .001;
247   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr);
248   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
249   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
250   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
251 
252   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
253 
254   PetscFunctionReturn(0);
255 #endif
256 }
257 
258 EXTERN_C_BEGIN
259 #undef __FUNCT__
260 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
261 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
262 {
263   PetscFunctionBegin;
264   *flg = PETSC_TRUE;
265   PetscFunctionReturn(0);
266 }
267 EXTERN_C_END
268 
269 EXTERN_C_BEGIN
270 #undef __FUNCT__
271 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
272 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
273 {
274   PetscInt           n = A->rmap->n;
275   PetscErrorCode     ierr;
276 
277   PetscFunctionBegin;
278   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
279   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
280   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
281     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
282     (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
283     (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ;
284   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
285     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
286     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
287     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
288     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
289   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
290   (*B)->factor = ftype;
291   PetscFunctionReturn(0);
292 }
293 EXTERN_C_END
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
297 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
298 {
299   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
300   IS                 isicol;
301   PetscErrorCode     ierr;
302   PetscInt           *r,*ic,i,n=A->rmap->n,*ai=a->i,*aj=a->j;
303   PetscInt           *bi,*bj,*ajtmp;
304   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
305   PetscReal          f;
306   PetscInt           nlnk,*lnk,k,**bi_ptr;
307   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
308   PetscBT            lnkbt;
309 
310   PetscFunctionBegin;
311   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
312   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
313   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
314   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
315 
316   /* get new row pointers */
317   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
318   bi[0] = 0;
319 
320   /* bdiag is location of diagonal in factor */
321   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
322   bdiag[0] = 0;
323 
324   /* linked list for storing column indices of the active row */
325   nlnk = n + 1;
326   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
327 
328   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
329 
330   /* initial FreeSpace size is f*(ai[n]+1) */
331   f = info->fill;
332   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
333   current_space = free_space;
334 
335   for (i=0; i<n; i++) {
336     /* copy previous fill into linked list */
337     nzi = 0;
338     nnz = ai[r[i]+1] - ai[r[i]];
339     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
340     ajtmp = aj + ai[r[i]];
341     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
342     nzi += nlnk;
343 
344     /* add pivot rows into linked list */
345     row = lnk[n];
346     while (row < i) {
347       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
348       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
349       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
350       nzi += nlnk;
351       row  = lnk[row];
352     }
353     bi[i+1] = bi[i] + nzi;
354     im[i]   = nzi;
355 
356     /* mark bdiag */
357     nzbd = 0;
358     nnz  = nzi;
359     k    = lnk[n];
360     while (nnz-- && k < i){
361       nzbd++;
362       k = lnk[k];
363     }
364     bdiag[i] = bi[i] + nzbd;
365 
366     /* if free space is not available, make more free space */
367     if (current_space->local_remaining<nzi) {
368       nnz = (n - i)*nzi; /* estimated and max additional space needed */
369       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
370       reallocs++;
371     }
372 
373     /* copy data into free space, then initialize lnk */
374     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
375     bi_ptr[i] = current_space->array;
376     current_space->array           += nzi;
377     current_space->local_used      += nzi;
378     current_space->local_remaining -= nzi;
379   }
380 #if defined(PETSC_USE_INFO)
381   if (ai[n] != 0) {
382     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
383     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
384     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
385     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
386     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
387   } else {
388     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
389   }
390 #endif
391 
392   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
393   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
394 
395   /* destroy list of free space and other temporary array(s) */
396   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
397   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
398   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
399   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
400 
401   /* put together the new matrix */
402   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
403   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
404   b    = (Mat_SeqAIJ*)(*B)->data;
405   b->free_a       = PETSC_TRUE;
406   b->free_ij      = PETSC_TRUE;
407   b->singlemalloc = PETSC_FALSE;
408   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
409   b->j          = bj;
410   b->i          = bi;
411   b->diag       = bdiag;
412   b->ilen       = 0;
413   b->imax       = 0;
414   b->row        = isrow;
415   b->col        = iscol;
416   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
417   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
418   b->icol       = isicol;
419   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
420 
421   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
422   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
423   b->maxnz = b->nz = bi[n] ;
424 
425   (*B)->factor                 =  MAT_FACTOR_LU;
426   (*B)->info.factor_mallocs    = reallocs;
427   (*B)->info.fill_ratio_given  = f;
428 
429   if (ai[n] != 0) {
430     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
431   } else {
432     (*B)->info.fill_ratio_needed = 0.0;
433   }
434   (*B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
435   (*B)->ops->solve            = MatSolve_SeqAIJ;
436   (*B)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ;
437   /* switch to inodes if appropriate */
438   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 /*
443     Trouble in factorization, should we dump the original matrix?
444 */
445 #undef __FUNCT__
446 #define __FUNCT__ "MatFactorDumpMatrix"
447 PetscErrorCode MatFactorDumpMatrix(Mat A)
448 {
449   PetscErrorCode ierr;
450   PetscTruth     flg;
451 
452   PetscFunctionBegin;
453   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
454   if (flg) {
455     PetscViewer viewer;
456     char        filename[PETSC_MAX_PATH_LEN];
457 
458     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
459     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
460     ierr = MatView(A,viewer);CHKERRQ(ierr);
461     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
462   }
463   PetscFunctionReturn(0);
464 }
465 
466 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec);
467 
468 /* ----------------------------------------------------------- */
469 #undef __FUNCT__
470 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
471 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
472 {
473   Mat            C=*B;
474   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
475   IS             isrow = b->row,isicol = b->icol;
476   PetscErrorCode ierr;
477   PetscInt       *r,*ic,i,j,n=A->rmap->n,*bi=b->i,*bj=b->j;
478   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
479   PetscInt       *diag_offset = b->diag,diag,*pj;
480   PetscScalar    *rtmp,*pc,multiplier,*rtmps;
481   MatScalar      *v,*pv;
482   PetscScalar    d;
483   PetscReal      rs;
484   LUShift_Ctx    sctx;
485   PetscInt       newshift,*ddiag;
486 
487   PetscFunctionBegin;
488   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
489   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
490   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
491   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
492   rtmps = rtmp; ics = ic;
493 
494   sctx.shift_top  = 0;
495   sctx.nshift_max = 0;
496   sctx.shift_lo   = 0;
497   sctx.shift_hi   = 0;
498 
499   /* if both shift schemes are chosen by user, only use info->shiftpd */
500   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
501   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
502     PetscInt *aai = a->i;
503     ddiag          = a->diag;
504     sctx.shift_top = 0;
505     for (i=0; i<n; i++) {
506       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
507       d  = (a->a)[ddiag[i]];
508       rs = -PetscAbsScalar(d) - PetscRealPart(d);
509       v  = a->a+aai[i];
510       nz = aai[i+1] - aai[i];
511       for (j=0; j<nz; j++)
512 	rs += PetscAbsScalar(v[j]);
513       if (rs>sctx.shift_top) sctx.shift_top = rs;
514     }
515     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
516     sctx.shift_top    *= 1.1;
517     sctx.nshift_max   = 5;
518     sctx.shift_lo     = 0.;
519     sctx.shift_hi     = 1.;
520   }
521 
522   sctx.shift_amount = 0;
523   sctx.nshift       = 0;
524   do {
525     sctx.lushift = PETSC_FALSE;
526     for (i=0; i<n; i++){
527       nz    = bi[i+1] - bi[i];
528       bjtmp = bj + bi[i];
529       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
530 
531       /* load in initial (unfactored row) */
532       nz    = a->i[r[i]+1] - a->i[r[i]];
533       ajtmp = a->j + a->i[r[i]];
534       v     = a->a + a->i[r[i]];
535       for (j=0; j<nz; j++) {
536         rtmp[ics[ajtmp[j]]] = v[j];
537       }
538       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
539 
540       row = *bjtmp++;
541       while  (row < i) {
542         pc = rtmp + row;
543         if (*pc != 0.0) {
544           pv         = b->a + diag_offset[row];
545           pj         = b->j + diag_offset[row] + 1;
546           multiplier = *pc / *pv++;
547           *pc        = multiplier;
548           nz         = bi[row+1] - diag_offset[row] - 1;
549           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
550           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
551         }
552         row = *bjtmp++;
553       }
554       /* finished row so stick it into b->a */
555       pv   = b->a + bi[i] ;
556       pj   = b->j + bi[i] ;
557       nz   = bi[i+1] - bi[i];
558       diag = diag_offset[i] - bi[i];
559       rs   = 0.0;
560       for (j=0; j<nz; j++) {
561         pv[j] = rtmps[pj[j]];
562         if (j != diag) rs += PetscAbsScalar(pv[j]);
563       }
564 
565       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
566       sctx.rs  = rs;
567       sctx.pv  = pv[diag];
568       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
569       if (newshift == 1) break;
570     }
571 
572     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
573       /*
574        * if no shift in this attempt & shifting & started shifting & can refine,
575        * then try lower shift
576        */
577       sctx.shift_hi        = info->shift_fraction;
578       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
579       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
580       sctx.lushift         = PETSC_TRUE;
581       sctx.nshift++;
582     }
583   } while (sctx.lushift);
584 
585   /* invert diagonal entries for simplier triangular solves */
586   for (i=0; i<n; i++) {
587     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
588   }
589   ierr = PetscFree(rtmp);CHKERRQ(ierr);
590   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
591   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
592   if (b->inode.use) {
593     C->ops->solve   = MatSolve_Inode;
594   } else {
595     C->ops->solve   = MatSolve_SeqAIJ;
596   }
597   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
598   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
599   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
600   C->assembled    = PETSC_TRUE;
601   C->preallocated = PETSC_TRUE;
602   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
603   if (sctx.nshift){
604     if (info->shiftnz) {
605       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
606     } else if (info->shiftpd) {
607       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
608     }
609   }
610   PetscFunctionReturn(0);
611 }
612 
613 /*
614    This routine implements inplace ILU(0) with row or/and column permutations.
615    Input:
616      A - original matrix
617    Output;
618      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
619          a->j (col index) is permuted by the inverse of colperm, then sorted
620          a->a reordered accordingly with a->j
621          a->diag (ptr to diagonal elements) is updated.
622 */
623 #undef __FUNCT__
624 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
625 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B)
626 {
627   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
628   IS             isrow = a->row,isicol = a->icol;
629   PetscErrorCode ierr;
630   PetscInt       *r,*ic,i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
631   PetscInt       *ajtmp,nz,row,*ics;
632   PetscInt       *diag = a->diag,nbdiag,*pj;
633   PetscScalar    *rtmp,*pc,multiplier,d;
634   MatScalar      *v,*pv;
635   PetscReal      rs;
636   LUShift_Ctx    sctx;
637   PetscInt       newshift;
638 
639   PetscFunctionBegin;
640   if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
641   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
642   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
643   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
644   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
645   ics = ic;
646 
647   sctx.shift_top  = 0;
648   sctx.nshift_max = 0;
649   sctx.shift_lo   = 0;
650   sctx.shift_hi   = 0;
651 
652   /* if both shift schemes are chosen by user, only use info->shiftpd */
653   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
654   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
655     sctx.shift_top = 0;
656     for (i=0; i<n; i++) {
657       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
658       d  = (a->a)[diag[i]];
659       rs = -PetscAbsScalar(d) - PetscRealPart(d);
660       v  = a->a+ai[i];
661       nz = ai[i+1] - ai[i];
662       for (j=0; j<nz; j++)
663 	rs += PetscAbsScalar(v[j]);
664       if (rs>sctx.shift_top) sctx.shift_top = rs;
665     }
666     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
667     sctx.shift_top    *= 1.1;
668     sctx.nshift_max   = 5;
669     sctx.shift_lo     = 0.;
670     sctx.shift_hi     = 1.;
671   }
672 
673   sctx.shift_amount = 0;
674   sctx.nshift       = 0;
675   do {
676     sctx.lushift = PETSC_FALSE;
677     for (i=0; i<n; i++){
678       /* load in initial unfactored row */
679       nz    = ai[r[i]+1] - ai[r[i]];
680       ajtmp = aj + ai[r[i]];
681       v     = a->a + ai[r[i]];
682       /* sort permuted ajtmp and values v accordingly */
683       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
684       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
685 
686       diag[r[i]] = ai[r[i]];
687       for (j=0; j<nz; j++) {
688         rtmp[ajtmp[j]] = v[j];
689         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
690       }
691       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
692 
693       row = *ajtmp++;
694       while  (row < i) {
695         pc = rtmp + row;
696         if (*pc != 0.0) {
697           pv         = a->a + diag[r[row]];
698           pj         = aj + diag[r[row]] + 1;
699 
700           multiplier = *pc / *pv++;
701           *pc        = multiplier;
702           nz         = ai[r[row]+1] - diag[r[row]] - 1;
703           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
704           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
705         }
706         row = *ajtmp++;
707       }
708       /* finished row so overwrite it onto a->a */
709       pv   = a->a + ai[r[i]] ;
710       pj   = aj + ai[r[i]] ;
711       nz   = ai[r[i]+1] - ai[r[i]];
712       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
713 
714       rs   = 0.0;
715       for (j=0; j<nz; j++) {
716         pv[j] = rtmp[pj[j]];
717         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
718       }
719 
720       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
721       sctx.rs  = rs;
722       sctx.pv  = pv[nbdiag];
723       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
724       if (newshift == 1) break;
725     }
726 
727     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
728       /*
729        * if no shift in this attempt & shifting & started shifting & can refine,
730        * then try lower shift
731        */
732       sctx.shift_hi        = info->shift_fraction;
733       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
734       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
735       sctx.lushift         = PETSC_TRUE;
736       sctx.nshift++;
737     }
738   } while (sctx.lushift);
739 
740   /* invert diagonal entries for simplier triangular solves */
741   for (i=0; i<n; i++) {
742     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
743   }
744 
745   ierr = PetscFree(rtmp);CHKERRQ(ierr);
746   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
747   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
748   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
749   A->ops->solveadd          = MatSolveAdd_SeqAIJ;
750   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
751   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
752   A->assembled = PETSC_TRUE;
753   A->preallocated = PETSC_TRUE;
754   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
755   if (sctx.nshift){
756     if (info->shiftnz) {
757       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
758     } else if (info->shiftpd) {
759       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
760     }
761   }
762   PetscFunctionReturn(0);
763 }
764 
765 /* ----------------------------------------------------------- */
766 #undef __FUNCT__
767 #define __FUNCT__ "MatLUFactor_SeqAIJ"
768 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
769 {
770   PetscErrorCode ierr;
771   Mat            C;
772 
773   PetscFunctionBegin;
774   ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
775   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
776   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
777   A->ops->solve            = C->ops->solve;
778   A->ops->solvetranspose   = C->ops->solvetranspose;
779   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
780   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
781   PetscFunctionReturn(0);
782 }
783 /* ----------------------------------------------------------- */
784 #undef __FUNCT__
785 #define __FUNCT__ "MatSolve_SeqAIJ"
786 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
787 {
788   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
789   IS                iscol = a->col,isrow = a->row;
790   PetscErrorCode    ierr;
791   PetscInt          *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
792   PetscInt          nz,*rout,*cout;
793   PetscScalar       *x,*tmp,*tmps,sum;
794   const PetscScalar *b;
795   const MatScalar   *aa = a->a,*v;
796 
797   PetscFunctionBegin;
798   if (!n) PetscFunctionReturn(0);
799 
800   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
801   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
802   tmp  = a->solve_work;
803 
804   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
805   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
806 
807   /* forward solve the lower triangular */
808   tmp[0] = b[*r++];
809   tmps   = tmp;
810   for (i=1; i<n; i++) {
811     v   = aa + ai[i] ;
812     vi  = aj + ai[i] ;
813     nz  = a->diag[i] - ai[i];
814     sum = b[*r++];
815     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
816     tmp[i] = sum;
817   }
818 
819   /* backward solve the upper triangular */
820   for (i=n-1; i>=0; i--){
821     v   = aa + a->diag[i] + 1;
822     vi  = aj + a->diag[i] + 1;
823     nz  = ai[i+1] - a->diag[i] - 1;
824     sum = tmp[i];
825     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
826     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
827   }
828 
829   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
830   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
831   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
832   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
833   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatMatSolve_SeqAIJ"
839 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
840 {
841   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
842   IS              iscol = a->col,isrow = a->row;
843   PetscErrorCode  ierr;
844   PetscInt        *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
845   PetscInt        nz,*rout,*cout,neq;
846   PetscScalar     *x,*b,*tmp,*tmps,sum;
847   const MatScalar *aa = a->a,*v;
848   PetscTruth      bisdense,xisdense;
849 
850   PetscFunctionBegin;
851   if (!n) PetscFunctionReturn(0);
852 
853   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
854   if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
855   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
856   if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
857 
858   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
859   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
860 
861   tmp  = a->solve_work;
862   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
863   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
864 
865   for (neq=0; neq<B->cmap->n; neq++){
866     /* forward solve the lower triangular */
867     tmp[0] = b[r[0]];
868     tmps   = tmp;
869     for (i=1; i<n; i++) {
870       v   = aa + ai[i] ;
871       vi  = aj + ai[i] ;
872       nz  = a->diag[i] - ai[i];
873       sum = b[r[i]];
874       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
875       tmp[i] = sum;
876     }
877     /* backward solve the upper triangular */
878     for (i=n-1; i>=0; i--){
879       v   = aa + a->diag[i] + 1;
880       vi  = aj + a->diag[i] + 1;
881       nz  = ai[i+1] - a->diag[i] - 1;
882       sum = tmp[i];
883       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
884       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
885     }
886 
887     b += n;
888     x += n;
889   }
890   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
891   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
892   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
893   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
894   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
895   PetscFunctionReturn(0);
896 }
897 
898 #undef __FUNCT__
899 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
900 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
901 {
902   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
903   IS              iscol = a->col,isrow = a->row;
904   PetscErrorCode  ierr;
905   PetscInt        *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
906   PetscInt        nz,*rout,*cout,row;
907   PetscScalar     *x,*b,*tmp,*tmps,sum;
908   const MatScalar *aa = a->a,*v;
909 
910   PetscFunctionBegin;
911   if (!n) PetscFunctionReturn(0);
912 
913   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
914   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
915   tmp  = a->solve_work;
916 
917   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
918   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
919 
920   /* forward solve the lower triangular */
921   tmp[0] = b[*r++];
922   tmps   = tmp;
923   for (row=1; row<n; row++) {
924     i   = rout[row]; /* permuted row */
925     v   = aa + ai[i] ;
926     vi  = aj + ai[i] ;
927     nz  = a->diag[i] - ai[i];
928     sum = b[*r++];
929     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
930     tmp[row] = sum;
931   }
932 
933   /* backward solve the upper triangular */
934   for (row=n-1; row>=0; row--){
935     i   = rout[row]; /* permuted row */
936     v   = aa + a->diag[i] + 1;
937     vi  = aj + a->diag[i] + 1;
938     nz  = ai[i+1] - a->diag[i] - 1;
939     sum = tmp[row];
940     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
941     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
942   }
943 
944   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
945   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
946   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
947   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
948   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 /* ----------------------------------------------------------- */
953 #undef __FUNCT__
954 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
955 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
956 {
957   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
958   PetscErrorCode    ierr;
959   PetscInt          n = A->rmap->n;
960   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
961   PetscScalar       *x;
962   const PetscScalar *b;
963   const MatScalar   *aa = a->a;
964 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
965   PetscInt          adiag_i,i,nz,ai_i;
966   const MatScalar   *v;
967   PetscScalar       sum;
968 #endif
969 
970   PetscFunctionBegin;
971   if (!n) PetscFunctionReturn(0);
972 
973   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
974   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
975 
976 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
977   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
978 #else
979   /* forward solve the lower triangular */
980   x[0] = b[0];
981   for (i=1; i<n; i++) {
982     ai_i = ai[i];
983     v    = aa + ai_i;
984     vi   = aj + ai_i;
985     nz   = adiag[i] - ai_i;
986     sum  = b[i];
987     while (nz--) sum -= *v++ * x[*vi++];
988     x[i] = sum;
989   }
990 
991   /* backward solve the upper triangular */
992   for (i=n-1; i>=0; i--){
993     adiag_i = adiag[i];
994     v       = aa + adiag_i + 1;
995     vi      = aj + adiag_i + 1;
996     nz      = ai[i+1] - adiag_i - 1;
997     sum     = x[i];
998     while (nz--) sum -= *v++ * x[*vi++];
999     x[i]    = sum*aa[adiag_i];
1000   }
1001 #endif
1002   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
1003   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
1004   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1010 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1011 {
1012   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1013   IS              iscol = a->col,isrow = a->row;
1014   PetscErrorCode  ierr;
1015   PetscInt        *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1016   PetscInt        nz,*rout,*cout;
1017   PetscScalar     *x,*b,*tmp,sum;
1018   const MatScalar *aa = a->a,*v;
1019 
1020   PetscFunctionBegin;
1021   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1022 
1023   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1024   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1025   tmp  = a->solve_work;
1026 
1027   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1028   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1029 
1030   /* forward solve the lower triangular */
1031   tmp[0] = b[*r++];
1032   for (i=1; i<n; i++) {
1033     v   = aa + ai[i] ;
1034     vi  = aj + ai[i] ;
1035     nz  = a->diag[i] - ai[i];
1036     sum = b[*r++];
1037     while (nz--) sum -= *v++ * tmp[*vi++ ];
1038     tmp[i] = sum;
1039   }
1040 
1041   /* backward solve the upper triangular */
1042   for (i=n-1; i>=0; i--){
1043     v   = aa + a->diag[i] + 1;
1044     vi  = aj + a->diag[i] + 1;
1045     nz  = ai[i+1] - a->diag[i] - 1;
1046     sum = tmp[i];
1047     while (nz--) sum -= *v++ * tmp[*vi++ ];
1048     tmp[i] = sum*aa[a->diag[i]];
1049     x[*c--] += tmp[i];
1050   }
1051 
1052   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1053   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1054   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1055   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1056   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1057 
1058   PetscFunctionReturn(0);
1059 }
1060 /* -------------------------------------------------------------------*/
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1063 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1064 {
1065   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1066   IS              iscol = a->col,isrow = a->row;
1067   PetscErrorCode  ierr;
1068   PetscInt        *r,*c,i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1069   PetscInt        nz,*rout,*cout,*diag = a->diag;
1070   PetscScalar     *x,*b,*tmp,s1;
1071   const MatScalar *aa = a->a,*v;
1072 
1073   PetscFunctionBegin;
1074   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1075   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1076   tmp  = a->solve_work;
1077 
1078   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1079   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1080 
1081   /* copy the b into temp work space according to permutation */
1082   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1083 
1084   /* forward solve the U^T */
1085   for (i=0; i<n; i++) {
1086     v   = aa + diag[i] ;
1087     vi  = aj + diag[i] + 1;
1088     nz  = ai[i+1] - diag[i] - 1;
1089     s1  = tmp[i];
1090     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1091     while (nz--) {
1092       tmp[*vi++ ] -= (*v++)*s1;
1093     }
1094     tmp[i] = s1;
1095   }
1096 
1097   /* backward solve the L^T */
1098   for (i=n-1; i>=0; i--){
1099     v   = aa + diag[i] - 1 ;
1100     vi  = aj + diag[i] - 1 ;
1101     nz  = diag[i] - ai[i];
1102     s1  = tmp[i];
1103     while (nz--) {
1104       tmp[*vi-- ] -= (*v--)*s1;
1105     }
1106   }
1107 
1108   /* copy tmp into x according to permutation */
1109   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1110 
1111   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1112   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1113   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1114   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1115 
1116   ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1122 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1123 {
1124   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1125   IS              iscol = a->col,isrow = a->row;
1126   PetscErrorCode  ierr;
1127   PetscInt        *r,*c,i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1128   PetscInt        nz,*rout,*cout,*diag = a->diag;
1129   PetscScalar     *x,*b,*tmp;
1130   const MatScalar *aa = a->a,*v;
1131 
1132   PetscFunctionBegin;
1133   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1134 
1135   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1136   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1137   tmp = a->solve_work;
1138 
1139   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1140   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1141 
1142   /* copy the b into temp work space according to permutation */
1143   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1144 
1145   /* forward solve the U^T */
1146   for (i=0; i<n; i++) {
1147     v   = aa + diag[i] ;
1148     vi  = aj + diag[i] + 1;
1149     nz  = ai[i+1] - diag[i] - 1;
1150     tmp[i] *= *v++;
1151     while (nz--) {
1152       tmp[*vi++ ] -= (*v++)*tmp[i];
1153     }
1154   }
1155 
1156   /* backward solve the L^T */
1157   for (i=n-1; i>=0; i--){
1158     v   = aa + diag[i] - 1 ;
1159     vi  = aj + diag[i] - 1 ;
1160     nz  = diag[i] - ai[i];
1161     while (nz--) {
1162       tmp[*vi-- ] -= (*v--)*tmp[i];
1163     }
1164   }
1165 
1166   /* copy tmp into x according to permutation */
1167   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1168 
1169   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1170   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1171   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1172   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1173 
1174   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 /* ----------------------------------------------------------------*/
1178 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1179 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,MatDuplicateOption,Mat*);
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1183 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
1184 {
1185   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1186   IS                 isicol;
1187   PetscErrorCode     ierr;
1188   PetscInt           *r,*ic,n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1189   PetscInt           *bi,*cols,nnz,*cols_lvl;
1190   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1191   PetscInt           i,levels,diagonal_fill;
1192   PetscTruth         col_identity,row_identity;
1193   PetscReal          f;
1194   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1195   PetscBT            lnkbt;
1196   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1197   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1198   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1199   PetscTruth         missing;
1200 
1201   PetscFunctionBegin;
1202   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1203   f             = info->fill;
1204   levels        = (PetscInt)info->levels;
1205   diagonal_fill = (PetscInt)info->diagonal_fill;
1206   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1207 
1208   /* special case that simply copies fill pattern */
1209   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1210   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1211   if (!levels && row_identity && col_identity) {
1212     ierr = MatDuplicateNoCreate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
1213     (*fact)->factor                 = MAT_FACTOR_LU;
1214     (*fact)->info.factor_mallocs    = 0;
1215     (*fact)->info.fill_ratio_given  = info->fill;
1216     (*fact)->info.fill_ratio_needed = 1.0;
1217     b               = (Mat_SeqAIJ*)(*fact)->data;
1218     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1219     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1220     b->row              = isrow;
1221     b->col              = iscol;
1222     b->icol             = isicol;
1223     ierr                = PetscMalloc(((*fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1224     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1225     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1226     (*fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1227     ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1228     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
1229     PetscFunctionReturn(0);
1230   }
1231 
1232   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1233   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1234 
1235   /* get new row pointers */
1236   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1237   bi[0] = 0;
1238   /* bdiag is location of diagonal in factor */
1239   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1240   bdiag[0]  = 0;
1241 
1242   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1243   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1244 
1245   /* create a linked list for storing column indices of the active row */
1246   nlnk = n + 1;
1247   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1248 
1249   /* initial FreeSpace size is f*(ai[n]+1) */
1250   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1251   current_space = free_space;
1252   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1253   current_space_lvl = free_space_lvl;
1254 
1255   for (i=0; i<n; i++) {
1256     nzi = 0;
1257     /* copy current row into linked list */
1258     nnz  = ai[r[i]+1] - ai[r[i]];
1259     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1260     cols = aj + ai[r[i]];
1261     lnk[i] = -1; /* marker to indicate if diagonal exists */
1262     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1263     nzi += nlnk;
1264 
1265     /* make sure diagonal entry is included */
1266     if (diagonal_fill && lnk[i] == -1) {
1267       fm = n;
1268       while (lnk[fm] < i) fm = lnk[fm];
1269       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1270       lnk[fm]    = i;
1271       lnk_lvl[i] = 0;
1272       nzi++; dcount++;
1273     }
1274 
1275     /* add pivot rows into the active row */
1276     nzbd = 0;
1277     prow = lnk[n];
1278     while (prow < i) {
1279       nnz      = bdiag[prow];
1280       cols     = bj_ptr[prow] + nnz + 1;
1281       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1282       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1283       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1284       nzi += nlnk;
1285       prow = lnk[prow];
1286       nzbd++;
1287     }
1288     bdiag[i] = nzbd;
1289     bi[i+1]  = bi[i] + nzi;
1290 
1291     /* if free space is not available, make more free space */
1292     if (current_space->local_remaining<nzi) {
1293       nnz = nzi*(n - i); /* estimated and max additional space needed */
1294       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1295       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1296       reallocs++;
1297     }
1298 
1299     /* copy data into free_space and free_space_lvl, then initialize lnk */
1300     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1301     bj_ptr[i]    = current_space->array;
1302     bjlvl_ptr[i] = current_space_lvl->array;
1303 
1304     /* make sure the active row i has diagonal entry */
1305     if (*(bj_ptr[i]+bdiag[i]) != i) {
1306       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1307     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1308     }
1309 
1310     current_space->array           += nzi;
1311     current_space->local_used      += nzi;
1312     current_space->local_remaining -= nzi;
1313     current_space_lvl->array           += nzi;
1314     current_space_lvl->local_used      += nzi;
1315     current_space_lvl->local_remaining -= nzi;
1316   }
1317 
1318   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1319   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1320 
1321   /* destroy list of free space and other temporary arrays */
1322   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1323   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1324   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1325   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1326   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1327 
1328 #if defined(PETSC_USE_INFO)
1329   {
1330     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1331     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1332     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1333     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1334     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1335     if (diagonal_fill) {
1336       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1337     }
1338   }
1339 #endif
1340 
1341   /* put together the new matrix */
1342   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1343   b = (Mat_SeqAIJ*)(*fact)->data;
1344   b->free_a       = PETSC_TRUE;
1345   b->free_ij      = PETSC_TRUE;
1346   b->singlemalloc = PETSC_FALSE;
1347   len = (bi[n] )*sizeof(PetscScalar);
1348   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1349   b->j          = bj;
1350   b->i          = bi;
1351   for (i=0; i<n; i++) bdiag[i] += bi[i];
1352   b->diag       = bdiag;
1353   b->ilen       = 0;
1354   b->imax       = 0;
1355   b->row        = isrow;
1356   b->col        = iscol;
1357   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1358   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1359   b->icol       = isicol;
1360   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1361   /* In b structure:  Free imax, ilen, old a, old j.
1362      Allocate bdiag, solve_work, new a, new j */
1363   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1364   b->maxnz             = b->nz = bi[n] ;
1365   (*fact)->factor = MAT_FACTOR_LU;
1366   (*fact)->info.factor_mallocs    = reallocs;
1367   (*fact)->info.fill_ratio_given  = f;
1368   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1369   (*fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ;
1370   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #include "src/mat/impls/sbaij/seq/sbaij.h"
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1377 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1378 {
1379   Mat            C = *B;
1380   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1381   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1382   IS             ip=b->row,iip = b->icol;
1383   PetscErrorCode ierr;
1384   PetscInt       *rip,*riip,i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol;
1385   PetscInt       *ai=a->i,*aj=a->j;
1386   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1387   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1388   PetscReal      zeropivot,rs,shiftnz;
1389   PetscReal      shiftpd;
1390   ChShift_Ctx    sctx;
1391   PetscInt       newshift;
1392   PetscTruth     perm_identity;
1393 
1394   PetscFunctionBegin;
1395 
1396   shiftnz   = info->shiftnz;
1397   shiftpd   = info->shiftpd;
1398   zeropivot = info->zeropivot;
1399 
1400   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1401   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1402 
1403   /* initialization */
1404   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1405   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1406   jl   = il + mbs;
1407   rtmp = (MatScalar*)(jl + mbs);
1408 
1409   sctx.shift_amount = 0;
1410   sctx.nshift       = 0;
1411   do {
1412     sctx.chshift = PETSC_FALSE;
1413     for (i=0; i<mbs; i++) {
1414       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1415     }
1416 
1417     for (k = 0; k<mbs; k++){
1418       bval = ba + bi[k];
1419       /* initialize k-th row by the perm[k]-th row of A */
1420       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1421       for (j = jmin; j < jmax; j++){
1422         col = riip[aj[j]];
1423         if (col >= k){ /* only take upper triangular entry */
1424           rtmp[col] = aa[j];
1425           *bval++  = 0.0; /* for in-place factorization */
1426         }
1427       }
1428       /* shift the diagonal of the matrix */
1429       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1430 
1431       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1432       dk = rtmp[k];
1433       i = jl[k]; /* first row to be added to k_th row  */
1434 
1435       while (i < k){
1436         nexti = jl[i]; /* next row to be added to k_th row */
1437 
1438         /* compute multiplier, update diag(k) and U(i,k) */
1439         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1440         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1441         dk += uikdi*ba[ili];
1442         ba[ili] = uikdi; /* -U(i,k) */
1443 
1444         /* add multiple of row i to k-th row */
1445         jmin = ili + 1; jmax = bi[i+1];
1446         if (jmin < jmax){
1447           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1448           /* update il and jl for row i */
1449           il[i] = jmin;
1450           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1451         }
1452         i = nexti;
1453       }
1454 
1455       /* shift the diagonals when zero pivot is detected */
1456       /* compute rs=sum of abs(off-diagonal) */
1457       rs   = 0.0;
1458       jmin = bi[k]+1;
1459       nz   = bi[k+1] - jmin;
1460       bcol = bj + jmin;
1461       while (nz--){
1462         rs += PetscAbsScalar(rtmp[*bcol]);
1463         bcol++;
1464       }
1465 
1466       sctx.rs = rs;
1467       sctx.pv = dk;
1468       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1469 
1470       if (newshift == 1) {
1471         if (!sctx.shift_amount) {
1472           sctx.shift_amount = 1e-5;
1473         }
1474         break;
1475       }
1476 
1477       /* copy data into U(k,:) */
1478       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1479       jmin = bi[k]+1; jmax = bi[k+1];
1480       if (jmin < jmax) {
1481         for (j=jmin; j<jmax; j++){
1482           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1483         }
1484         /* add the k-th row into il and jl */
1485         il[k] = jmin;
1486         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1487       }
1488     }
1489   } while (sctx.chshift);
1490   ierr = PetscFree(il);CHKERRQ(ierr);
1491 
1492   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1493   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1494 
1495   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
1496   if (perm_identity){
1497     (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1498     (*B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1499     (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1500     (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1501   } else {
1502     (*B)->ops->solve           = MatSolve_SeqSBAIJ_1;
1503     (*B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1504     (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1505     (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1506   }
1507 
1508   C->assembled    = PETSC_TRUE;
1509   C->preallocated = PETSC_TRUE;
1510   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
1511   if (sctx.nshift){
1512     if (shiftnz) {
1513       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1514     } else if (shiftpd) {
1515       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1516     }
1517   }
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1523 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1524 {
1525   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1526   Mat_SeqSBAIJ       *b;
1527   PetscErrorCode     ierr;
1528   PetscTruth         perm_identity,missing;
1529   PetscInt           reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui;
1530   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1531   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1532   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1533   PetscReal          fill=info->fill,levels=info->levels;
1534   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1535   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1536   PetscBT            lnkbt;
1537   IS                 iperm;
1538 
1539   PetscFunctionBegin;
1540   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1541   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1542   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1543   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1544   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1545 
1546   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1547   ui[0] = 0;
1548 
1549   /* ICC(0) without matrix ordering: simply copies fill pattern */
1550   if (!levels && perm_identity) {
1551 
1552     for (i=0; i<am; i++) {
1553       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1554     }
1555     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1556     cols = uj;
1557     for (i=0; i<am; i++) {
1558       aj    = a->j + a->diag[i];
1559       ncols = ui[i+1] - ui[i];
1560       for (j=0; j<ncols; j++) *cols++ = *aj++;
1561     }
1562   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1563     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1564     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1565 
1566     /* initialization */
1567     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1568 
1569     /* jl: linked list for storing indices of the pivot rows
1570        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1571     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1572     il         = jl + am;
1573     uj_ptr     = (PetscInt**)(il + am);
1574     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1575     for (i=0; i<am; i++){
1576       jl[i] = am; il[i] = 0;
1577     }
1578 
1579     /* create and initialize a linked list for storing column indices of the active row k */
1580     nlnk = am + 1;
1581     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1582 
1583     /* initial FreeSpace size is fill*(ai[am]+1) */
1584     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1585     current_space = free_space;
1586     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1587     current_space_lvl = free_space_lvl;
1588 
1589     for (k=0; k<am; k++){  /* for each active row k */
1590       /* initialize lnk by the column indices of row rip[k] of A */
1591       nzk   = 0;
1592       ncols = ai[rip[k]+1] - ai[rip[k]];
1593       if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1594       ncols_upper = 0;
1595       for (j=0; j<ncols; j++){
1596         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1597         if (riip[i] >= k){ /* only take upper triangular entry */
1598           ajtmp[ncols_upper] = i;
1599           ncols_upper++;
1600         }
1601       }
1602       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1603       nzk += nlnk;
1604 
1605       /* update lnk by computing fill-in for each pivot row to be merged in */
1606       prow = jl[k]; /* 1st pivot row */
1607 
1608       while (prow < k){
1609         nextprow = jl[prow];
1610 
1611         /* merge prow into k-th row */
1612         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1613         jmax = ui[prow+1];
1614         ncols = jmax-jmin;
1615         i     = jmin - ui[prow];
1616         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1617         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1618         j     = *(uj - 1);
1619         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1620         nzk += nlnk;
1621 
1622         /* update il and jl for prow */
1623         if (jmin < jmax){
1624           il[prow] = jmin;
1625           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1626         }
1627         prow = nextprow;
1628       }
1629 
1630       /* if free space is not available, make more free space */
1631       if (current_space->local_remaining<nzk) {
1632         i = am - k + 1; /* num of unfactored rows */
1633         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1634         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1635         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1636         reallocs++;
1637       }
1638 
1639       /* copy data into free_space and free_space_lvl, then initialize lnk */
1640       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1641       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1642 
1643       /* add the k-th row into il and jl */
1644       if (nzk > 1){
1645         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1646         jl[k] = jl[i]; jl[i] = k;
1647         il[k] = ui[k] + 1;
1648       }
1649       uj_ptr[k]     = current_space->array;
1650       uj_lvl_ptr[k] = current_space_lvl->array;
1651 
1652       current_space->array           += nzk;
1653       current_space->local_used      += nzk;
1654       current_space->local_remaining -= nzk;
1655 
1656       current_space_lvl->array           += nzk;
1657       current_space_lvl->local_used      += nzk;
1658       current_space_lvl->local_remaining -= nzk;
1659 
1660       ui[k+1] = ui[k] + nzk;
1661     }
1662 
1663 #if defined(PETSC_USE_INFO)
1664     if (ai[am] != 0) {
1665       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1666       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1667       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1668       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1669     } else {
1670       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1671     }
1672 #endif
1673 
1674     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1675     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1676     ierr = PetscFree(jl);CHKERRQ(ierr);
1677     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1678 
1679     /* destroy list of free space and other temporary array(s) */
1680     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1681     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1682     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1683     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1684 
1685   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1686 
1687   /* put together the new matrix in MATSEQSBAIJ format */
1688 
1689   b    = (Mat_SeqSBAIJ*)(*fact)->data;
1690   b->singlemalloc = PETSC_FALSE;
1691   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1692   b->j    = uj;
1693   b->i    = ui;
1694   b->diag = 0;
1695   b->ilen = 0;
1696   b->imax = 0;
1697   b->row  = perm;
1698   b->col  = perm;
1699   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1700   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1701   b->icol = iperm;
1702   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1703   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1704   ierr = PetscLogObjectMemory((*fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1705   b->maxnz   = b->nz = ui[am];
1706   b->free_a  = PETSC_TRUE;
1707   b->free_ij = PETSC_TRUE;
1708 
1709   (*fact)->info.factor_mallocs    = reallocs;
1710   (*fact)->info.fill_ratio_given  = fill;
1711   if (ai[am] != 0) {
1712     (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1713   } else {
1714     (*fact)->info.fill_ratio_needed = 0.0;
1715   }
1716   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1722 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1723 {
1724   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1725   Mat_SeqSBAIJ       *b;
1726   PetscErrorCode     ierr;
1727   PetscTruth         perm_identity;
1728   PetscReal          fill = info->fill;
1729   PetscInt           *rip,*riip,i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1730   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1731   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1732   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1733   PetscBT            lnkbt;
1734   IS                 iperm;
1735 
1736   PetscFunctionBegin;
1737   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1738   /* check whether perm is the identity mapping */
1739   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1740   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1741   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1742   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1743 
1744   /* initialization */
1745   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1746   ui[0] = 0;
1747 
1748   /* jl: linked list for storing indices of the pivot rows
1749      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1750   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1751   il     = jl + am;
1752   cols   = il + am;
1753   ui_ptr = (PetscInt**)(cols + am);
1754   for (i=0; i<am; i++){
1755     jl[i] = am; il[i] = 0;
1756   }
1757 
1758   /* create and initialize a linked list for storing column indices of the active row k */
1759   nlnk = am + 1;
1760   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1761 
1762   /* initial FreeSpace size is fill*(ai[am]+1) */
1763   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1764   current_space = free_space;
1765 
1766   for (k=0; k<am; k++){  /* for each active row k */
1767     /* initialize lnk by the column indices of row rip[k] of A */
1768     nzk   = 0;
1769     ncols = ai[rip[k]+1] - ai[rip[k]];
1770     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1771     ncols_upper = 0;
1772     for (j=0; j<ncols; j++){
1773       i = riip[*(aj + ai[rip[k]] + j)];
1774       if (i >= k){ /* only take upper triangular entry */
1775         cols[ncols_upper] = i;
1776         ncols_upper++;
1777       }
1778     }
1779     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1780     nzk += nlnk;
1781 
1782     /* update lnk by computing fill-in for each pivot row to be merged in */
1783     prow = jl[k]; /* 1st pivot row */
1784 
1785     while (prow < k){
1786       nextprow = jl[prow];
1787       /* merge prow into k-th row */
1788       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1789       jmax = ui[prow+1];
1790       ncols = jmax-jmin;
1791       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1792       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1793       nzk += nlnk;
1794 
1795       /* update il and jl for prow */
1796       if (jmin < jmax){
1797         il[prow] = jmin;
1798         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1799       }
1800       prow = nextprow;
1801     }
1802 
1803     /* if free space is not available, make more free space */
1804     if (current_space->local_remaining<nzk) {
1805       i = am - k + 1; /* num of unfactored rows */
1806       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1807       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1808       reallocs++;
1809     }
1810 
1811     /* copy data into free space, then initialize lnk */
1812     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1813 
1814     /* add the k-th row into il and jl */
1815     if (nzk-1 > 0){
1816       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1817       jl[k] = jl[i]; jl[i] = k;
1818       il[k] = ui[k] + 1;
1819     }
1820     ui_ptr[k] = current_space->array;
1821     current_space->array           += nzk;
1822     current_space->local_used      += nzk;
1823     current_space->local_remaining -= nzk;
1824 
1825     ui[k+1] = ui[k] + nzk;
1826   }
1827 
1828 #if defined(PETSC_USE_INFO)
1829   if (ai[am] != 0) {
1830     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1831     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1832     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1833     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1834   } else {
1835      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1836   }
1837 #endif
1838 
1839   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1840   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1841   ierr = PetscFree(jl);CHKERRQ(ierr);
1842 
1843   /* destroy list of free space and other temporary array(s) */
1844   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1845   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1846   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1847 
1848   /* put together the new matrix in MATSEQSBAIJ format */
1849 
1850   b = (Mat_SeqSBAIJ*)(*fact)->data;
1851   b->singlemalloc = PETSC_FALSE;
1852   b->free_a       = PETSC_TRUE;
1853   b->free_ij      = PETSC_TRUE;
1854   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1855   b->j    = uj;
1856   b->i    = ui;
1857   b->diag = 0;
1858   b->ilen = 0;
1859   b->imax = 0;
1860   b->row  = perm;
1861   b->col  = perm;
1862   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1863   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1864   b->icol = iperm;
1865   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1866   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1867   ierr    = PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1868   b->maxnz = b->nz = ui[am];
1869 
1870   (*fact)->info.factor_mallocs    = reallocs;
1871   (*fact)->info.fill_ratio_given  = fill;
1872   if (ai[am] != 0) {
1873     (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1874   } else {
1875     (*fact)->info.fill_ratio_needed = 0.0;
1876   }
1877   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1878   PetscFunctionReturn(0);
1879 }
1880