xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 290bbb0a1dcfb34dbf94efcfcc44171581b0efea)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   PetscInt       *c,*r,*ic,i,n = A->rmap.n;
64   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65   PetscInt       *ordcol,*iwk,*iperm,*jw;
66   PetscInt       jmax,lfill,job,*o_i,*o_j;
67   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68   PetscReal      af;
69 
70   PetscFunctionBegin;
71 
72   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
73   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
75   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
76   lfill   = (PetscInt)(info->dtcount/2.0);
77   jmax    = (PetscInt)(info->fill*a->nz);
78 
79 
80   /* ------------------------------------------------------------
81      If reorder=.TRUE., then the original matrix has to be
82      reordered to reflect the user selected ordering scheme, and
83      then de-reordered so it is in it's original format.
84      Because Saad's dperm() is NOT in place, we have to copy
85      the original matrix and allocate more storage. . .
86      ------------------------------------------------------------
87   */
88 
89   /* set reorder to true if either isrow or iscol is not identity */
90   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
91   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
92   reorder = PetscNot(reorder);
93 
94 
95   /* storage for ilu factor */
96   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
99   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
100 
101   /* ------------------------------------------------------------
102      Make sure that everything is Fortran formatted (1-Based)
103      ------------------------------------------------------------
104   */
105   for (i=old_i[0];i<old_i[n];i++) {
106     old_j[i]++;
107   }
108   for(i=0;i<n+1;i++) {
109     old_i[i]++;
110   };
111 
112 
113   if (reorder) {
114     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
115     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
116     for(i=0;i<n;i++) {
117       r[i]  = r[i]+1;
118       c[i]  = c[i]+1;
119     }
120     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
122     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
123     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124     for (i=0;i<n;i++) {
125       r[i]  = r[i]-1;
126       c[i]  = c[i]-1;
127     }
128     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
130     o_a = old_a2;
131     o_j = old_j2;
132     o_i = old_i2;
133   } else {
134     o_a = old_a;
135     o_j = old_j;
136     o_i = old_i;
137   }
138 
139   /* ------------------------------------------------------------
140      Call Saad's ilutp() routine to generate the factorization
141      ------------------------------------------------------------
142   */
143 
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
145   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
146   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
147 
148   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149   if (sierr) {
150     switch (sierr) {
151       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157     }
158   }
159 
160   ierr = PetscFree(w);CHKERRQ(ierr);
161   ierr = PetscFree(jw);CHKERRQ(ierr);
162 
163   /* ------------------------------------------------------------
164      Saad's routine gives the result in Modified Sparse Row (msr)
165      Convert to Compressed Sparse Row format (csr)
166      ------------------------------------------------------------
167   */
168 
169   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
170   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
171 
172   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173 
174   ierr = PetscFree(iwk);CHKERRQ(ierr);
175   ierr = PetscFree(wk);CHKERRQ(ierr);
176 
177   if (reorder) {
178     ierr = PetscFree(old_a2);CHKERRQ(ierr);
179     ierr = PetscFree(old_j2);CHKERRQ(ierr);
180     ierr = PetscFree(old_i2);CHKERRQ(ierr);
181   } else {
182     /* fix permutation of old_j that the factorization introduced */
183     for (i=old_i[0]; i<old_i[n]; i++) {
184       old_j[i-1] = iperm[old_j[i-1]-1];
185     }
186   }
187 
188   /* get rid of the shift to indices starting at 1 */
189   for (i=0; i<n+1; i++) {
190     old_i[i]--;
191   }
192   for (i=old_i[0];i<old_i[n];i++) {
193     old_j[i]--;
194   }
195 
196   /* Make the factored matrix 0-based */
197   for (i=0; i<n+1; i++) {
198     new_i[i]--;
199   }
200   for (i=new_i[0];i<new_i[n];i++) {
201     new_j[i]--;
202   }
203 
204   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205   /*-- permute the right-hand-side and solution vectors           --*/
206   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
207   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
208   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
209   for(i=0; i<n; i++) {
210     ordcol[i] = ic[iperm[i]-1];
211   };
212   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
213   ierr = ISDestroy(isicol);CHKERRQ(ierr);
214 
215   ierr = PetscFree(iperm);CHKERRQ(ierr);
216 
217   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
218   ierr = PetscFree(ordcol);CHKERRQ(ierr);
219 
220   /*----- put together the new matrix -----*/
221 
222   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
223   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
224   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
225   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
226   (*fact)->factor    = FACTOR_LU;
227   (*fact)->assembled = PETSC_TRUE;
228 
229   b = (Mat_SeqAIJ*)(*fact)->data;
230   b->free_a        = PETSC_TRUE;
231   b->free_ij       = PETSC_TRUE;
232   b->sorted        = PETSC_FALSE;
233   b->singlemalloc  = PETSC_FALSE;
234   b->a             = new_a;
235   b->j             = new_j;
236   b->i             = new_i;
237   b->ilen          = 0;
238   b->imax          = 0;
239   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240   b->row           = isirow;
241   b->col           = iscolf;
242   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
243   b->maxnz = b->nz = new_i[n];
244   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
245   (*fact)->info.factor_mallocs = 0;
246 
247   af = ((double)b->nz)/((double)a->nz) + .001;
248   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr);
249   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
250   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
251   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
252 
253   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
254 
255   PetscFunctionReturn(0);
256 #endif
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
261 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
262 {
263   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
264   IS                 isicol;
265   PetscErrorCode     ierr;
266   PetscInt           *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
267   PetscInt           *bi,*bj,*ajtmp;
268   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
269   PetscReal          f;
270   PetscInt           nlnk,*lnk,k,**bi_ptr;
271   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
272   PetscBT            lnkbt;
273 
274   PetscFunctionBegin;
275   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
276   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
277   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
278   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
279 
280   /* get new row pointers */
281   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
282   bi[0] = 0;
283 
284   /* bdiag is location of diagonal in factor */
285   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
286   bdiag[0] = 0;
287 
288   /* linked list for storing column indices of the active row */
289   nlnk = n + 1;
290   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
291 
292   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
293 
294   /* initial FreeSpace size is f*(ai[n]+1) */
295   f = info->fill;
296   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
297   current_space = free_space;
298 
299   for (i=0; i<n; i++) {
300     /* copy previous fill into linked list */
301     nzi = 0;
302     nnz = ai[r[i]+1] - ai[r[i]];
303     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
304     ajtmp = aj + ai[r[i]];
305     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
306     nzi += nlnk;
307 
308     /* add pivot rows into linked list */
309     row = lnk[n];
310     while (row < i) {
311       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
312       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
313       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
314       nzi += nlnk;
315       row  = lnk[row];
316     }
317     bi[i+1] = bi[i] + nzi;
318     im[i]   = nzi;
319 
320     /* mark bdiag */
321     nzbd = 0;
322     nnz  = nzi;
323     k    = lnk[n];
324     while (nnz-- && k < i){
325       nzbd++;
326       k = lnk[k];
327     }
328     bdiag[i] = bi[i] + nzbd;
329 
330     /* if free space is not available, make more free space */
331     if (current_space->local_remaining<nzi) {
332       nnz = (n - i)*nzi; /* estimated and max additional space needed */
333       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
334       reallocs++;
335     }
336 
337     /* copy data into free space, then initialize lnk */
338     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
339     bi_ptr[i] = current_space->array;
340     current_space->array           += nzi;
341     current_space->local_used      += nzi;
342     current_space->local_remaining -= nzi;
343   }
344 #if defined(PETSC_USE_INFO)
345   if (ai[n] != 0) {
346     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
347     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
348     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
349     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
350     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
351   } else {
352     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
353   }
354 #endif
355 
356   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
357   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
358 
359   /* destroy list of free space and other temporary array(s) */
360   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
361   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
362   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
363   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
364 
365   /* put together the new matrix */
366   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
367   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
368   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
369   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
370   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
371   b    = (Mat_SeqAIJ*)(*B)->data;
372   b->free_a       = PETSC_TRUE;
373   b->free_ij      = PETSC_TRUE;
374   b->singlemalloc = PETSC_FALSE;
375   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
376   b->j          = bj;
377   b->i          = bi;
378   b->diag       = bdiag;
379   b->ilen       = 0;
380   b->imax       = 0;
381   b->row        = isrow;
382   b->col        = iscol;
383   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
384   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
385   b->icol       = isicol;
386   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
387 
388   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
389   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
390   b->maxnz = b->nz = bi[n] ;
391 
392   (*B)->factor                 =  FACTOR_LU;
393   (*B)->info.factor_mallocs    = reallocs;
394   (*B)->info.fill_ratio_given  = f;
395 
396   if (ai[n] != 0) {
397     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
398   } else {
399     (*B)->info.fill_ratio_needed = 0.0;
400   }
401   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
402   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
403   PetscFunctionReturn(0);
404 }
405 
406 /*
407     Trouble in factorization, should we dump the original matrix?
408 */
409 #undef __FUNCT__
410 #define __FUNCT__ "MatFactorDumpMatrix"
411 PetscErrorCode MatFactorDumpMatrix(Mat A)
412 {
413   PetscErrorCode ierr;
414   PetscTruth     flg;
415 
416   PetscFunctionBegin;
417   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
418   if (flg) {
419     PetscViewer viewer;
420     char        filename[PETSC_MAX_PATH_LEN];
421 
422     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
423     ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
424     ierr = MatView(A,viewer);CHKERRQ(ierr);
425     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
426   }
427   PetscFunctionReturn(0);
428 }
429 
430 /* ----------------------------------------------------------- */
431 #undef __FUNCT__
432 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
433 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
434 {
435   Mat            C=*B;
436   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
437   IS             isrow = b->row,isicol = b->icol;
438   PetscErrorCode ierr;
439   PetscInt       *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
440   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
441   PetscInt       *diag_offset = b->diag,diag,*pj;
442   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
443   PetscScalar    d;
444   PetscReal      rs;
445   LUShift_Ctx    sctx;
446   PetscInt       newshift,*ddiag;
447 
448   PetscFunctionBegin;
449   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
450   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
451   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
452   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
453   rtmps = rtmp; ics = ic;
454 
455   sctx.shift_top  = 0;
456   sctx.nshift_max = 0;
457   sctx.shift_lo   = 0;
458   sctx.shift_hi   = 0;
459 
460   /* if both shift schemes are chosen by user, only use info->shiftpd */
461   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
462   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
463     PetscInt *aai = a->i;
464     ddiag          = a->diag;
465     sctx.shift_top = 0;
466     for (i=0; i<n; i++) {
467       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
468       d  = (a->a)[ddiag[i]];
469       rs = -PetscAbsScalar(d) - PetscRealPart(d);
470       v  = a->a+aai[i];
471       nz = aai[i+1] - aai[i];
472       for (j=0; j<nz; j++)
473 	rs += PetscAbsScalar(v[j]);
474       if (rs>sctx.shift_top) sctx.shift_top = rs;
475     }
476     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
477     sctx.shift_top    *= 1.1;
478     sctx.nshift_max   = 5;
479     sctx.shift_lo     = 0.;
480     sctx.shift_hi     = 1.;
481   }
482 
483   sctx.shift_amount = 0;
484   sctx.nshift       = 0;
485   do {
486     sctx.lushift = PETSC_FALSE;
487     for (i=0; i<n; i++){
488       nz    = bi[i+1] - bi[i];
489       bjtmp = bj + bi[i];
490       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
491 
492       /* load in initial (unfactored row) */
493       nz    = a->i[r[i]+1] - a->i[r[i]];
494       ajtmp = a->j + a->i[r[i]];
495       v     = a->a + a->i[r[i]];
496       for (j=0; j<nz; j++) {
497         rtmp[ics[ajtmp[j]]] = v[j];
498       }
499       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
500 
501       row = *bjtmp++;
502       while  (row < i) {
503         pc = rtmp + row;
504         if (*pc != 0.0) {
505           pv         = b->a + diag_offset[row];
506           pj         = b->j + diag_offset[row] + 1;
507           multiplier = *pc / *pv++;
508           *pc        = multiplier;
509           nz         = bi[row+1] - diag_offset[row] - 1;
510           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
511           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
512         }
513         row = *bjtmp++;
514       }
515       /* finished row so stick it into b->a */
516       pv   = b->a + bi[i] ;
517       pj   = b->j + bi[i] ;
518       nz   = bi[i+1] - bi[i];
519       diag = diag_offset[i] - bi[i];
520       rs   = 0.0;
521       for (j=0; j<nz; j++) {
522         pv[j] = rtmps[pj[j]];
523         if (j != diag) rs += PetscAbsScalar(pv[j]);
524       }
525 
526       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
527       sctx.rs  = rs;
528       sctx.pv  = pv[diag];
529       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
530       if (newshift == 1) break;
531     }
532 
533     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
534       /*
535        * if no shift in this attempt & shifting & started shifting & can refine,
536        * then try lower shift
537        */
538       sctx.shift_hi        = info->shift_fraction;
539       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
540       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
541       sctx.lushift         = PETSC_TRUE;
542       sctx.nshift++;
543     }
544   } while (sctx.lushift);
545 
546   /* invert diagonal entries for simplier triangular solves */
547   for (i=0; i<n; i++) {
548     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
549   }
550 
551   ierr = PetscFree(rtmp);CHKERRQ(ierr);
552   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
553   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
554   C->factor = FACTOR_LU;
555   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
556   C->assembled = PETSC_TRUE;
557   ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr);
558   if (sctx.nshift){
559     if (info->shiftnz) {
560       ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
561     } else if (info->shiftpd) {
562       ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
563     }
564   }
565   PetscFunctionReturn(0);
566 }
567 
568 /*
569    This routine implements inplace ILU(0) with row or/and column permutations.
570    Input:
571      A - original matrix
572    Output;
573      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
574          a->j (col index) is permuted by the inverse of colperm, then sorted
575          a->a reordered accordingly with a->j
576          a->diag (ptr to diagonal elements) is updated.
577 */
578 #undef __FUNCT__
579 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
580 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B)
581 {
582   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
583   IS             isrow = a->row,isicol = a->icol;
584   PetscErrorCode ierr;
585   PetscInt       *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j;
586   PetscInt       *ajtmp,nz,row,*ics;
587   PetscInt       *diag = a->diag,nbdiag,*pj;
588   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,d;
589   PetscReal      rs;
590   LUShift_Ctx    sctx;
591   PetscInt       newshift;
592 
593   PetscFunctionBegin;
594   if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
595   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
596   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
597   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
598   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
599   ics = ic;
600 
601   sctx.shift_top  = 0;
602   sctx.nshift_max = 0;
603   sctx.shift_lo   = 0;
604   sctx.shift_hi   = 0;
605 
606   /* if both shift schemes are chosen by user, only use info->shiftpd */
607   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
608   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
609     sctx.shift_top = 0;
610     for (i=0; i<n; i++) {
611       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
612       d  = (a->a)[diag[i]];
613       rs = -PetscAbsScalar(d) - PetscRealPart(d);
614       v  = a->a+ai[i];
615       nz = ai[i+1] - ai[i];
616       for (j=0; j<nz; j++)
617 	rs += PetscAbsScalar(v[j]);
618       if (rs>sctx.shift_top) sctx.shift_top = rs;
619     }
620     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
621     sctx.shift_top    *= 1.1;
622     sctx.nshift_max   = 5;
623     sctx.shift_lo     = 0.;
624     sctx.shift_hi     = 1.;
625   }
626 
627   sctx.shift_amount = 0;
628   sctx.nshift       = 0;
629   do {
630     sctx.lushift = PETSC_FALSE;
631     for (i=0; i<n; i++){
632       /* load in initial unfactored row */
633       nz    = ai[r[i]+1] - ai[r[i]];
634       ajtmp = aj + ai[r[i]];
635       v     = a->a + ai[r[i]];
636       /* sort permuted ajtmp and values v accordingly */
637       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
638       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
639 
640       diag[r[i]] = ai[r[i]];
641       for (j=0; j<nz; j++) {
642         rtmp[ajtmp[j]] = v[j];
643         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
644       }
645       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
646 
647       row = *ajtmp++;
648       while  (row < i) {
649         pc = rtmp + row;
650         if (*pc != 0.0) {
651           pv         = a->a + diag[r[row]];
652           pj         = aj + diag[r[row]] + 1;
653 
654           multiplier = *pc / *pv++;
655           *pc        = multiplier;
656           nz         = ai[r[row]+1] - diag[r[row]] - 1;
657           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
658           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
659         }
660         row = *ajtmp++;
661       }
662       /* finished row so overwrite it onto a->a */
663       pv   = a->a + ai[r[i]] ;
664       pj   = aj + ai[r[i]] ;
665       nz   = ai[r[i]+1] - ai[r[i]];
666       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
667 
668       rs   = 0.0;
669       for (j=0; j<nz; j++) {
670         pv[j] = rtmp[pj[j]];
671         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
672       }
673 
674       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
675       sctx.rs  = rs;
676       sctx.pv  = pv[nbdiag];
677       ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr);
678       if (newshift == 1) break;
679     }
680 
681     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
682       /*
683        * if no shift in this attempt & shifting & started shifting & can refine,
684        * then try lower shift
685        */
686       sctx.shift_hi        = info->shift_fraction;
687       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
688       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
689       sctx.lushift         = PETSC_TRUE;
690       sctx.nshift++;
691     }
692   } while (sctx.lushift);
693 
694   /* invert diagonal entries for simplier triangular solves */
695   for (i=0; i<n; i++) {
696     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
697   }
698 
699   ierr = PetscFree(rtmp);CHKERRQ(ierr);
700   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
701   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
702   A->factor     = FACTOR_LU;
703   A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
704   A->assembled = PETSC_TRUE;
705   ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr);
706   if (sctx.nshift){
707     if (info->shiftnz) {
708       ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
709     } else if (info->shiftpd) {
710       ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
711     }
712   }
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
718 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
719 {
720   PetscFunctionBegin;
721   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
722   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
723   PetscFunctionReturn(0);
724 }
725 
726 
727 /* ----------------------------------------------------------- */
728 #undef __FUNCT__
729 #define __FUNCT__ "MatLUFactor_SeqAIJ"
730 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
731 {
732   PetscErrorCode ierr;
733   Mat            C;
734 
735   PetscFunctionBegin;
736   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
737   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
738   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
739   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 /* ----------------------------------------------------------- */
743 #undef __FUNCT__
744 #define __FUNCT__ "MatSolve_SeqAIJ"
745 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
746 {
747   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
748   IS             iscol = a->col,isrow = a->row;
749   PetscErrorCode ierr;
750   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
751   PetscInt       nz,*rout,*cout;
752   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
753 
754   PetscFunctionBegin;
755   if (!n) PetscFunctionReturn(0);
756 
757   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
758   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
759   tmp  = a->solve_work;
760 
761   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
762   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
763 
764   /* forward solve the lower triangular */
765   tmp[0] = b[*r++];
766   tmps   = tmp;
767   for (i=1; i<n; i++) {
768     v   = aa + ai[i] ;
769     vi  = aj + ai[i] ;
770     nz  = a->diag[i] - ai[i];
771     sum = b[*r++];
772     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
773     tmp[i] = sum;
774   }
775 
776   /* backward solve the upper triangular */
777   for (i=n-1; i>=0; i--){
778     v   = aa + a->diag[i] + 1;
779     vi  = aj + a->diag[i] + 1;
780     nz  = ai[i+1] - a->diag[i] - 1;
781     sum = tmp[i];
782     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
783     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
784   }
785 
786   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
787   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
788   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
789   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
790   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatMatSolve_SeqAIJ"
796 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
797 {
798   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
799   IS             iscol = a->col,isrow = a->row;
800   PetscErrorCode ierr;
801   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
802   PetscInt       nz,*rout,*cout,neq;
803   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
804 
805   PetscFunctionBegin;
806   if (!n) PetscFunctionReturn(0);
807 
808   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
809   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
810 
811   tmp  = a->solve_work;
812   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
813   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
814 
815   for (neq=0; neq<n; neq++){
816     /* forward solve the lower triangular */
817     tmp[0] = b[r[0]];
818     tmps   = tmp;
819     for (i=1; i<n; i++) {
820       v   = aa + ai[i] ;
821       vi  = aj + ai[i] ;
822       nz  = a->diag[i] - ai[i];
823       sum = b[r[i]];
824       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
825       tmp[i] = sum;
826     }
827     /* backward solve the upper triangular */
828     for (i=n-1; i>=0; i--){
829       v   = aa + a->diag[i] + 1;
830       vi  = aj + a->diag[i] + 1;
831       nz  = ai[i+1] - a->diag[i] - 1;
832       sum = tmp[i];
833       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
834       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
835     }
836 
837     b += n;
838     x += n;
839   }
840   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
841   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
842   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
843   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
844   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
850 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
851 {
852   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
853   IS             iscol = a->col,isrow = a->row;
854   PetscErrorCode ierr;
855   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
856   PetscInt       nz,*rout,*cout,row;
857   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
858 
859   PetscFunctionBegin;
860   if (!n) PetscFunctionReturn(0);
861 
862   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
863   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
864   tmp  = a->solve_work;
865 
866   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
867   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
868 
869   /* forward solve the lower triangular */
870   tmp[0] = b[*r++];
871   tmps   = tmp;
872   for (row=1; row<n; row++) {
873     i   = rout[row]; /* permuted row */
874     v   = aa + ai[i] ;
875     vi  = aj + ai[i] ;
876     nz  = a->diag[i] - ai[i];
877     sum = b[*r++];
878     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
879     tmp[row] = sum;
880   }
881 
882   /* backward solve the upper triangular */
883   for (row=n-1; row>=0; row--){
884     i   = rout[row]; /* permuted row */
885     v   = aa + a->diag[i] + 1;
886     vi  = aj + a->diag[i] + 1;
887     nz  = ai[i+1] - a->diag[i] - 1;
888     sum = tmp[row];
889     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
890     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
891   }
892 
893   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
894   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
895   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
896   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
897   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
898   PetscFunctionReturn(0);
899 }
900 
901 /* ----------------------------------------------------------- */
902 #undef __FUNCT__
903 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
904 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
905 {
906   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
907   PetscErrorCode ierr;
908   PetscInt       n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
909   PetscScalar    *x,*b,*aa = a->a;
910 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
911   PetscInt       adiag_i,i,*vi,nz,ai_i;
912   PetscScalar    *v,sum;
913 #endif
914 
915   PetscFunctionBegin;
916   if (!n) PetscFunctionReturn(0);
917 
918   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920 
921 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
922   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
923 #else
924   /* forward solve the lower triangular */
925   x[0] = b[0];
926   for (i=1; i<n; i++) {
927     ai_i = ai[i];
928     v    = aa + ai_i;
929     vi   = aj + ai_i;
930     nz   = adiag[i] - ai_i;
931     sum  = b[i];
932     while (nz--) sum -= *v++ * x[*vi++];
933     x[i] = sum;
934   }
935 
936   /* backward solve the upper triangular */
937   for (i=n-1; i>=0; i--){
938     adiag_i = adiag[i];
939     v       = aa + adiag_i + 1;
940     vi      = aj + adiag_i + 1;
941     nz      = ai[i+1] - adiag_i - 1;
942     sum     = x[i];
943     while (nz--) sum -= *v++ * x[*vi++];
944     x[i]    = sum*aa[adiag_i];
945   }
946 #endif
947   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
948   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
949   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
955 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
956 {
957   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
958   IS             iscol = a->col,isrow = a->row;
959   PetscErrorCode ierr;
960   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
961   PetscInt       nz,*rout,*cout;
962   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
963 
964   PetscFunctionBegin;
965   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
966 
967   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
968   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
969   tmp  = a->solve_work;
970 
971   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
972   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
973 
974   /* forward solve the lower triangular */
975   tmp[0] = b[*r++];
976   for (i=1; i<n; i++) {
977     v   = aa + ai[i] ;
978     vi  = aj + ai[i] ;
979     nz  = a->diag[i] - ai[i];
980     sum = b[*r++];
981     while (nz--) sum -= *v++ * tmp[*vi++ ];
982     tmp[i] = sum;
983   }
984 
985   /* backward solve the upper triangular */
986   for (i=n-1; i>=0; i--){
987     v   = aa + a->diag[i] + 1;
988     vi  = aj + a->diag[i] + 1;
989     nz  = ai[i+1] - a->diag[i] - 1;
990     sum = tmp[i];
991     while (nz--) sum -= *v++ * tmp[*vi++ ];
992     tmp[i] = sum*aa[a->diag[i]];
993     x[*c--] += tmp[i];
994   }
995 
996   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
997   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
998   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
999   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1000   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1001 
1002   PetscFunctionReturn(0);
1003 }
1004 /* -------------------------------------------------------------------*/
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1007 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1008 {
1009   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1010   IS             iscol = a->col,isrow = a->row;
1011   PetscErrorCode ierr;
1012   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1013   PetscInt       nz,*rout,*cout,*diag = a->diag;
1014   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
1015 
1016   PetscFunctionBegin;
1017   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1018   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1019   tmp  = a->solve_work;
1020 
1021   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1022   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1023 
1024   /* copy the b into temp work space according to permutation */
1025   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1026 
1027   /* forward solve the U^T */
1028   for (i=0; i<n; i++) {
1029     v   = aa + diag[i] ;
1030     vi  = aj + diag[i] + 1;
1031     nz  = ai[i+1] - diag[i] - 1;
1032     s1  = tmp[i];
1033     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1034     while (nz--) {
1035       tmp[*vi++ ] -= (*v++)*s1;
1036     }
1037     tmp[i] = s1;
1038   }
1039 
1040   /* backward solve the L^T */
1041   for (i=n-1; i>=0; i--){
1042     v   = aa + diag[i] - 1 ;
1043     vi  = aj + diag[i] - 1 ;
1044     nz  = diag[i] - ai[i];
1045     s1  = tmp[i];
1046     while (nz--) {
1047       tmp[*vi-- ] -= (*v--)*s1;
1048     }
1049   }
1050 
1051   /* copy tmp into x according to permutation */
1052   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1053 
1054   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1055   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1056   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1057   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1058 
1059   ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1065 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1066 {
1067   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1068   IS             iscol = a->col,isrow = a->row;
1069   PetscErrorCode ierr;
1070   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1071   PetscInt       nz,*rout,*cout,*diag = a->diag;
1072   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
1073 
1074   PetscFunctionBegin;
1075   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1076 
1077   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1078   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1079   tmp = a->solve_work;
1080 
1081   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1082   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1083 
1084   /* copy the b into temp work space according to permutation */
1085   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1086 
1087   /* forward solve the U^T */
1088   for (i=0; i<n; i++) {
1089     v   = aa + diag[i] ;
1090     vi  = aj + diag[i] + 1;
1091     nz  = ai[i+1] - diag[i] - 1;
1092     tmp[i] *= *v++;
1093     while (nz--) {
1094       tmp[*vi++ ] -= (*v++)*tmp[i];
1095     }
1096   }
1097 
1098   /* backward solve the L^T */
1099   for (i=n-1; i>=0; i--){
1100     v   = aa + diag[i] - 1 ;
1101     vi  = aj + diag[i] - 1 ;
1102     nz  = diag[i] - ai[i];
1103     while (nz--) {
1104       tmp[*vi-- ] -= (*v--)*tmp[i];
1105     }
1106   }
1107 
1108   /* copy tmp into x according to permutation */
1109   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1110 
1111   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1112   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1113   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1114   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1115 
1116   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 /* ----------------------------------------------------------------*/
1120 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*);
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1124 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
1125 {
1126   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1127   IS                 isicol;
1128   PetscErrorCode     ierr;
1129   PetscInt           *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d;
1130   PetscInt           *bi,*cols,nnz,*cols_lvl;
1131   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1132   PetscInt           i,levels,diagonal_fill;
1133   PetscTruth         col_identity,row_identity;
1134   PetscReal          f;
1135   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1136   PetscBT            lnkbt;
1137   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1138   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1139   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1140   PetscTruth         missing;
1141 
1142   PetscFunctionBegin;
1143   f             = info->fill;
1144   levels        = (PetscInt)info->levels;
1145   diagonal_fill = (PetscInt)info->diagonal_fill;
1146   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1147 
1148   /* special case that simply copies fill pattern */
1149   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1150   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1151   if (!levels && row_identity && col_identity) {
1152     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
1153     (*fact)->factor                 = FACTOR_LU;
1154     (*fact)->info.factor_mallocs    = 0;
1155     (*fact)->info.fill_ratio_given  = info->fill;
1156     (*fact)->info.fill_ratio_needed = 1.0;
1157     b               = (Mat_SeqAIJ*)(*fact)->data;
1158     ierr = MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);CHKERRQ(ierr);
1159     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1160     b->row              = isrow;
1161     b->col              = iscol;
1162     b->icol             = isicol;
1163     ierr                = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1164     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
1165     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1166     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1167     PetscFunctionReturn(0);
1168   }
1169 
1170   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1171   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1172 
1173   /* get new row pointers */
1174   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1175   bi[0] = 0;
1176   /* bdiag is location of diagonal in factor */
1177   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1178   bdiag[0]  = 0;
1179 
1180   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1181   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1182 
1183   /* create a linked list for storing column indices of the active row */
1184   nlnk = n + 1;
1185   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1186 
1187   /* initial FreeSpace size is f*(ai[n]+1) */
1188   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1189   current_space = free_space;
1190   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1191   current_space_lvl = free_space_lvl;
1192 
1193   for (i=0; i<n; i++) {
1194     nzi = 0;
1195     /* copy current row into linked list */
1196     nnz  = ai[r[i]+1] - ai[r[i]];
1197     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1198     cols = aj + ai[r[i]];
1199     lnk[i] = -1; /* marker to indicate if diagonal exists */
1200     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1201     nzi += nlnk;
1202 
1203     /* make sure diagonal entry is included */
1204     if (diagonal_fill && lnk[i] == -1) {
1205       fm = n;
1206       while (lnk[fm] < i) fm = lnk[fm];
1207       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1208       lnk[fm]    = i;
1209       lnk_lvl[i] = 0;
1210       nzi++; dcount++;
1211     }
1212 
1213     /* add pivot rows into the active row */
1214     nzbd = 0;
1215     prow = lnk[n];
1216     while (prow < i) {
1217       nnz      = bdiag[prow];
1218       cols     = bj_ptr[prow] + nnz + 1;
1219       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1220       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1221       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1222       nzi += nlnk;
1223       prow = lnk[prow];
1224       nzbd++;
1225     }
1226     bdiag[i] = nzbd;
1227     bi[i+1]  = bi[i] + nzi;
1228 
1229     /* if free space is not available, make more free space */
1230     if (current_space->local_remaining<nzi) {
1231       nnz = nzi*(n - i); /* estimated and max additional space needed */
1232       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1233       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1234       reallocs++;
1235     }
1236 
1237     /* copy data into free_space and free_space_lvl, then initialize lnk */
1238     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1239     bj_ptr[i]    = current_space->array;
1240     bjlvl_ptr[i] = current_space_lvl->array;
1241 
1242     /* make sure the active row i has diagonal entry */
1243     if (*(bj_ptr[i]+bdiag[i]) != i) {
1244       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1245     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1246     }
1247 
1248     current_space->array           += nzi;
1249     current_space->local_used      += nzi;
1250     current_space->local_remaining -= nzi;
1251     current_space_lvl->array           += nzi;
1252     current_space_lvl->local_used      += nzi;
1253     current_space_lvl->local_remaining -= nzi;
1254   }
1255 
1256   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1257   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1258 
1259   /* destroy list of free space and other temporary arrays */
1260   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1261   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1262   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1263   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1264   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1265 
1266 #if defined(PETSC_USE_INFO)
1267   {
1268     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1269     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1270     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1271     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1272     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1273     if (diagonal_fill) {
1274       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1275     }
1276   }
1277 #endif
1278 
1279   /* put together the new matrix */
1280   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1281   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1282   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1283   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1285   b = (Mat_SeqAIJ*)(*fact)->data;
1286   b->free_a       = PETSC_TRUE;
1287   b->free_ij      = PETSC_TRUE;
1288   b->singlemalloc = PETSC_FALSE;
1289   len = (bi[n] )*sizeof(PetscScalar);
1290   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1291   b->j          = bj;
1292   b->i          = bi;
1293   for (i=0; i<n; i++) bdiag[i] += bi[i];
1294   b->diag       = bdiag;
1295   b->ilen       = 0;
1296   b->imax       = 0;
1297   b->row        = isrow;
1298   b->col        = iscol;
1299   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1300   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1301   b->icol       = isicol;
1302   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1303   /* In b structure:  Free imax, ilen, old a, old j.
1304      Allocate bdiag, solve_work, new a, new j */
1305   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1306   b->maxnz             = b->nz = bi[n] ;
1307   (*fact)->factor = FACTOR_LU;
1308   (*fact)->info.factor_mallocs    = reallocs;
1309   (*fact)->info.fill_ratio_given  = f;
1310   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1311 
1312   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1313   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1314 
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #include "src/mat/impls/sbaij/seq/sbaij.h"
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1321 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1322 {
1323   Mat            C = *B;
1324   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1325   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1326   IS             ip=b->row,iip = b->icol;
1327   PetscErrorCode ierr;
1328   PetscInt       *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1329   PetscInt       *ai=a->i,*aj=a->j;
1330   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1331   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1332   PetscReal      zeropivot,rs,shiftnz;
1333   PetscReal      shiftpd;
1334   ChShift_Ctx    sctx;
1335   PetscInt       newshift;
1336 
1337   PetscFunctionBegin;
1338   shiftnz   = info->shiftnz;
1339   shiftpd   = info->shiftpd;
1340   zeropivot = info->zeropivot;
1341 
1342   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1343   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1344 
1345   /* initialization */
1346   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1347   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1348   jl   = il + mbs;
1349   rtmp = (MatScalar*)(jl + mbs);
1350 
1351   sctx.shift_amount = 0;
1352   sctx.nshift       = 0;
1353   do {
1354     sctx.chshift = PETSC_FALSE;
1355     for (i=0; i<mbs; i++) {
1356       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1357     }
1358 
1359     for (k = 0; k<mbs; k++){
1360       bval = ba + bi[k];
1361       /* initialize k-th row by the perm[k]-th row of A */
1362       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1363       for (j = jmin; j < jmax; j++){
1364         col = riip[aj[j]];
1365         if (col >= k){ /* only take upper triangular entry */
1366           rtmp[col] = aa[j];
1367           *bval++  = 0.0; /* for in-place factorization */
1368         }
1369       }
1370       /* shift the diagonal of the matrix */
1371       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1372 
1373       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1374       dk = rtmp[k];
1375       i = jl[k]; /* first row to be added to k_th row  */
1376 
1377       while (i < k){
1378         nexti = jl[i]; /* next row to be added to k_th row */
1379 
1380         /* compute multiplier, update diag(k) and U(i,k) */
1381         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1382         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1383         dk += uikdi*ba[ili];
1384         ba[ili] = uikdi; /* -U(i,k) */
1385 
1386         /* add multiple of row i to k-th row */
1387         jmin = ili + 1; jmax = bi[i+1];
1388         if (jmin < jmax){
1389           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1390           /* update il and jl for row i */
1391           il[i] = jmin;
1392           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1393         }
1394         i = nexti;
1395       }
1396 
1397       /* shift the diagonals when zero pivot is detected */
1398       /* compute rs=sum of abs(off-diagonal) */
1399       rs   = 0.0;
1400       jmin = bi[k]+1;
1401       nz   = bi[k+1] - jmin;
1402       bcol = bj + jmin;
1403       while (nz--){
1404         rs += PetscAbsScalar(rtmp[*bcol]);
1405         bcol++;
1406       }
1407 
1408       sctx.rs = rs;
1409       sctx.pv = dk;
1410       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1411       if (newshift == 1) break;
1412 
1413       /* copy data into U(k,:) */
1414       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1415       jmin = bi[k]+1; jmax = bi[k+1];
1416       if (jmin < jmax) {
1417         for (j=jmin; j<jmax; j++){
1418           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1419         }
1420         /* add the k-th row into il and jl */
1421         il[k] = jmin;
1422         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1423       }
1424     }
1425   } while (sctx.chshift);
1426   ierr = PetscFree(il);CHKERRQ(ierr);
1427 
1428   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1429   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1430   C->factor       = FACTOR_CHOLESKY;
1431   C->assembled    = PETSC_TRUE;
1432   C->preallocated = PETSC_TRUE;
1433   ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr);
1434   if (sctx.nshift){
1435     if (shiftnz) {
1436       ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1437     } else if (shiftpd) {
1438       ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1439     }
1440   }
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1446 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1447 {
1448   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1449   Mat_SeqSBAIJ       *b;
1450   Mat                B;
1451   PetscErrorCode     ierr;
1452   PetscTruth         perm_identity;
1453   PetscInt           reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1454   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1455   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1456   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1457   PetscReal          fill=info->fill,levels=info->levels;
1458   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1459   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1460   PetscBT            lnkbt;
1461   IS                 iperm;
1462 
1463   PetscFunctionBegin;
1464   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1465   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1466 
1467   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1468   ui[0] = 0;
1469 
1470   /* ICC(0) without matrix ordering: simply copies fill pattern */
1471   if (!levels && perm_identity) {
1472     for (i=0; i<am; i++) {
1473       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1474     }
1475     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1476     cols = uj;
1477     for (i=0; i<am; i++) {
1478       aj    = a->j + a->diag[i];
1479       ncols = ui[i+1] - ui[i];
1480       for (j=0; j<ncols; j++) *cols++ = *aj++;
1481     }
1482   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1483     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1484     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1485 
1486     /* initialization */
1487     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1488 
1489     /* jl: linked list for storing indices of the pivot rows
1490        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1491     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1492     il         = jl + am;
1493     uj_ptr     = (PetscInt**)(il + am);
1494     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1495     for (i=0; i<am; i++){
1496       jl[i] = am; il[i] = 0;
1497     }
1498 
1499     /* create and initialize a linked list for storing column indices of the active row k */
1500     nlnk = am + 1;
1501     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1502 
1503     /* initial FreeSpace size is fill*(ai[am]+1) */
1504     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1505     current_space = free_space;
1506     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1507     current_space_lvl = free_space_lvl;
1508 
1509     for (k=0; k<am; k++){  /* for each active row k */
1510       /* initialize lnk by the column indices of row rip[k] of A */
1511       nzk   = 0;
1512       ncols = ai[rip[k]+1] - ai[rip[k]];
1513       if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1514       ncols_upper = 0;
1515       for (j=0; j<ncols; j++){
1516         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1517         if (riip[i] >= k){ /* only take upper triangular entry */
1518           ajtmp[ncols_upper] = i;
1519           ncols_upper++;
1520         }
1521       }
1522       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1523       nzk += nlnk;
1524 
1525       /* update lnk by computing fill-in for each pivot row to be merged in */
1526       prow = jl[k]; /* 1st pivot row */
1527 
1528       while (prow < k){
1529         nextprow = jl[prow];
1530 
1531         /* merge prow into k-th row */
1532         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1533         jmax = ui[prow+1];
1534         ncols = jmax-jmin;
1535         i     = jmin - ui[prow];
1536         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1537         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1538         j     = *(uj - 1);
1539         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1540         nzk += nlnk;
1541 
1542         /* update il and jl for prow */
1543         if (jmin < jmax){
1544           il[prow] = jmin;
1545           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1546         }
1547         prow = nextprow;
1548       }
1549 
1550       /* if free space is not available, make more free space */
1551       if (current_space->local_remaining<nzk) {
1552         i = am - k + 1; /* num of unfactored rows */
1553         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1554         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1555         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1556         reallocs++;
1557       }
1558 
1559       /* copy data into free_space and free_space_lvl, then initialize lnk */
1560       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1561       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1562 
1563       /* add the k-th row into il and jl */
1564       if (nzk > 1){
1565         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1566         jl[k] = jl[i]; jl[i] = k;
1567         il[k] = ui[k] + 1;
1568       }
1569       uj_ptr[k]     = current_space->array;
1570       uj_lvl_ptr[k] = current_space_lvl->array;
1571 
1572       current_space->array           += nzk;
1573       current_space->local_used      += nzk;
1574       current_space->local_remaining -= nzk;
1575 
1576       current_space_lvl->array           += nzk;
1577       current_space_lvl->local_used      += nzk;
1578       current_space_lvl->local_remaining -= nzk;
1579 
1580       ui[k+1] = ui[k] + nzk;
1581     }
1582 
1583 #if defined(PETSC_USE_INFO)
1584     if (ai[am] != 0) {
1585       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1586       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1587       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1588       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1589     } else {
1590       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1591     }
1592 #endif
1593 
1594     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1595     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1596     ierr = PetscFree(jl);CHKERRQ(ierr);
1597     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1598 
1599     /* destroy list of free space and other temporary array(s) */
1600     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1601     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1602     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1603     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1604 
1605   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1606 
1607   /* put together the new matrix in MATSEQSBAIJ format */
1608   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1609   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1610   B = *fact;
1611   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1612   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1613 
1614   b    = (Mat_SeqSBAIJ*)B->data;
1615   b->singlemalloc = PETSC_FALSE;
1616   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1617   b->j    = uj;
1618   b->i    = ui;
1619   b->diag = 0;
1620   b->ilen = 0;
1621   b->imax = 0;
1622   b->row  = perm;
1623   b->col  = perm;
1624   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1625   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1626   b->icol = iperm;
1627   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1628   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1629   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1630   b->maxnz   = b->nz = ui[am];
1631   b->free_a  = PETSC_TRUE;
1632   b->free_ij = PETSC_TRUE;
1633 
1634   B->factor                 = FACTOR_CHOLESKY;
1635   B->info.factor_mallocs    = reallocs;
1636   B->info.fill_ratio_given  = fill;
1637   if (ai[am] != 0) {
1638     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1639   } else {
1640     B->info.fill_ratio_needed = 0.0;
1641   }
1642   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1643   if (perm_identity){
1644     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1645     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1646   }
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1652 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1653 {
1654   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1655   Mat_SeqSBAIJ       *b;
1656   Mat                B;
1657   PetscErrorCode     ierr;
1658   PetscTruth         perm_identity;
1659   PetscReal          fill = info->fill;
1660   PetscInt           *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1661   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1662   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1663   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1664   PetscBT            lnkbt;
1665   IS                 iperm;
1666 
1667   PetscFunctionBegin;
1668   /* check whether perm is the identity mapping */
1669   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1670   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1671   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1672   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1673 
1674   /* initialization */
1675   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1676   ui[0] = 0;
1677 
1678   /* jl: linked list for storing indices of the pivot rows
1679      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1680   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1681   il     = jl + am;
1682   cols   = il + am;
1683   ui_ptr = (PetscInt**)(cols + am);
1684   for (i=0; i<am; i++){
1685     jl[i] = am; il[i] = 0;
1686   }
1687 
1688   /* create and initialize a linked list for storing column indices of the active row k */
1689   nlnk = am + 1;
1690   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1691 
1692   /* initial FreeSpace size is fill*(ai[am]+1) */
1693   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1694   current_space = free_space;
1695 
1696   for (k=0; k<am; k++){  /* for each active row k */
1697     /* initialize lnk by the column indices of row rip[k] of A */
1698     nzk   = 0;
1699     ncols = ai[rip[k]+1] - ai[rip[k]];
1700     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1701     ncols_upper = 0;
1702     for (j=0; j<ncols; j++){
1703       i = riip[*(aj + ai[rip[k]] + j)];
1704       if (i >= k){ /* only take upper triangular entry */
1705         cols[ncols_upper] = i;
1706         ncols_upper++;
1707       }
1708     }
1709     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1710     nzk += nlnk;
1711 
1712     /* update lnk by computing fill-in for each pivot row to be merged in */
1713     prow = jl[k]; /* 1st pivot row */
1714 
1715     while (prow < k){
1716       nextprow = jl[prow];
1717       /* merge prow into k-th row */
1718       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1719       jmax = ui[prow+1];
1720       ncols = jmax-jmin;
1721       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1722       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1723       nzk += nlnk;
1724 
1725       /* update il and jl for prow */
1726       if (jmin < jmax){
1727         il[prow] = jmin;
1728         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1729       }
1730       prow = nextprow;
1731     }
1732 
1733     /* if free space is not available, make more free space */
1734     if (current_space->local_remaining<nzk) {
1735       i = am - k + 1; /* num of unfactored rows */
1736       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1737       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1738       reallocs++;
1739     }
1740 
1741     /* copy data into free space, then initialize lnk */
1742     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1743 
1744     /* add the k-th row into il and jl */
1745     if (nzk-1 > 0){
1746       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1747       jl[k] = jl[i]; jl[i] = k;
1748       il[k] = ui[k] + 1;
1749     }
1750     ui_ptr[k] = current_space->array;
1751     current_space->array           += nzk;
1752     current_space->local_used      += nzk;
1753     current_space->local_remaining -= nzk;
1754 
1755     ui[k+1] = ui[k] + nzk;
1756   }
1757 
1758 #if defined(PETSC_USE_INFO)
1759   if (ai[am] != 0) {
1760     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1761     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1762     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1763     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1764   } else {
1765      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1766   }
1767 #endif
1768 
1769   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1770   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1771   ierr = PetscFree(jl);CHKERRQ(ierr);
1772 
1773   /* destroy list of free space and other temporary array(s) */
1774   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1775   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1776   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1777 
1778   /* put together the new matrix in MATSEQSBAIJ format */
1779   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1780   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1781   B    = *fact;
1782   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1783   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1784 
1785   b = (Mat_SeqSBAIJ*)B->data;
1786   b->singlemalloc = PETSC_FALSE;
1787   b->free_a       = PETSC_TRUE;
1788   b->free_ij      = PETSC_TRUE;
1789   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1790   b->j    = uj;
1791   b->i    = ui;
1792   b->diag = 0;
1793   b->ilen = 0;
1794   b->imax = 0;
1795   b->row  = perm;
1796   b->col  = perm;
1797   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1798   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1799   b->icol = iperm;
1800   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1801   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1802   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1803   b->maxnz = b->nz = ui[am];
1804 
1805   B->factor                 = FACTOR_CHOLESKY;
1806   B->info.factor_mallocs    = reallocs;
1807   B->info.fill_ratio_given  = fill;
1808   if (ai[am] != 0) {
1809     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1810   } else {
1811     B->info.fill_ratio_needed = 0.0;
1812   }
1813   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1814   if (perm_identity){
1815     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1816     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1817   }
1818   PetscFunctionReturn(0);
1819 }
1820