xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 137fb5114f49918a42483a727df6eef8df74e8e4)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/inline/dot.h"
5 #include "src/inline/spops.h"
6 #include "petscbt.h"
7 #include "src/mat/utils/freespace.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12 {
13   PetscFunctionBegin;
14 
15   SETERRQ(PETSC_ERR_SUP,"Code not written");
16 #if !defined(PETSC_USE_DEBUG)
17   PetscFunctionReturn(0);
18 #endif
19 }
20 
21 
22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ------------------------------------------------------------
50 */
51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52 {
53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
54   PetscFunctionBegin;
55   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56   You can obtain the drop tolerance routines by installing PETSc from\n\
57   www.mcs.anl.gov/petsc\n");
58 #else
59   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
60   IS             iscolf,isicol,isirow;
61   PetscTruth     reorder;
62   PetscErrorCode ierr,sierr;
63   PetscInt       *c,*r,*ic,i,n = A->rmap.n;
64   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65   PetscInt       *ordcol,*iwk,*iperm,*jw;
66   PetscInt       jmax,lfill,job,*o_i,*o_j;
67   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68   PetscReal      af;
69 
70   PetscFunctionBegin;
71 
72   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
73   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
75   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
76   lfill   = (PetscInt)(info->dtcount/2.0);
77   jmax    = (PetscInt)(info->fill*a->nz);
78 
79 
80   /* ------------------------------------------------------------
81      If reorder=.TRUE., then the original matrix has to be
82      reordered to reflect the user selected ordering scheme, and
83      then de-reordered so it is in it's original format.
84      Because Saad's dperm() is NOT in place, we have to copy
85      the original matrix and allocate more storage. . .
86      ------------------------------------------------------------
87   */
88 
89   /* set reorder to true if either isrow or iscol is not identity */
90   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
91   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
92   reorder = PetscNot(reorder);
93 
94 
95   /* storage for ilu factor */
96   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr);
97   ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr);
98   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
99   ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr);
100 
101   /* ------------------------------------------------------------
102      Make sure that everything is Fortran formatted (1-Based)
103      ------------------------------------------------------------
104   */
105   for (i=old_i[0];i<old_i[n];i++) {
106     old_j[i]++;
107   }
108   for(i=0;i<n+1;i++) {
109     old_i[i]++;
110   };
111 
112 
113   if (reorder) {
114     ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
115     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
116     for(i=0;i<n;i++) {
117       r[i]  = r[i]+1;
118       c[i]  = c[i]+1;
119     }
120     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr);
121     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr);
122     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
123     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124     for (i=0;i<n;i++) {
125       r[i]  = r[i]-1;
126       c[i]  = c[i]-1;
127     }
128     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
130     o_a = old_a2;
131     o_j = old_j2;
132     o_i = old_i2;
133   } else {
134     o_a = old_a;
135     o_j = old_j;
136     o_i = old_i;
137   }
138 
139   /* ------------------------------------------------------------
140      Call Saad's ilutp() routine to generate the factorization
141      ------------------------------------------------------------
142   */
143 
144   ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr);
145   ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr);
146   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
147 
148   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149   if (sierr) {
150     switch (sierr) {
151       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157     }
158   }
159 
160   ierr = PetscFree(w);CHKERRQ(ierr);
161   ierr = PetscFree(jw);CHKERRQ(ierr);
162 
163   /* ------------------------------------------------------------
164      Saad's routine gives the result in Modified Sparse Row (msr)
165      Convert to Compressed Sparse Row format (csr)
166      ------------------------------------------------------------
167   */
168 
169   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
170   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr);
171 
172   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
173 
174   ierr = PetscFree(iwk);CHKERRQ(ierr);
175   ierr = PetscFree(wk);CHKERRQ(ierr);
176 
177   if (reorder) {
178     ierr = PetscFree(old_a2);CHKERRQ(ierr);
179     ierr = PetscFree(old_j2);CHKERRQ(ierr);
180     ierr = PetscFree(old_i2);CHKERRQ(ierr);
181   } else {
182     /* fix permutation of old_j that the factorization introduced */
183     for (i=old_i[0]; i<old_i[n]; i++) {
184       old_j[i-1] = iperm[old_j[i-1]-1];
185     }
186   }
187 
188   /* get rid of the shift to indices starting at 1 */
189   for (i=0; i<n+1; i++) {
190     old_i[i]--;
191   }
192   for (i=old_i[0];i<old_i[n];i++) {
193     old_j[i]--;
194   }
195 
196   /* Make the factored matrix 0-based */
197   for (i=0; i<n+1; i++) {
198     new_i[i]--;
199   }
200   for (i=new_i[0];i<new_i[n];i++) {
201     new_j[i]--;
202   }
203 
204   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205   /*-- permute the right-hand-side and solution vectors           --*/
206   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
207   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
208   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
209   for(i=0; i<n; i++) {
210     ordcol[i] = ic[iperm[i]-1];
211   };
212   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
213   ierr = ISDestroy(isicol);CHKERRQ(ierr);
214 
215   ierr = PetscFree(iperm);CHKERRQ(ierr);
216 
217   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
218   ierr = PetscFree(ordcol);CHKERRQ(ierr);
219 
220   /*----- put together the new matrix -----*/
221 
222   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
223   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
224   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
225   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
226   (*fact)->factor    = FACTOR_LU;
227   (*fact)->assembled = PETSC_TRUE;
228 
229   b = (Mat_SeqAIJ*)(*fact)->data;
230   b->free_a        = PETSC_TRUE;
231   b->free_ij       = PETSC_TRUE;
232   b->sorted        = PETSC_FALSE;
233   b->singlemalloc  = PETSC_FALSE;
234   b->a             = new_a;
235   b->j             = new_j;
236   b->i             = new_i;
237   b->ilen          = 0;
238   b->imax          = 0;
239   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240   b->row           = isirow;
241   b->col           = iscolf;
242   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
243   b->maxnz = b->nz = new_i[n];
244   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
245   (*fact)->info.factor_mallocs = 0;
246 
247   af = ((double)b->nz)/((double)a->nz) + .001;
248   ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr);
249   ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
250   ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
251   ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
252 
253   ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
254 
255   PetscFunctionReturn(0);
256 #endif
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
261 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
262 {
263   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
264   IS                 isicol;
265   PetscErrorCode     ierr;
266   PetscInt           *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
267   PetscInt           *bi,*bj,*ajtmp;
268   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
269   PetscReal          f;
270   PetscInt           nlnk,*lnk,k,**bi_ptr;
271   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
272   PetscBT            lnkbt;
273 
274   PetscFunctionBegin;
275   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
276   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
277   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
278   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
279 
280   /* get new row pointers */
281   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
282   bi[0] = 0;
283 
284   /* bdiag is location of diagonal in factor */
285   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
286   bdiag[0] = 0;
287 
288   /* linked list for storing column indices of the active row */
289   nlnk = n + 1;
290   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
291 
292   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
293 
294   /* initial FreeSpace size is f*(ai[n]+1) */
295   f = info->fill;
296   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
297   current_space = free_space;
298 
299   for (i=0; i<n; i++) {
300     /* copy previous fill into linked list */
301     nzi = 0;
302     nnz = ai[r[i]+1] - ai[r[i]];
303     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
304     ajtmp = aj + ai[r[i]];
305     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
306     nzi += nlnk;
307 
308     /* add pivot rows into linked list */
309     row = lnk[n];
310     while (row < i) {
311       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
312       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
313       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
314       nzi += nlnk;
315       row  = lnk[row];
316     }
317     bi[i+1] = bi[i] + nzi;
318     im[i]   = nzi;
319 
320     /* mark bdiag */
321     nzbd = 0;
322     nnz  = nzi;
323     k    = lnk[n];
324     while (nnz-- && k < i){
325       nzbd++;
326       k = lnk[k];
327     }
328     bdiag[i] = bi[i] + nzbd;
329 
330     /* if free space is not available, make more free space */
331     if (current_space->local_remaining<nzi) {
332       nnz = (n - i)*nzi; /* estimated and max additional space needed */
333       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
334       reallocs++;
335     }
336 
337     /* copy data into free space, then initialize lnk */
338     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
339     bi_ptr[i] = current_space->array;
340     current_space->array           += nzi;
341     current_space->local_used      += nzi;
342     current_space->local_remaining -= nzi;
343   }
344 #if defined(PETSC_USE_INFO)
345   if (ai[n] != 0) {
346     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
347     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
348     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
349     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
350     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
351   } else {
352     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
353   }
354 #endif
355 
356   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
357   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
358 
359   /* destroy list of free space and other temporary array(s) */
360   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
361   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
362   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
363   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
364 
365   /* put together the new matrix */
366   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
367   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
368   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
369   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
370   ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr);
371   b    = (Mat_SeqAIJ*)(*B)->data;
372   b->free_a       = PETSC_TRUE;
373   b->free_ij      = PETSC_TRUE;
374   b->singlemalloc = PETSC_FALSE;
375   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
376   b->j          = bj;
377   b->i          = bi;
378   b->diag       = bdiag;
379   b->ilen       = 0;
380   b->imax       = 0;
381   b->row        = isrow;
382   b->col        = iscol;
383   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
384   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
385   b->icol       = isicol;
386   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
387 
388   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
389   ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
390   b->maxnz = b->nz = bi[n] ;
391 
392   (*B)->factor                 =  FACTOR_LU;
393   (*B)->info.factor_mallocs    = reallocs;
394   (*B)->info.fill_ratio_given  = f;
395 
396   if (ai[n] != 0) {
397     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
398   } else {
399     (*B)->info.fill_ratio_needed = 0.0;
400   }
401   ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr);
402   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
403   PetscFunctionReturn(0);
404 }
405 
406 /*
407     Trouble in factorization, should we dump the original matrix?
408 */
409 #undef __FUNCT__
410 #define __FUNCT__ "MatFactorDumpMatrix"
411 PetscErrorCode MatFactorDumpMatrix(Mat A)
412 {
413   PetscErrorCode ierr;
414   PetscTruth     flg;
415 
416   PetscFunctionBegin;
417   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr);
418   if (flg) {
419     PetscViewer viewer;
420     char        filename[PETSC_MAX_PATH_LEN];
421 
422     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
423     ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
424     ierr = MatView(A,viewer);CHKERRQ(ierr);
425     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
426   }
427   PetscFunctionReturn(0);
428 }
429 
430 /* ----------------------------------------------------------- */
431 #undef __FUNCT__
432 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
433 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
434 {
435   Mat            C=*B;
436   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
437   IS             isrow = b->row,isicol = b->icol;
438   PetscErrorCode ierr;
439   PetscInt       *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
440   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
441   PetscInt       *diag_offset = b->diag,diag,*pj;
442   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
443   PetscScalar    d;
444   PetscReal      rs;
445   LUShift_Ctx    sctx;
446   PetscInt       newshift,*ddiag;
447 
448   PetscFunctionBegin;
449   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
450   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
451   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
452   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
453   rtmps = rtmp; ics = ic;
454 
455   sctx.shift_top  = 0;
456   sctx.nshift_max = 0;
457   sctx.shift_lo   = 0;
458   sctx.shift_hi   = 0;
459 
460   /* if both shift schemes are chosen by user, only use info->shiftpd */
461   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
462   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
463     PetscInt *aai = a->i;
464     ddiag          = a->diag;
465     sctx.shift_top = 0;
466     for (i=0; i<n; i++) {
467       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
468       d  = (a->a)[ddiag[i]];
469       rs = -PetscAbsScalar(d) - PetscRealPart(d);
470       v  = a->a+aai[i];
471       nz = aai[i+1] - aai[i];
472       for (j=0; j<nz; j++)
473 	rs += PetscAbsScalar(v[j]);
474       if (rs>sctx.shift_top) sctx.shift_top = rs;
475     }
476     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
477     sctx.shift_top    *= 1.1;
478     sctx.nshift_max   = 5;
479     sctx.shift_lo     = 0.;
480     sctx.shift_hi     = 1.;
481   }
482 
483   sctx.shift_amount = 0;
484   sctx.nshift       = 0;
485   do {
486     sctx.lushift = PETSC_FALSE;
487     for (i=0; i<n; i++){
488       nz    = bi[i+1] - bi[i];
489       bjtmp = bj + bi[i];
490       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
491 
492       /* load in initial (unfactored row) */
493       nz    = a->i[r[i]+1] - a->i[r[i]];
494       ajtmp = a->j + a->i[r[i]];
495       v     = a->a + a->i[r[i]];
496       for (j=0; j<nz; j++) {
497         rtmp[ics[ajtmp[j]]] = v[j];
498       }
499       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
500 
501       row = *bjtmp++;
502       while  (row < i) {
503         pc = rtmp + row;
504         if (*pc != 0.0) {
505           pv         = b->a + diag_offset[row];
506           pj         = b->j + diag_offset[row] + 1;
507           multiplier = *pc / *pv++;
508           *pc        = multiplier;
509           nz         = bi[row+1] - diag_offset[row] - 1;
510           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
511           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
512         }
513         row = *bjtmp++;
514       }
515       /* finished row so stick it into b->a */
516       pv   = b->a + bi[i] ;
517       pj   = b->j + bi[i] ;
518       nz   = bi[i+1] - bi[i];
519       diag = diag_offset[i] - bi[i];
520       rs   = 0.0;
521       for (j=0; j<nz; j++) {
522         pv[j] = rtmps[pj[j]];
523         if (j != diag) rs += PetscAbsScalar(pv[j]);
524       }
525 
526       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
527       sctx.rs  = rs;
528       sctx.pv  = pv[diag];
529       ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr);
530       if (newshift == 1) break;
531     }
532 
533     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
534       /*
535        * if no shift in this attempt & shifting & started shifting & can refine,
536        * then try lower shift
537        */
538       sctx.shift_hi        = info->shift_fraction;
539       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
540       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
541       sctx.lushift         = PETSC_TRUE;
542       sctx.nshift++;
543     }
544   } while (sctx.lushift);
545 
546   /* invert diagonal entries for simplier triangular solves */
547   for (i=0; i<n; i++) {
548     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
549   }
550 
551   ierr = PetscFree(rtmp);CHKERRQ(ierr);
552   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
553   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
554   C->factor = FACTOR_LU;
555   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
556   C->assembled = PETSC_TRUE;
557   ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr);
558   if (sctx.nshift){
559     if (info->shiftnz) {
560       ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
561     } else if (info->shiftpd) {
562       ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
563     }
564   }
565   PetscFunctionReturn(0);
566 }
567 
568 /*
569    This routine implements inplace ILU(0) with row or/and column permutations.
570    Input:
571      A - original matrix
572    Output;
573      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
574          a->j (col index) is permuted by the inverse of colperm, then sorted
575          a->a reordered accordingly with a->j
576          a->diag (ptr to diagonal elements) is updated.
577 */
578 #undef __FUNCT__
579 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
580 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B)
581 {
582   Mat            C=*B;
583   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
584   IS             isrow = b->row,isicol = b->icol;
585   PetscErrorCode ierr;
586   PetscInt       *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
587   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
588   PetscInt       *diag_offset = b->diag,diag,*pj;
589   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
590   PetscScalar    d;
591   PetscReal      rs;
592   LUShift_Ctx    sctx;
593   PetscInt       newshift,*ddiag;
594 
595   PetscFunctionBegin;
596   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
597   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
598   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
599   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
600   rtmps = rtmp; ics = ic;
601 
602   sctx.shift_top  = 0;
603   sctx.nshift_max = 0;
604   sctx.shift_lo   = 0;
605   sctx.shift_hi   = 0;
606 
607   /* if both shift schemes are chosen by user, only use info->shiftpd */
608   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
609   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
610     PetscInt *aai = a->i;
611     ddiag          = a->diag;
612     sctx.shift_top = 0;
613     for (i=0; i<n; i++) {
614       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
615       d  = (a->a)[ddiag[i]];
616       rs = -PetscAbsScalar(d) - PetscRealPart(d);
617       v  = a->a+aai[i];
618       nz = aai[i+1] - aai[i];
619       for (j=0; j<nz; j++)
620 	rs += PetscAbsScalar(v[j]);
621       if (rs>sctx.shift_top) sctx.shift_top = rs;
622     }
623     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
624     sctx.shift_top    *= 1.1;
625     sctx.nshift_max   = 5;
626     sctx.shift_lo     = 0.;
627     sctx.shift_hi     = 1.;
628   }
629 
630   sctx.shift_amount = 0;
631   sctx.nshift       = 0;
632   do {
633     sctx.lushift = PETSC_FALSE;
634     for (i=0; i<n; i++){
635       nz    = bi[r[i]+1] - bi[r[i]];
636       bjtmp = bj + bi[r[i]];
637       for  (j=0; j<nz; j++) rtmps[ics[bjtmp[j]]] = 0.0; /* intializing sorted column */
638 
639       /* load in initial (unfactored row) */
640       nz    = a->i[r[i]+1] - a->i[r[i]];
641       ajtmp = a->j + a->i[r[i]];
642       v     = a->a + a->i[r[i]];
643       /* sort permuted ajtmp and values v accordingly */
644       for (j=0; j<nz; j++) {
645         ajtmp[j] = ics[ajtmp[j]];
646       }
647       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
648 
649       diag_offset[r[i]] = bi[r[i]];
650       for (j=0; j<nz; j++) {
651         rtmp[ajtmp[j]] = v[j];
652         if (ajtmp[j] < i) diag_offset[r[i]]++; /* update diag_offset */
653       }
654       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
655 
656       row = *bjtmp++;
657       while  (row < i) {
658         pc = rtmp + row;
659         if (*pc != 0.0) {
660           pv         = b->a + diag_offset[r[row]];
661           pj         = b->j + diag_offset[r[row]] + 1;
662 
663           multiplier = *pc / *pv++;
664           *pc        = multiplier;
665           nz         = bi[r[row]+1] - diag_offset[r[row]] - 1;
666           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
667           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
668         }
669         row = *bjtmp++;
670       }
671       /* finished row so stick it into b->a */
672       pv   = b->a + bi[r[i]] ;
673       pj   = b->j + bi[r[i]] ;
674       nz   = bi[r[i]+1] - bi[r[i]];
675       diag = diag_offset[r[i]] - bi[r[i]];
676 
677       rs   = 0.0;
678       for (j=0; j<nz; j++) {
679         pv[j] = rtmps[pj[j]];
680         if (j != diag) rs += PetscAbsScalar(pv[j]);
681       }
682 
683       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
684       sctx.rs  = rs;
685       sctx.pv  = pv[diag];
686       ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr);
687       if (newshift == 1) break;
688     }
689 
690     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
691       /*
692        * if no shift in this attempt & shifting & started shifting & can refine,
693        * then try lower shift
694        */
695       sctx.shift_hi        = info->shift_fraction;
696       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
697       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
698       sctx.lushift         = PETSC_TRUE;
699       sctx.nshift++;
700     }
701   } while (sctx.lushift);
702 
703   /* invert diagonal entries for simplier triangular solves */
704   for (i=0; i<n; i++) {
705     b->a[diag_offset[r[i]]] = 1.0/b->a[diag_offset[r[i]]];
706   }
707 
708   ierr = PetscFree(rtmp);CHKERRQ(ierr);
709   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
710   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
711   C->factor = FACTOR_LU;
712   (*B)->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
713   C->assembled = PETSC_TRUE;
714   ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr);
715   if (sctx.nshift){
716     if (info->shiftnz) {
717       ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
718     } else if (info->shiftpd) {
719       ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr);
720     }
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatUsePETSc_SeqAIJ"
727 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
728 {
729   PetscFunctionBegin;
730   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
731   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
732   PetscFunctionReturn(0);
733 }
734 
735 
736 /* ----------------------------------------------------------- */
737 #undef __FUNCT__
738 #define __FUNCT__ "MatLUFactor_SeqAIJ"
739 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
740 {
741   PetscErrorCode ierr;
742   Mat            C;
743 
744   PetscFunctionBegin;
745   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
746   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
747   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
748   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
749   PetscFunctionReturn(0);
750 }
751 /* ----------------------------------------------------------- */
752 #undef __FUNCT__
753 #define __FUNCT__ "MatSolve_SeqAIJ"
754 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
755 {
756   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
757   IS             iscol = a->col,isrow = a->row;
758   PetscErrorCode ierr;
759   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
760   PetscInt       nz,*rout,*cout;
761   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
762 
763   PetscFunctionBegin;
764   if (!n) PetscFunctionReturn(0);
765 
766   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
767   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
768   tmp  = a->solve_work;
769 
770   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
771   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
772 
773   /* forward solve the lower triangular */
774   tmp[0] = b[*r++];
775   tmps   = tmp;
776   for (i=1; i<n; i++) {
777     v   = aa + ai[i] ;
778     vi  = aj + ai[i] ;
779     nz  = a->diag[i] - ai[i];
780     sum = b[*r++];
781     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
782     tmp[i] = sum;
783   }
784 
785   /* backward solve the upper triangular */
786   for (i=n-1; i>=0; i--){
787     v   = aa + a->diag[i] + 1;
788     vi  = aj + a->diag[i] + 1;
789     nz  = ai[i+1] - a->diag[i] - 1;
790     sum = tmp[i];
791     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
792     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
793   }
794 
795   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
796   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
797   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
798   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
799   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatMatSolve_SeqAIJ"
805 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
806 {
807   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
808   IS             iscol = a->col,isrow = a->row;
809   PetscErrorCode ierr;
810   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
811   PetscInt       nz,*rout,*cout,neq;
812   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
813 
814   PetscFunctionBegin;
815   if (!n) PetscFunctionReturn(0);
816 
817   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
818   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
819 
820   tmp  = a->solve_work;
821   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
822   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
823 
824   for (neq=0; neq<n; neq++){
825     /* forward solve the lower triangular */
826     tmp[0] = b[r[0]];
827     tmps   = tmp;
828     for (i=1; i<n; i++) {
829       v   = aa + ai[i] ;
830       vi  = aj + ai[i] ;
831       nz  = a->diag[i] - ai[i];
832       sum = b[r[i]];
833       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
834       tmp[i] = sum;
835     }
836     /* backward solve the upper triangular */
837     for (i=n-1; i>=0; i--){
838       v   = aa + a->diag[i] + 1;
839       vi  = aj + a->diag[i] + 1;
840       nz  = ai[i+1] - a->diag[i] - 1;
841       sum = tmp[i];
842       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
843       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
844     }
845 
846     b += n;
847     x += n;
848   }
849   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
850   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
851   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
852   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
853   ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr);
854   PetscFunctionReturn(0);
855 }
856 
857 #undef __FUNCT__
858 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
859 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
860 {
861   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
862   IS             iscol = a->col,isrow = a->row;
863   PetscErrorCode ierr;
864   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
865   PetscInt       nz,*rout,*cout,row;
866   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
867 
868   PetscFunctionBegin;
869   if (!n) PetscFunctionReturn(0);
870 
871   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
872   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
873   tmp  = a->solve_work;
874 
875   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
876   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
877 
878   /* forward solve the lower triangular */
879   tmp[0] = b[*r++];
880   tmps   = tmp;
881   for (row=1; row<n; row++) {
882     i   = rout[row]; /* permuted row */
883     v   = aa + ai[i] ;
884     vi  = aj + ai[i] ;
885     nz  = a->diag[i] - ai[i];
886     sum = b[*r++];
887     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
888     tmp[row] = sum;
889   }
890 
891   /* backward solve the upper triangular */
892   for (row=n-1; row>=0; row--){
893     i   = rout[row]; /* permuted row */
894     v   = aa + a->diag[i] + 1;
895     vi  = aj + a->diag[i] + 1;
896     nz  = ai[i+1] - a->diag[i] - 1;
897     sum = tmp[row];
898     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
899     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
900   }
901 
902   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
903   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
904   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
905   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
906   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 /* ----------------------------------------------------------- */
911 #undef __FUNCT__
912 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
913 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
914 {
915   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
916   PetscErrorCode ierr;
917   PetscInt       n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
918   PetscScalar    *x,*b,*aa = a->a;
919 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
920   PetscInt       adiag_i,i,*vi,nz,ai_i;
921   PetscScalar    *v,sum;
922 #endif
923 
924   PetscFunctionBegin;
925   if (!n) PetscFunctionReturn(0);
926 
927   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
928   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
929 
930 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
931   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
932 #else
933   /* forward solve the lower triangular */
934   x[0] = b[0];
935   for (i=1; i<n; i++) {
936     ai_i = ai[i];
937     v    = aa + ai_i;
938     vi   = aj + ai_i;
939     nz   = adiag[i] - ai_i;
940     sum  = b[i];
941     while (nz--) sum -= *v++ * x[*vi++];
942     x[i] = sum;
943   }
944 
945   /* backward solve the upper triangular */
946   for (i=n-1; i>=0; i--){
947     adiag_i = adiag[i];
948     v       = aa + adiag_i + 1;
949     vi      = aj + adiag_i + 1;
950     nz      = ai[i+1] - adiag_i - 1;
951     sum     = x[i];
952     while (nz--) sum -= *v++ * x[*vi++];
953     x[i]    = sum*aa[adiag_i];
954   }
955 #endif
956   ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr);
957   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
958   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
964 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
965 {
966   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
967   IS             iscol = a->col,isrow = a->row;
968   PetscErrorCode ierr;
969   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
970   PetscInt       nz,*rout,*cout;
971   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;
972 
973   PetscFunctionBegin;
974   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
975 
976   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
977   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
978   tmp  = a->solve_work;
979 
980   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
981   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
982 
983   /* forward solve the lower triangular */
984   tmp[0] = b[*r++];
985   for (i=1; i<n; i++) {
986     v   = aa + ai[i] ;
987     vi  = aj + ai[i] ;
988     nz  = a->diag[i] - ai[i];
989     sum = b[*r++];
990     while (nz--) sum -= *v++ * tmp[*vi++ ];
991     tmp[i] = sum;
992   }
993 
994   /* backward solve the upper triangular */
995   for (i=n-1; i>=0; i--){
996     v   = aa + a->diag[i] + 1;
997     vi  = aj + a->diag[i] + 1;
998     nz  = ai[i+1] - a->diag[i] - 1;
999     sum = tmp[i];
1000     while (nz--) sum -= *v++ * tmp[*vi++ ];
1001     tmp[i] = sum*aa[a->diag[i]];
1002     x[*c--] += tmp[i];
1003   }
1004 
1005   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1006   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1007   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1008   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1009   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1010 
1011   PetscFunctionReturn(0);
1012 }
1013 /* -------------------------------------------------------------------*/
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1016 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1017 {
1018   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1019   IS             iscol = a->col,isrow = a->row;
1020   PetscErrorCode ierr;
1021   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1022   PetscInt       nz,*rout,*cout,*diag = a->diag;
1023   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;
1024 
1025   PetscFunctionBegin;
1026   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1027   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1028   tmp  = a->solve_work;
1029 
1030   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1031   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1032 
1033   /* copy the b into temp work space according to permutation */
1034   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1035 
1036   /* forward solve the U^T */
1037   for (i=0; i<n; i++) {
1038     v   = aa + diag[i] ;
1039     vi  = aj + diag[i] + 1;
1040     nz  = ai[i+1] - diag[i] - 1;
1041     s1  = tmp[i];
1042     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1043     while (nz--) {
1044       tmp[*vi++ ] -= (*v++)*s1;
1045     }
1046     tmp[i] = s1;
1047   }
1048 
1049   /* backward solve the L^T */
1050   for (i=n-1; i>=0; i--){
1051     v   = aa + diag[i] - 1 ;
1052     vi  = aj + diag[i] - 1 ;
1053     nz  = diag[i] - ai[i];
1054     s1  = tmp[i];
1055     while (nz--) {
1056       tmp[*vi-- ] -= (*v--)*s1;
1057     }
1058   }
1059 
1060   /* copy tmp into x according to permutation */
1061   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1062 
1063   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1064   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1065   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1066   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1067 
1068   ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr);
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1074 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1075 {
1076   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1077   IS             iscol = a->col,isrow = a->row;
1078   PetscErrorCode ierr;
1079   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1080   PetscInt       nz,*rout,*cout,*diag = a->diag;
1081   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;
1082 
1083   PetscFunctionBegin;
1084   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1085 
1086   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1087   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1088   tmp = a->solve_work;
1089 
1090   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1091   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1092 
1093   /* copy the b into temp work space according to permutation */
1094   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1095 
1096   /* forward solve the U^T */
1097   for (i=0; i<n; i++) {
1098     v   = aa + diag[i] ;
1099     vi  = aj + diag[i] + 1;
1100     nz  = ai[i+1] - diag[i] - 1;
1101     tmp[i] *= *v++;
1102     while (nz--) {
1103       tmp[*vi++ ] -= (*v++)*tmp[i];
1104     }
1105   }
1106 
1107   /* backward solve the L^T */
1108   for (i=n-1; i>=0; i--){
1109     v   = aa + diag[i] - 1 ;
1110     vi  = aj + diag[i] - 1 ;
1111     nz  = diag[i] - ai[i];
1112     while (nz--) {
1113       tmp[*vi-- ] -= (*v--)*tmp[i];
1114     }
1115   }
1116 
1117   /* copy tmp into x according to permutation */
1118   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1119 
1120   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1122   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1123   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1124 
1125   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 /* ----------------------------------------------------------------*/
1129 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*);
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1133 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
1134 {
1135   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1136   IS                 isicol;
1137   PetscErrorCode     ierr;
1138   PetscInt           *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d;
1139   PetscInt           *bi,*cols,nnz,*cols_lvl;
1140   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1141   PetscInt           i,levels,diagonal_fill;
1142   PetscTruth         col_identity,row_identity;
1143   PetscReal          f;
1144   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1145   PetscBT            lnkbt;
1146   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1147   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1148   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1149   PetscTruth         missing;
1150 
1151   PetscFunctionBegin;
1152   f             = info->fill;
1153   levels        = (PetscInt)info->levels;
1154   diagonal_fill = (PetscInt)info->diagonal_fill;
1155   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1156 
1157   /* special case that simply copies fill pattern */
1158   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1159   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1160   if (!levels && row_identity && col_identity) {
1161     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
1162     (*fact)->factor                 = FACTOR_LU;
1163     (*fact)->info.factor_mallocs    = 0;
1164     (*fact)->info.fill_ratio_given  = info->fill;
1165     (*fact)->info.fill_ratio_needed = 1.0;
1166     b               = (Mat_SeqAIJ*)(*fact)->data;
1167     ierr = MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);CHKERRQ(ierr);
1168     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1169     b->row              = isrow;
1170     b->col              = iscol;
1171     b->icol             = isicol;
1172     ierr                = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1173     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
1174     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1175     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1176     PetscFunctionReturn(0);
1177   }
1178 
1179   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1180   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1181 
1182   /* get new row pointers */
1183   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1184   bi[0] = 0;
1185   /* bdiag is location of diagonal in factor */
1186   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1187   bdiag[0]  = 0;
1188 
1189   ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr);
1190   bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1191 
1192   /* create a linked list for storing column indices of the active row */
1193   nlnk = n + 1;
1194   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1195 
1196   /* initial FreeSpace size is f*(ai[n]+1) */
1197   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1198   current_space = free_space;
1199   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1200   current_space_lvl = free_space_lvl;
1201 
1202   for (i=0; i<n; i++) {
1203     nzi = 0;
1204     /* copy current row into linked list */
1205     nnz  = ai[r[i]+1] - ai[r[i]];
1206     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1207     cols = aj + ai[r[i]];
1208     lnk[i] = -1; /* marker to indicate if diagonal exists */
1209     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1210     nzi += nlnk;
1211 
1212     /* make sure diagonal entry is included */
1213     if (diagonal_fill && lnk[i] == -1) {
1214       fm = n;
1215       while (lnk[fm] < i) fm = lnk[fm];
1216       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1217       lnk[fm]    = i;
1218       lnk_lvl[i] = 0;
1219       nzi++; dcount++;
1220     }
1221 
1222     /* add pivot rows into the active row */
1223     nzbd = 0;
1224     prow = lnk[n];
1225     while (prow < i) {
1226       nnz      = bdiag[prow];
1227       cols     = bj_ptr[prow] + nnz + 1;
1228       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1229       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1230       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1231       nzi += nlnk;
1232       prow = lnk[prow];
1233       nzbd++;
1234     }
1235     bdiag[i] = nzbd;
1236     bi[i+1]  = bi[i] + nzi;
1237 
1238     /* if free space is not available, make more free space */
1239     if (current_space->local_remaining<nzi) {
1240       nnz = nzi*(n - i); /* estimated and max additional space needed */
1241       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1242       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1243       reallocs++;
1244     }
1245 
1246     /* copy data into free_space and free_space_lvl, then initialize lnk */
1247     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1248     bj_ptr[i]    = current_space->array;
1249     bjlvl_ptr[i] = current_space_lvl->array;
1250 
1251     /* make sure the active row i has diagonal entry */
1252     if (*(bj_ptr[i]+bdiag[i]) != i) {
1253       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1254     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1255     }
1256 
1257     current_space->array           += nzi;
1258     current_space->local_used      += nzi;
1259     current_space->local_remaining -= nzi;
1260     current_space_lvl->array           += nzi;
1261     current_space_lvl->local_used      += nzi;
1262     current_space_lvl->local_remaining -= nzi;
1263   }
1264 
1265   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1266   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1267 
1268   /* destroy list of free space and other temporary arrays */
1269   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1270   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
1271   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1272   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1273   ierr = PetscFree(bj_ptr);CHKERRQ(ierr);
1274 
1275 #if defined(PETSC_USE_INFO)
1276   {
1277     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1278     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1279     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1280     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1281     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1282     if (diagonal_fill) {
1283       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1284     }
1285   }
1286 #endif
1287 
1288   /* put together the new matrix */
1289   ierr = MatCreate(A->comm,fact);CHKERRQ(ierr);
1290   ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr);
1291   ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr);
1292   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1293   ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr);
1294   b = (Mat_SeqAIJ*)(*fact)->data;
1295   b->free_a       = PETSC_TRUE;
1296   b->free_ij      = PETSC_TRUE;
1297   b->singlemalloc = PETSC_FALSE;
1298   len = (bi[n] )*sizeof(PetscScalar);
1299   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1300   b->j          = bj;
1301   b->i          = bi;
1302   for (i=0; i<n; i++) bdiag[i] += bi[i];
1303   b->diag       = bdiag;
1304   b->ilen       = 0;
1305   b->imax       = 0;
1306   b->row        = isrow;
1307   b->col        = iscol;
1308   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1309   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1310   b->icol       = isicol;
1311   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1312   /* In b structure:  Free imax, ilen, old a, old j.
1313      Allocate bdiag, solve_work, new a, new j */
1314   ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1315   b->maxnz             = b->nz = bi[n] ;
1316   (*fact)->factor = FACTOR_LU;
1317   (*fact)->info.factor_mallocs    = reallocs;
1318   (*fact)->info.fill_ratio_given  = f;
1319   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1320 
1321   ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr);
1322   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1323 
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #include "src/mat/impls/sbaij/seq/sbaij.h"
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1330 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1331 {
1332   Mat            C = *B;
1333   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1334   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1335   IS             ip=b->row,iip = b->icol;
1336   PetscErrorCode ierr;
1337   PetscInt       *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1338   PetscInt       *ai=a->i,*aj=a->j;
1339   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1340   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1341   PetscReal      zeropivot,rs,shiftnz;
1342   PetscReal      shiftpd;
1343   ChShift_Ctx    sctx;
1344   PetscInt       newshift;
1345 
1346   PetscFunctionBegin;
1347   shiftnz   = info->shiftnz;
1348   shiftpd   = info->shiftpd;
1349   zeropivot = info->zeropivot;
1350 
1351   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1352   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
1353 
1354   /* initialization */
1355   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1356   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1357   jl   = il + mbs;
1358   rtmp = (MatScalar*)(jl + mbs);
1359 
1360   sctx.shift_amount = 0;
1361   sctx.nshift       = 0;
1362   do {
1363     sctx.chshift = PETSC_FALSE;
1364     for (i=0; i<mbs; i++) {
1365       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1366     }
1367 
1368     for (k = 0; k<mbs; k++){
1369       bval = ba + bi[k];
1370       /* initialize k-th row by the perm[k]-th row of A */
1371       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1372       for (j = jmin; j < jmax; j++){
1373         col = riip[aj[j]];
1374         if (col >= k){ /* only take upper triangular entry */
1375           rtmp[col] = aa[j];
1376           *bval++  = 0.0; /* for in-place factorization */
1377         }
1378       }
1379       /* shift the diagonal of the matrix */
1380       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1381 
1382       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1383       dk = rtmp[k];
1384       i = jl[k]; /* first row to be added to k_th row  */
1385 
1386       while (i < k){
1387         nexti = jl[i]; /* next row to be added to k_th row */
1388 
1389         /* compute multiplier, update diag(k) and U(i,k) */
1390         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1391         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1392         dk += uikdi*ba[ili];
1393         ba[ili] = uikdi; /* -U(i,k) */
1394 
1395         /* add multiple of row i to k-th row */
1396         jmin = ili + 1; jmax = bi[i+1];
1397         if (jmin < jmax){
1398           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1399           /* update il and jl for row i */
1400           il[i] = jmin;
1401           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1402         }
1403         i = nexti;
1404       }
1405 
1406       /* shift the diagonals when zero pivot is detected */
1407       /* compute rs=sum of abs(off-diagonal) */
1408       rs   = 0.0;
1409       jmin = bi[k]+1;
1410       nz   = bi[k+1] - jmin;
1411       if (nz){
1412         bcol = bj + jmin;
1413         while (nz--){
1414           rs += PetscAbsScalar(rtmp[*bcol]);
1415           bcol++;
1416         }
1417       }
1418 
1419       sctx.rs = rs;
1420       sctx.pv = dk;
1421       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1422       if (newshift == 1) break;
1423 
1424       /* copy data into U(k,:) */
1425       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1426       jmin = bi[k]+1; jmax = bi[k+1];
1427       if (jmin < jmax) {
1428         for (j=jmin; j<jmax; j++){
1429           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1430         }
1431         /* add the k-th row into il and jl */
1432         il[k] = jmin;
1433         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1434       }
1435     }
1436   } while (sctx.chshift);
1437   ierr = PetscFree(il);CHKERRQ(ierr);
1438 
1439   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1440   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
1441   C->factor       = FACTOR_CHOLESKY;
1442   C->assembled    = PETSC_TRUE;
1443   C->preallocated = PETSC_TRUE;
1444   ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr);
1445   if (sctx.nshift){
1446     if (shiftnz) {
1447       ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1448     } else if (shiftpd) {
1449       ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1450     }
1451   }
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1457 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1458 {
1459   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1460   Mat_SeqSBAIJ       *b;
1461   Mat                B;
1462   PetscErrorCode     ierr;
1463   PetscTruth         perm_identity;
1464   PetscInt           reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1465   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1466   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1467   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1468   PetscReal          fill=info->fill,levels=info->levels;
1469   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1470   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1471   PetscBT            lnkbt;
1472 
1473   PetscFunctionBegin;
1474   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1475   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1476 
1477   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1478   ui[0] = 0;
1479 
1480   /* special case that simply copies fill pattern */
1481   if (!levels && perm_identity) {
1482     for (i=0; i<am; i++) {
1483       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1484     }
1485     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1486     cols = uj;
1487     for (i=0; i<am; i++) {
1488       aj    = a->j + a->diag[i];
1489       ncols = ui[i+1] - ui[i];
1490       for (j=0; j<ncols; j++) *cols++ = *aj++;
1491     }
1492   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1493     /* initialization */
1494     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
1495 
1496     /* jl: linked list for storing indices of the pivot rows
1497        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1498     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1499     il         = jl + am;
1500     uj_ptr     = (PetscInt**)(il + am);
1501     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1502     for (i=0; i<am; i++){
1503       jl[i] = am; il[i] = 0;
1504     }
1505 
1506     /* create and initialize a linked list for storing column indices of the active row k */
1507     nlnk = am + 1;
1508     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1509 
1510     /* initial FreeSpace size is fill*(ai[am]+1) */
1511     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1512     current_space = free_space;
1513     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1514     current_space_lvl = free_space_lvl;
1515 
1516     for (k=0; k<am; k++){  /* for each active row k */
1517       /* initialize lnk by the column indices of row rip[k] of A */
1518       nzk   = 0;
1519       ncols = ai[rip[k]+1] - ai[rip[k]];
1520       ncols_upper = 0;
1521       for (j=0; j<ncols; j++){
1522         i = *(aj + ai[rip[k]] + j);
1523         if (rip[i] >= k){ /* only take upper triangular entry */
1524           ajtmp[ncols_upper] = i;
1525           ncols_upper++;
1526         }
1527       }
1528       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1529       nzk += nlnk;
1530 
1531       /* update lnk by computing fill-in for each pivot row to be merged in */
1532       prow = jl[k]; /* 1st pivot row */
1533 
1534       while (prow < k){
1535         nextprow = jl[prow];
1536 
1537         /* merge prow into k-th row */
1538         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1539         jmax = ui[prow+1];
1540         ncols = jmax-jmin;
1541         i     = jmin - ui[prow];
1542         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1543         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1544         j     = *(uj - 1);
1545         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
1546         nzk += nlnk;
1547 
1548         /* update il and jl for prow */
1549         if (jmin < jmax){
1550           il[prow] = jmin;
1551           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1552         }
1553         prow = nextprow;
1554       }
1555 
1556       /* if free space is not available, make more free space */
1557       if (current_space->local_remaining<nzk) {
1558         i = am - k + 1; /* num of unfactored rows */
1559         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1560         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1561         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
1562         reallocs++;
1563       }
1564 
1565       /* copy data into free_space and free_space_lvl, then initialize lnk */
1566       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1567 
1568       /* add the k-th row into il and jl */
1569       if (nzk > 1){
1570         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1571         jl[k] = jl[i]; jl[i] = k;
1572         il[k] = ui[k] + 1;
1573       }
1574       uj_ptr[k]     = current_space->array;
1575       uj_lvl_ptr[k] = current_space_lvl->array;
1576 
1577       current_space->array           += nzk;
1578       current_space->local_used      += nzk;
1579       current_space->local_remaining -= nzk;
1580 
1581       current_space_lvl->array           += nzk;
1582       current_space_lvl->local_used      += nzk;
1583       current_space_lvl->local_remaining -= nzk;
1584 
1585       ui[k+1] = ui[k] + nzk;
1586     }
1587 
1588 #if defined(PETSC_USE_INFO)
1589     if (ai[am] != 0) {
1590       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1591       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1592       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1593       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1594     } else {
1595       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1596     }
1597 #endif
1598 
1599     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1600     ierr = PetscFree(jl);CHKERRQ(ierr);
1601     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
1602 
1603     /* destroy list of free space and other temporary array(s) */
1604     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1605     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1606     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1607     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1608 
1609   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1610 
1611   /* put together the new matrix in MATSEQSBAIJ format */
1612   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1613   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1614   B = *fact;
1615   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1616   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1617 
1618   b    = (Mat_SeqSBAIJ*)B->data;
1619   b->singlemalloc = PETSC_FALSE;
1620   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1621   b->j    = uj;
1622   b->i    = ui;
1623   b->diag = 0;
1624   b->ilen = 0;
1625   b->imax = 0;
1626   b->row  = perm;
1627   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1628   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1629   b->icol = perm;
1630   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1631   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1632   ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1633   b->maxnz = b->nz = ui[am];
1634   b->free_a  = PETSC_TRUE;
1635   b->free_ij = PETSC_TRUE;
1636 
1637   B->factor                 = FACTOR_CHOLESKY;
1638   B->info.factor_mallocs    = reallocs;
1639   B->info.fill_ratio_given  = fill;
1640   if (ai[am] != 0) {
1641     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1642   } else {
1643     B->info.fill_ratio_needed = 0.0;
1644   }
1645   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1646   if (perm_identity){
1647     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1648     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1649   }
1650   PetscFunctionReturn(0);
1651 }
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1655 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1656 {
1657   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1658   Mat_SeqSBAIJ       *b;
1659   Mat                B;
1660   PetscErrorCode     ierr;
1661   PetscTruth         perm_identity;
1662   PetscReal          fill = info->fill;
1663   PetscInt           *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1664   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1665   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1666   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1667   PetscBT            lnkbt;
1668   IS                 iperm;
1669 
1670   PetscFunctionBegin;
1671   /* check whether perm is the identity mapping */
1672   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1673   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
1674   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
1675   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1676 
1677   /* initialization */
1678   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1679   ui[0] = 0;
1680 
1681   /* jl: linked list for storing indices of the pivot rows
1682      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1683   ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr);
1684   il     = jl + am;
1685   cols   = il + am;
1686   ui_ptr = (PetscInt**)(cols + am);
1687   for (i=0; i<am; i++){
1688     jl[i] = am; il[i] = 0;
1689   }
1690 
1691   /* create and initialize a linked list for storing column indices of the active row k */
1692   nlnk = am + 1;
1693   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1694 
1695   /* initial FreeSpace size is fill*(ai[am]+1) */
1696   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1697   current_space = free_space;
1698 
1699   for (k=0; k<am; k++){  /* for each active row k */
1700     /* initialize lnk by the column indices of row rip[k] of A */
1701     nzk   = 0;
1702     ncols = ai[rip[k]+1] - ai[rip[k]];
1703     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1704     ncols_upper = 0;
1705     for (j=0; j<ncols; j++){
1706       i = riip[*(aj + ai[rip[k]] + j)];
1707       if (i >= k){ /* only take upper triangular entry */
1708         cols[ncols_upper] = i;
1709         ncols_upper++;
1710       }
1711     }
1712     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1713     nzk += nlnk;
1714 
1715     /* update lnk by computing fill-in for each pivot row to be merged in */
1716     prow = jl[k]; /* 1st pivot row */
1717 
1718     while (prow < k){
1719       nextprow = jl[prow];
1720       /* merge prow into k-th row */
1721       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1722       jmax = ui[prow+1];
1723       ncols = jmax-jmin;
1724       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1725       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1726       nzk += nlnk;
1727 
1728       /* update il and jl for prow */
1729       if (jmin < jmax){
1730         il[prow] = jmin;
1731         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1732       }
1733       prow = nextprow;
1734     }
1735 
1736     /* if free space is not available, make more free space */
1737     if (current_space->local_remaining<nzk) {
1738       i = am - k + 1; /* num of unfactored rows */
1739       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1740       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
1741       reallocs++;
1742     }
1743 
1744     /* copy data into free space, then initialize lnk */
1745     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1746 
1747     /* add the k-th row into il and jl */
1748     if (nzk-1 > 0){
1749       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1750       jl[k] = jl[i]; jl[i] = k;
1751       il[k] = ui[k] + 1;
1752     }
1753     ui_ptr[k] = current_space->array;
1754     current_space->array           += nzk;
1755     current_space->local_used      += nzk;
1756     current_space->local_remaining -= nzk;
1757 
1758     ui[k+1] = ui[k] + nzk;
1759   }
1760 
1761 #if defined(PETSC_USE_INFO)
1762   if (ai[am] != 0) {
1763     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1764     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
1765     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1766     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
1767   } else {
1768      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1769   }
1770 #endif
1771 
1772   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1773   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
1774   ierr = PetscFree(jl);CHKERRQ(ierr);
1775 
1776   /* destroy list of free space and other temporary array(s) */
1777   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1778   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1779   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1780 
1781   /* put together the new matrix in MATSEQSBAIJ format */
1782   ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr);
1783   ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr);
1784   B    = *fact;
1785   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1786   ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1787 
1788   b = (Mat_SeqSBAIJ*)B->data;
1789   b->singlemalloc = PETSC_FALSE;
1790   b->free_a       = PETSC_TRUE;
1791   b->free_ij      = PETSC_TRUE;
1792   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1793   b->j    = uj;
1794   b->i    = ui;
1795   b->diag = 0;
1796   b->ilen = 0;
1797   b->imax = 0;
1798   b->row  = perm;
1799   b->col  = perm;
1800   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1801   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1802   b->icol = iperm;
1803   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1804   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1805   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1806   b->maxnz = b->nz = ui[am];
1807 
1808   B->factor                 = FACTOR_CHOLESKY;
1809   B->info.factor_mallocs    = reallocs;
1810   B->info.fill_ratio_given  = fill;
1811   if (ai[am] != 0) {
1812     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1813   } else {
1814     B->info.fill_ratio_needed = 0.0;
1815   }
1816   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1817   if (perm_identity){
1818     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1819     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1820   }
1821   PetscFunctionReturn(0);
1822 }
1823