xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision cd0d46ebc1dba9f5ba3de7cf2d3eee7f6fd25de7)
1 /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/seq/aij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/dot.h"
6 #include "src/inline/spops.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
10 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
11 {
12   PetscFunctionBegin;
13 
14   SETERRQ(PETSC_ERR_SUP,"Code not written");
15 #if !defined(PETSC_USE_DEBUG)
16   PetscFunctionReturn(0);
17 #endif
18 }
19 
20 
21 EXTERN int MatMarkDiagonal_SeqAIJ(Mat);
22 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
23 
24 EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*);
25 EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*);
26 EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*);
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
30   /* ------------------------------------------------------------
31 
32           This interface was contribed by Tony Caola
33 
34      This routine is an interface to the pivoting drop-tolerance
35      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36      SPARSEKIT2.
37 
38      The SPARSEKIT2 routines used here are covered by the GNU
39      copyright; see the file gnu in this directory.
40 
41      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42      help in getting this routine ironed out.
43 
44      The major drawback to this routine is that if info->fill is
45      not large enough it fails rather than allocating more space;
46      this can be fixed by hacking/improving the f2c version of
47      Yousef Saad's code.
48 
49      ishift = 0, for indices start at 1
50      ishift = 1, for indices starting at 0
51      ------------------------------------------------------------
52 */
53 int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
54 {
55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
56   PetscFunctionBegin;
57   SETERRQ(1,"This distribution does not include GNU Copyright code\n\
58   You can obtain the drop tolerance routines by installing PETSc from\n\
59   www.mcs.anl.gov/petsc\n");
60 #else
61   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b;
62   IS           iscolf,isicol,isirow;
63   PetscTruth   reorder;
64   int          *c,*r,*ic,ierr,i,n = A->m;
65   int          *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
66   int          *ordcol,*iwk,*iperm,*jw;
67   int          ishift = !a->indexshift;
68   int          jmax,lfill,job,*o_i,*o_j;
69   PetscScalar  *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70   PetscReal    permtol,af;
71 
72   PetscFunctionBegin;
73 
74   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
75   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
76   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
77   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
78   lfill   = (int)(info->dtcount/2.0);
79   jmax    = (int)(info->fill*a->nz);
80   permtol = info->dtcol;
81 
82 
83   /* ------------------------------------------------------------
84      If reorder=.TRUE., then the original matrix has to be
85      reordered to reflect the user selected ordering scheme, and
86      then de-reordered so it is in it's original format.
87      Because Saad's dperm() is NOT in place, we have to copy
88      the original matrix and allocate more storage. . .
89      ------------------------------------------------------------
90   */
91 
92   /* set reorder to true if either isrow or iscol is not identity */
93   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
94   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
95   reorder = PetscNot(reorder);
96 
97 
98   /* storage for ilu factor */
99   ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr);
100   ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr);
101   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
102   ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr);
103 
104   /* ------------------------------------------------------------
105      Make sure that everything is Fortran formatted (1-Based)
106      ------------------------------------------------------------
107   */
108   if (ishift) {
109     for (i=old_i[0];i<old_i[n];i++) {
110       old_j[i]++;
111     }
112     for(i=0;i<n+1;i++) {
113       old_i[i]++;
114     };
115   };
116 
117   if (reorder) {
118     ierr = ISGetIndices(iscol,&c);           CHKERRQ(ierr);
119     ierr = ISGetIndices(isrow,&r);           CHKERRQ(ierr);
120     for(i=0;i<n;i++) {
121       r[i]  = r[i]+1;
122       c[i]  = c[i]+1;
123     }
124     ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr);
125     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr);
126     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
127     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
128     for (i=0;i<n;i++) {
129       r[i]  = r[i]-1;
130       c[i]  = c[i]-1;
131     }
132     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
133     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
134     o_a = old_a2;
135     o_j = old_j2;
136     o_i = old_i2;
137   } else {
138     o_a = old_a;
139     o_j = old_j;
140     o_i = old_i;
141   }
142 
143   /* ------------------------------------------------------------
144      Call Saad's ilutp() routine to generate the factorization
145      ------------------------------------------------------------
146   */
147 
148   ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr);
149   ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr);
150   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
151 
152   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
153   if (ierr) {
154     switch (ierr) {
155       case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
156       case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
157       case -5: SETERRQ(1,"ilutp(), zero row encountered");
158       case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
159       case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
160       default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr);
161     }
162   }
163 
164   ierr = PetscFree(w);CHKERRQ(ierr);
165   ierr = PetscFree(jw);CHKERRQ(ierr);
166 
167   /* ------------------------------------------------------------
168      Saad's routine gives the result in Modified Sparse Row (msr)
169      Convert to Compressed Sparse Row format (csr)
170      ------------------------------------------------------------
171   */
172 
173   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
174   ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr);
175 
176   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
177 
178   ierr = PetscFree(iwk);CHKERRQ(ierr);
179   ierr = PetscFree(wk);CHKERRQ(ierr);
180 
181   if (reorder) {
182     ierr = PetscFree(old_a2);CHKERRQ(ierr);
183     ierr = PetscFree(old_j2);CHKERRQ(ierr);
184     ierr = PetscFree(old_i2);CHKERRQ(ierr);
185   } else {
186     /* fix permutation of old_j that the factorization introduced */
187     for (i=old_i[0]; i<old_i[n]; i++) {
188       old_j[i-1] = iperm[old_j[i-1]-1];
189     }
190   }
191 
192   /* get rid of the shift to indices starting at 1 */
193   if (ishift) {
194     for (i=0; i<n+1; i++) {
195       old_i[i]--;
196     }
197     for (i=old_i[0];i<old_i[n];i++) {
198       old_j[i]--;
199     }
200   }
201 
202   /* Make the factored matrix 0-based */
203   if (ishift) {
204     for (i=0; i<n+1; i++) {
205       new_i[i]--;
206     }
207     for (i=new_i[0];i<new_i[n];i++) {
208       new_j[i]--;
209     }
210   }
211 
212   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
213   /*-- permute the right-hand-side and solution vectors           --*/
214   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
215   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
216   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
217   for(i=0; i<n; i++) {
218     ordcol[i] = ic[iperm[i]-1];
219   };
220   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
221   ierr = ISDestroy(isicol);CHKERRQ(ierr);
222 
223   ierr = PetscFree(iperm);CHKERRQ(ierr);
224 
225   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
226   ierr = PetscFree(ordcol);CHKERRQ(ierr);
227 
228   /*----- put together the new matrix -----*/
229 
230   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr);
231   (*fact)->factor    = FACTOR_LU;
232   (*fact)->assembled = PETSC_TRUE;
233 
234   b = (Mat_SeqAIJ*)(*fact)->data;
235   ierr = PetscFree(b->imax);CHKERRQ(ierr);
236   b->sorted        = PETSC_FALSE;
237   b->singlemalloc  = PETSC_FALSE;
238   /* the next line frees the default space generated by the MatCreate() */
239   ierr             = PetscFree(b->a);CHKERRQ(ierr);
240   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
241   b->a             = new_a;
242   b->j             = new_j;
243   b->i             = new_i;
244   b->ilen          = 0;
245   b->imax          = 0;
246   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
247   b->row           = isirow;
248   b->col           = iscolf;
249   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
250   b->maxnz = b->nz = new_i[n];
251   b->indexshift    = a->indexshift;
252   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
253   (*fact)->info.factor_mallocs = 0;
254 
255   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
256 
257   /* check out for identical nodes. If found, use inode functions */
258   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
259 
260   af = ((double)b->nz)/((double)a->nz) + .001;
261   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
262   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
263   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
264   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
265 
266   PetscFunctionReturn(0);
267 #endif
268 }
269 
270 /*
271     Factorization code for AIJ format.
272 */
273 #undef __FUNCT__
274 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
275 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
276 {
277   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
278   IS         isicol;
279   int        *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j;
280   int        *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift;
281   int        *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
282   PetscReal  f;
283 
284   PetscFunctionBegin;
285   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
286   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
287   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
288   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
289 
290   /* get new row pointers */
291   ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr);
292   ainew[0] = -shift;
293   /* don't know how many column pointers are needed so estimate */
294   f = info->fill;
295   jmax  = (int)(f*ai[n]+(!shift));
296   ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr);
297   /* fill is a linked list of nonzeros in active row */
298   ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr);
299   im   = fill + n + 1;
300   /* idnew is location of diagonal in factor */
301   ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr);
302   idnew[0] = -shift;
303 
304   for (i=0; i<n; i++) {
305     /* first copy previous fill into linked list */
306     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
307     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
308     ajtmp   = aj + ai[r[i]] + shift;
309     fill[n] = n;
310     while (nz--) {
311       fm  = n;
312       idx = ic[*ajtmp++ + shift];
313       do {
314         m  = fm;
315         fm = fill[m];
316       } while (fm < idx);
317       fill[m]   = idx;
318       fill[idx] = fm;
319     }
320     row = fill[n];
321     while (row < i) {
322       ajtmp = ajnew + idnew[row] + (!shift);
323       nzbd  = 1 + idnew[row] - ainew[row];
324       nz    = im[row] - nzbd;
325       fm    = row;
326       while (nz-- > 0) {
327         idx = *ajtmp++ + shift;
328         nzbd++;
329         if (idx == i) im[row] = nzbd;
330         do {
331           m  = fm;
332           fm = fill[m];
333         } while (fm < idx);
334         if (fm != idx) {
335           fill[m]   = idx;
336           fill[idx] = fm;
337           fm        = idx;
338           nnz++;
339         }
340       }
341       row = fill[row];
342     }
343     /* copy new filled row into permanent storage */
344     ainew[i+1] = ainew[i] + nnz;
345     if (ainew[i+1] > jmax) {
346 
347       /* estimate how much additional space we will need */
348       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
349       /* just double the memory each time */
350       int maxadd = jmax;
351       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
352       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
353       jmax += maxadd;
354 
355       /* allocate a longer ajnew */
356       ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr);
357       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr);
358       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
359       ajnew = ajtmp;
360       realloc++; /* count how many times we realloc */
361     }
362     ajtmp = ajnew + ainew[i] + shift;
363     fm    = fill[n];
364     nzi   = 0;
365     im[i] = nnz;
366     while (nnz--) {
367       if (fm < i) nzi++;
368       *ajtmp++ = fm - shift;
369       fm       = fill[fm];
370     }
371     idnew[i] = ainew[i] + nzi;
372   }
373   if (ai[n] != 0) {
374     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
375     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
376     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
377     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
378     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
379   } else {
380     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
381   }
382 
383   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
384   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
385 
386   ierr = PetscFree(fill);CHKERRQ(ierr);
387 
388   /* put together the new matrix */
389   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr);
390   PetscLogObjectParent(*B,isicol);
391   b = (Mat_SeqAIJ*)(*B)->data;
392   ierr = PetscFree(b->imax);CHKERRQ(ierr);
393   b->singlemalloc = PETSC_FALSE;
394   /* the next line frees the default space generated by the Create() */
395   ierr = PetscFree(b->a);CHKERRQ(ierr);
396   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
397   ierr          = PetscMalloc((ainew[n]+shift+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
398   b->j          = ajnew;
399   b->i          = ainew;
400   b->diag       = idnew;
401   b->ilen       = 0;
402   b->imax       = 0;
403   b->row        = isrow;
404   b->col        = iscol;
405   b->lu_damping        = info->damping;
406   b->lu_zeropivot      = info->zeropivot;
407   b->lu_shift          = info->lu_shift;
408   b->lu_shift_fraction = info->lu_shift_fraction;
409   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
410   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
411   b->icol       = isicol;
412   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
413   /* In b structure:  Free imax, ilen, old a, old j.
414      Allocate idnew, solve_work, new a, new j */
415   PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(PetscScalar)));
416   b->maxnz = b->nz = ainew[n] + shift;
417 
418   (*B)->factor                 =  FACTOR_LU;
419   (*B)->info.factor_mallocs    = realloc;
420   (*B)->info.fill_ratio_given  = f;
421   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
422   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
423 
424   if (ai[n] != 0) {
425     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
426   } else {
427     (*B)->info.fill_ratio_needed = 0.0;
428   }
429   PetscFunctionReturn(0);
430 }
431 /* ----------------------------------------------------------- */
432 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
436 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
437 {
438   Mat          C = *B;
439   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
440   IS           isrow = b->row,isicol = b->icol;
441   int          *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j;
442   int          *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift;
443   int          *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0;
444   PetscScalar  *rtmp,*v,*pc,multiplier,*pv,*rtmps;
445   PetscReal    damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d;
446   PetscReal    row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.;
447   PetscTruth   damp,lushift;
448 
449   PetscFunctionBegin;
450   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
451   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
452   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
453   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
454   rtmps = rtmp + shift; ics = ic + shift;
455 
456   if (!a->diag) {
457     ierr = MatMarkDiagonal_SeqAIJ(A); CHKERRQ(ierr);
458   }
459 
460   if (b->lu_shift) { /* set max shift */
461     int *aai = a->i,*ddiag = a->diag;
462     shift_top = 0;
463     for (i=0; i<n; i++) {
464       d = PetscAbsScalar((a->a)[ddiag[i]+shift]);
465       /* calculate amt of shift needed for this row */
466       if (d<0) row_shift = 0; else row_shift = -2*d;
467       v = a->a+aai[i]+shift;
468       for (j=0; j<aai[i+1]-aai[i]; j++)
469 	row_shift += PetscAbsScalar(v[j]);
470       if (row_shift>shift_top) shift_top = row_shift;
471     }
472   }
473 
474   shift_fraction = 0; shift_amount = 0;
475   do {
476     damp    = PETSC_FALSE;
477     lushift = PETSC_FALSE;
478     for (i=0; i<n; i++) {
479       nz    = ai[i+1] - ai[i];
480       ajtmp = aj + ai[i] + shift;
481       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
482 
483       /* load in initial (unfactored row) */
484       nz       = a->i[r[i]+1] - a->i[r[i]];
485       ajtmpold = a->j + a->i[r[i]] + shift;
486       v        = a->a + a->i[r[i]] + shift;
487       for (j=0; j<nz; j++) {
488         rtmp[ics[ajtmpold[j]]] = v[j];
489       }
490       rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */
491 
492       row = *ajtmp++ + shift;
493       while  (row < i) {
494         pc = rtmp + row;
495         if (*pc != 0.0) {
496           pv         = b->a + diag_offset[row] + shift;
497           pj         = b->j + diag_offset[row] + (!shift);
498           multiplier = *pc / *pv++;
499           *pc        = multiplier;
500           nz         = ai[row+1] - diag_offset[row] - 1;
501           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
502           PetscLogFlops(2*nz);
503         }
504         row = *ajtmp++ + shift;
505       }
506       /* finished row so stick it into b->a */
507       pv   = b->a + ai[i] + shift;
508       pj   = b->j + ai[i] + shift;
509       nz   = ai[i+1] - ai[i];
510       diag = diag_offset[i] - ai[i];
511       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
512       rs   = 0.0;
513       for (j=0; j<nz; j++) {
514         pv[j] = rtmps[pj[j]];
515         if (j != diag) rs += PetscAbsScalar(pv[j]);
516       }
517 #define MAX_NSHIFT 5
518       if (PetscRealPart(pv[diag]) < zeropivot*rs && b->lu_shift) {
519 	if (nshift>MAX_NSHIFT) {
520 	  SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner");
521 	} else if (nshift==MAX_NSHIFT) {
522 	  shift_fraction = shift_hi;
523 	} else {
524 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
525 	}
526 	shift_amount = shift_fraction * shift_top;
527 	lushift      = PETSC_TRUE;
528         nshift++;
529         break;
530       }
531       if (PetscAbsScalar(pv[diag]) < zeropivot*rs) {
532         if (damping) {
533           if (ndamp) damping *= 2.0;
534           damp = PETSC_TRUE;
535           ndamp++;
536           break;
537         } else {
538           SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
539         }
540       }
541     }
542     if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) {
543       /*
544        * if not already shifting up & shifting & started shifting & can refine,
545        * then try lower shift
546        */
547       shift_hi       = shift_fraction;
548       shift_fraction = (shift_hi+shift_lo)/2.;
549       shift_amount   = shift_fraction * shift_top;
550       lushift        = PETSC_TRUE;
551       nshift++;
552     }
553   } while (damp || lushift);
554 
555   /* invert diagonal entries for simplier triangular solves */
556   for (i=0; i<n; i++) {
557     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
558   }
559 
560   ierr = PetscFree(rtmp);CHKERRQ(ierr);
561   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
562   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
563   C->factor = FACTOR_LU;
564   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
565   C->assembled = PETSC_TRUE;
566   PetscLogFlops(C->n);
567   if (ndamp) {
568     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping);
569   }
570   if (nshift) {
571     b->lu_shift_fraction = shift_fraction;
572     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction\n",shift_fraction);
573   }
574   PetscFunctionReturn(0);
575 }
576 /* ----------------------------------------------------------- */
577 #undef __FUNCT__
578 #define __FUNCT__ "MatLUFactor_SeqAIJ"
579 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
580 {
581   int ierr;
582   Mat C;
583 
584   PetscFunctionBegin;
585   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
586   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
587   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
588   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
589   PetscFunctionReturn(0);
590 }
591 /* ----------------------------------------------------------- */
592 #undef __FUNCT__
593 #define __FUNCT__ "MatSolve_SeqAIJ"
594 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
595 {
596   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
597   IS           iscol = a->col,isrow = a->row;
598   int          *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
599   int          nz,shift = a->indexshift,*rout,*cout;
600   PetscScalar  *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
601 
602   PetscFunctionBegin;
603   if (!n) PetscFunctionReturn(0);
604 
605   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
606   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
607   tmp  = a->solve_work;
608 
609   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
610   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
611 
612   /* forward solve the lower triangular */
613   tmp[0] = b[*r++];
614   tmps   = tmp + shift;
615   for (i=1; i<n; i++) {
616     v   = aa + ai[i] + shift;
617     vi  = aj + ai[i] + shift;
618     nz  = a->diag[i] - ai[i];
619     sum = b[*r++];
620     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
621     tmp[i] = sum;
622   }
623 
624   /* backward solve the upper triangular */
625   for (i=n-1; i>=0; i--){
626     v   = aa + a->diag[i] + (!shift);
627     vi  = aj + a->diag[i] + (!shift);
628     nz  = ai[i+1] - a->diag[i] - 1;
629     sum = tmp[i];
630     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
631     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
632   }
633 
634   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
635   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
636   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
637   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
638   PetscLogFlops(2*a->nz - A->n);
639   PetscFunctionReturn(0);
640 }
641 
642 /* ----------------------------------------------------------- */
643 #undef __FUNCT__
644 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
645 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
646 {
647   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
648   int          n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr;
649   PetscScalar  *x,*b,*aa = a->a;
650 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
651   int          adiag_i,i,*vi,nz,ai_i;
652   PetscScalar  *v,sum;
653 #endif
654 
655   PetscFunctionBegin;
656   if (!n) PetscFunctionReturn(0);
657   if (a->indexshift) {
658      ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr);
659      PetscFunctionReturn(0);
660   }
661 
662   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
663   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
664 
665 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
666   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
667 #else
668   /* forward solve the lower triangular */
669   x[0] = b[0];
670   for (i=1; i<n; i++) {
671     ai_i = ai[i];
672     v    = aa + ai_i;
673     vi   = aj + ai_i;
674     nz   = adiag[i] - ai_i;
675     sum  = b[i];
676     while (nz--) sum -= *v++ * x[*vi++];
677     x[i] = sum;
678   }
679 
680   /* backward solve the upper triangular */
681   for (i=n-1; i>=0; i--){
682     adiag_i = adiag[i];
683     v       = aa + adiag_i + 1;
684     vi      = aj + adiag_i + 1;
685     nz      = ai[i+1] - adiag_i - 1;
686     sum     = x[i];
687     while (nz--) sum -= *v++ * x[*vi++];
688     x[i]    = sum*aa[adiag_i];
689   }
690 #endif
691   PetscLogFlops(2*a->nz - A->n);
692   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
693   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
699 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
700 {
701   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
702   IS           iscol = a->col,isrow = a->row;
703   int          *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
704   int          nz,shift = a->indexshift,*rout,*cout;
705   PetscScalar  *x,*b,*tmp,*aa = a->a,sum,*v;
706 
707   PetscFunctionBegin;
708   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
709 
710   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
711   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
712   tmp  = a->solve_work;
713 
714   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
715   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
716 
717   /* forward solve the lower triangular */
718   tmp[0] = b[*r++];
719   for (i=1; i<n; i++) {
720     v   = aa + ai[i] + shift;
721     vi  = aj + ai[i] + shift;
722     nz  = a->diag[i] - ai[i];
723     sum = b[*r++];
724     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
725     tmp[i] = sum;
726   }
727 
728   /* backward solve the upper triangular */
729   for (i=n-1; i>=0; i--){
730     v   = aa + a->diag[i] + (!shift);
731     vi  = aj + a->diag[i] + (!shift);
732     nz  = ai[i+1] - a->diag[i] - 1;
733     sum = tmp[i];
734     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
735     tmp[i] = sum*aa[a->diag[i]+shift];
736     x[*c--] += tmp[i];
737   }
738 
739   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
740   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
741   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
742   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
743   PetscLogFlops(2*a->nz);
744 
745   PetscFunctionReturn(0);
746 }
747 /* -------------------------------------------------------------------*/
748 #undef __FUNCT__
749 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
750 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
751 {
752   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
753   IS           iscol = a->col,isrow = a->row;
754   int          *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
755   int          nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
756   PetscScalar  *x,*b,*tmp,*aa = a->a,*v,s1;
757 
758   PetscFunctionBegin;
759   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
760   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
761   tmp  = a->solve_work;
762 
763   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
764   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
765 
766   /* copy the b into temp work space according to permutation */
767   for (i=0; i<n; i++) tmp[i] = b[c[i]];
768 
769   /* forward solve the U^T */
770   for (i=0; i<n; i++) {
771     v   = aa + diag[i] + shift;
772     vi  = aj + diag[i] + (!shift);
773     nz  = ai[i+1] - diag[i] - 1;
774     s1  = tmp[i];
775     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
776     while (nz--) {
777       tmp[*vi++ + shift] -= (*v++)*s1;
778     }
779     tmp[i] = s1;
780   }
781 
782   /* backward solve the L^T */
783   for (i=n-1; i>=0; i--){
784     v   = aa + diag[i] - 1 + shift;
785     vi  = aj + diag[i] - 1 + shift;
786     nz  = diag[i] - ai[i];
787     s1  = tmp[i];
788     while (nz--) {
789       tmp[*vi-- + shift] -= (*v--)*s1;
790     }
791   }
792 
793   /* copy tmp into x according to permutation */
794   for (i=0; i<n; i++) x[r[i]] = tmp[i];
795 
796   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
797   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
798   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
799   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
800 
801   PetscLogFlops(2*a->nz-A->n);
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
807 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
808 {
809   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
810   IS           iscol = a->col,isrow = a->row;
811   int          *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
812   int          nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
813   PetscScalar  *x,*b,*tmp,*aa = a->a,*v;
814 
815   PetscFunctionBegin;
816   if (zz != xx) VecCopy(zz,xx);
817 
818   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820   tmp = a->solve_work;
821 
822   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
823   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
824 
825   /* copy the b into temp work space according to permutation */
826   for (i=0; i<n; i++) tmp[i] = b[c[i]];
827 
828   /* forward solve the U^T */
829   for (i=0; i<n; i++) {
830     v   = aa + diag[i] + shift;
831     vi  = aj + diag[i] + (!shift);
832     nz  = ai[i+1] - diag[i] - 1;
833     tmp[i] *= *v++;
834     while (nz--) {
835       tmp[*vi++ + shift] -= (*v++)*tmp[i];
836     }
837   }
838 
839   /* backward solve the L^T */
840   for (i=n-1; i>=0; i--){
841     v   = aa + diag[i] - 1 + shift;
842     vi  = aj + diag[i] - 1 + shift;
843     nz  = diag[i] - ai[i];
844     while (nz--) {
845       tmp[*vi-- + shift] -= (*v--)*tmp[i];
846     }
847   }
848 
849   /* copy tmp into x according to permutation */
850   for (i=0; i<n; i++) x[r[i]] += tmp[i];
851 
852   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
853   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
854   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
855   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
856 
857   PetscLogFlops(2*a->nz);
858   PetscFunctionReturn(0);
859 }
860 /* ----------------------------------------------------------------*/
861 EXTERN int MatMissingDiagonal_SeqAIJ(Mat);
862 
863 #undef __FUNCT__
864 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
865 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
866 {
867   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
868   IS         isicol;
869   int        *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j;
870   int        *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
871   int        *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
872   int        incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill;
873   PetscTruth col_identity,row_identity;
874   PetscReal  f;
875 
876   PetscFunctionBegin;
877   f             = info->fill;
878   levels        = (int)info->levels;
879   diagonal_fill = (int)info->diagonal_fill;
880   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
881 
882   /* special case that simply copies fill pattern */
883   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
884   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
885   if (!levels && row_identity && col_identity) {
886     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
887     (*fact)->factor = FACTOR_LU;
888     b               = (Mat_SeqAIJ*)(*fact)->data;
889     if (!b->diag) {
890       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
891     }
892     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
893     b->row              = isrow;
894     b->col              = iscol;
895     b->icol             = isicol;
896     b->lu_damping       = info->damping;
897     b->lu_zeropivot     = info->zeropivot;
898     b->lu_shift         = info->lu_shift;
899     b->lu_shift_fraction= info->lu_shift_fraction;
900     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
901     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
902     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
903     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
904     PetscFunctionReturn(0);
905   }
906 
907   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
908   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
909 
910   /* get new row pointers */
911   ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr);
912   ainew[0] = -shift;
913   /* don't know how many column pointers are needed so estimate */
914   jmax = (int)(f*(ai[n]+!shift));
915   ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr);
916   /* ajfill is level of fill for each fill entry */
917   ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr);
918   /* fill is a linked list of nonzeros in active row */
919   ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr);
920   /* im is level for each filled value */
921   ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr);
922   /* dloc is location of diagonal in factor */
923   ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr);
924   dloc[0]  = 0;
925   for (prow=0; prow<n; prow++) {
926 
927     /* copy current row into linked list */
928     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
929     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
930     xi      = aj + ai[r[prow]] + shift;
931     fill[n]    = n;
932     fill[prow] = -1; /* marker to indicate if diagonal exists */
933     while (nz--) {
934       fm  = n;
935       idx = ic[*xi++ + shift];
936       do {
937         m  = fm;
938         fm = fill[m];
939       } while (fm < idx);
940       fill[m]   = idx;
941       fill[idx] = fm;
942       im[idx]   = 0;
943     }
944 
945     /* make sure diagonal entry is included */
946     if (diagonal_fill && fill[prow] == -1) {
947       fm = n;
948       while (fill[fm] < prow) fm = fill[fm];
949       fill[prow] = fill[fm]; /* insert diagonal into linked list */
950       fill[fm]   = prow;
951       im[prow]   = 0;
952       nzf++;
953       dcount++;
954     }
955 
956     nzi = 0;
957     row = fill[n];
958     while (row < prow) {
959       incrlev = im[row] + 1;
960       nz      = dloc[row];
961       xi      = ajnew  + ainew[row] + shift + nz + 1;
962       flev    = ajfill + ainew[row] + shift + nz + 1;
963       nnz     = ainew[row+1] - ainew[row] - nz - 1;
964       fm      = row;
965       while (nnz-- > 0) {
966         idx = *xi++ + shift;
967         if (*flev + incrlev > levels) {
968           flev++;
969           continue;
970         }
971         do {
972           m  = fm;
973           fm = fill[m];
974         } while (fm < idx);
975         if (fm != idx) {
976           im[idx]   = *flev + incrlev;
977           fill[m]   = idx;
978           fill[idx] = fm;
979           fm        = idx;
980           nzf++;
981         } else {
982           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
983         }
984         flev++;
985       }
986       row = fill[row];
987       nzi++;
988     }
989     /* copy new filled row into permanent storage */
990     ainew[prow+1] = ainew[prow] + nzf;
991     if (ainew[prow+1] > jmax-shift) {
992 
993       /* estimate how much additional space we will need */
994       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
995       /* just double the memory each time */
996       /*  maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
997       int maxadd = jmax;
998       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
999       jmax += maxadd;
1000 
1001       /* allocate a longer ajnew and ajfill */
1002       ierr   = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr);
1003       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr);
1004       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
1005       ajnew  = xi;
1006       ierr   = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr);
1007       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr);
1008       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
1009       ajfill = xi;
1010       realloc++; /* count how many times we realloc */
1011     }
1012     xi          = ajnew + ainew[prow] + shift;
1013     flev        = ajfill + ainew[prow] + shift;
1014     dloc[prow]  = nzi;
1015     fm          = fill[n];
1016     while (nzf--) {
1017       *xi++   = fm - shift;
1018       *flev++ = im[fm];
1019       fm      = fill[fm];
1020     }
1021     /* make sure row has diagonal entry */
1022     if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) {
1023       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\
1024     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1025     }
1026   }
1027   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1028   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1029   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1030   ierr = PetscFree(fill);CHKERRQ(ierr);
1031   ierr = PetscFree(im);CHKERRQ(ierr);
1032 
1033   {
1034     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1035     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1036     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1037     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1038     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1039     if (diagonal_fill) {
1040       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount);
1041     }
1042   }
1043 
1044   /* put together the new matrix */
1045   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr);
1046   PetscLogObjectParent(*fact,isicol);
1047   b = (Mat_SeqAIJ*)(*fact)->data;
1048   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1049   b->singlemalloc = PETSC_FALSE;
1050   len = (ainew[n] + shift)*sizeof(PetscScalar);
1051   /* the next line frees the default space generated by the Create() */
1052   ierr = PetscFree(b->a);CHKERRQ(ierr);
1053   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1054   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1055   b->j          = ajnew;
1056   b->i          = ainew;
1057   for (i=0; i<n; i++) dloc[i] += ainew[i];
1058   b->diag       = dloc;
1059   b->ilen       = 0;
1060   b->imax       = 0;
1061   b->row        = isrow;
1062   b->col        = iscol;
1063   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1064   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1065   b->icol       = isicol;
1066   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1067   /* In b structure:  Free imax, ilen, old a, old j.
1068      Allocate dloc, solve_work, new a, new j */
1069   PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(PetscScalar)));
1070   b->maxnz             = b->nz = ainew[n] + shift;
1071   b->lu_damping        = info->damping;
1072   b->lu_shift          = info->lu_shift;
1073   b->lu_shift_fraction = info->lu_shift_fraction;
1074   b->lu_zeropivot = info->zeropivot;
1075   (*fact)->factor = FACTOR_LU;
1076   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
1077   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1078 
1079   (*fact)->info.factor_mallocs    = realloc;
1080   (*fact)->info.fill_ratio_given  = f;
1081   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #include "src/mat/impls/sbaij/seq/sbaij.h"
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1088 int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact)
1089 {
1090   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1091   int                 ierr;
1092 
1093   PetscFunctionBegin;
1094   if (!a->sbaijMat){
1095     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1096   }
1097 
1098   ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr);
1099   ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);
1100   a->sbaijMat = PETSC_NULL;
1101 
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
1107 int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1108 {
1109   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1110   int                 ierr;
1111   PetscTruth          perm_identity;
1112 
1113   PetscFunctionBegin;
1114   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1115   if (!perm_identity){
1116     SETERRQ(1,"Non-identity permutation is not supported yet");
1117   }
1118   if (!a->sbaijMat){
1119     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1120   }
1121 
1122   ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
1123   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1124 
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1130 int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1131 {
1132   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1133   int                 ierr;
1134   PetscTruth          perm_identity;
1135 
1136   PetscFunctionBegin;
1137   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1138   if (!perm_identity){
1139     SETERRQ(1,"Non-identity permutation is not supported yet");
1140   }
1141   if (!a->sbaijMat){
1142     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1143   }
1144 
1145   ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
1146   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1147 
1148   PetscFunctionReturn(0);
1149 }
1150