xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 86bacbd289654a409d70be6250945b1b2f8959ce)
1*86bacbd2SBarry Smith /*$Id: aijfact.c,v 1.127 1999/11/24 21:53:47 bsmith Exp bsmith $*/
2289bc588SBarry Smith 
370f55243SBarry Smith #include "src/mat/impls/aij/seq/aij.h"
490f02eecSBarry Smith #include "src/vec/vecimpl.h"
51c4feecaSSatish Balay #include "src/inline/dot.h"
63b2fbd54SBarry Smith 
75615d1e5SSatish Balay #undef __FUNC__
891e9ee9fSBarry Smith #define __FUNC__ "MatOrdering_Flow_SeqAIJ"
991e9ee9fSBarry Smith int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
103b2fbd54SBarry Smith {
113a40ed3dSBarry Smith   PetscFunctionBegin;
123a40ed3dSBarry Smith 
13e3372554SBarry Smith   SETERRQ(PETSC_ERR_SUP,0,"Code not written");
14aa482453SBarry Smith #if !defined(PETSC_USE_DEBUG)
15d64ed03dSBarry Smith   PetscFunctionReturn(0);
16d64ed03dSBarry Smith #endif
173b2fbd54SBarry Smith }
183b2fbd54SBarry Smith 
19*86bacbd2SBarry Smith 
20*86bacbd2SBarry Smith extern int MatMarkDiagonal_SeqAIJ(Mat);
21*86bacbd2SBarry Smith extern int Mat_AIJ_CheckInode(Mat);
22*86bacbd2SBarry Smith 
23*86bacbd2SBarry Smith #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT)
24*86bacbd2SBarry Smith 
25*86bacbd2SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
26*86bacbd2SBarry Smith #define dperm_  DPERM
27*86bacbd2SBarry Smith #define ilutp_  ILUTP
28*86bacbd2SBarry Smith #define ilu0_   ILU0
29*86bacbd2SBarry Smith #define msrcsr_ MSRCSR
30*86bacbd2SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
31*86bacbd2SBarry Smith #define dperm_  dperm
32*86bacbd2SBarry Smith #define ilutp_  ilutp
33*86bacbd2SBarry Smith #define ilu0_   ilu0
34*86bacbd2SBarry Smith #define msrcsr_ msrcsr
35*86bacbd2SBarry Smith #endif
36*86bacbd2SBarry Smith 
37*86bacbd2SBarry Smith EXTERN_C_BEGIN
38*86bacbd2SBarry Smith extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*);
39*86bacbd2SBarry Smith extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*);
40*86bacbd2SBarry Smith extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*);
41*86bacbd2SBarry Smith extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *);
42*86bacbd2SBarry Smith EXTERN_C_END
43*86bacbd2SBarry Smith 
44*86bacbd2SBarry Smith #undef __FUNC__
45*86bacbd2SBarry Smith #define __FUNC__ "MatILUDTFactor_SeqAIJ"
46*86bacbd2SBarry Smith   /* ------------------------------------------------------------
47*86bacbd2SBarry Smith 
48*86bacbd2SBarry Smith           This interface was contribed by Tony Caola
49*86bacbd2SBarry Smith 
50*86bacbd2SBarry Smith      This routine is an interface to the pivoting drop-tolerance
51*86bacbd2SBarry Smith      ILU routine written by Yousef Saad (saad@cs.umn.edu).  It
52*86bacbd2SBarry Smith      was inspired by the non-pivoting iludt written by David
53*86bacbd2SBarry Smith      Hysom (hysom@cs.odu.edu).
54*86bacbd2SBarry Smith 
55*86bacbd2SBarry Smith      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
56*86bacbd2SBarry Smith      help in getting this routine ironed out.
57*86bacbd2SBarry Smith 
58*86bacbd2SBarry Smith      As of right now, there are a couple of things that could
59*86bacbd2SBarry Smith      be, uh, better.
60*86bacbd2SBarry Smith 
61*86bacbd2SBarry Smith      1 - Since Saad's routine is Fortran based, memory cannot be
62*86bacbd2SBarry Smith      malloc'd.  I was trying to get the expected fill from the
63*86bacbd2SBarry Smith      preconditioner and use this number as the multiplier in
64*86bacbd2SBarry Smith      the equation for jmax, below, but couldn't figure it out.
65*86bacbd2SBarry Smith      Anyway, perhaps a better solution is to run SPARSKIT through
66*86bacbd2SBarry Smith      f2c and incorporate mallocs(), but I want to graduate so I'll
67*86bacbd2SBarry Smith      just rebuild Petsc. . .
68*86bacbd2SBarry Smith 
69*86bacbd2SBarry Smith      shift = 1, ishift = 0, for indices start at 1
70*86bacbd2SBarry Smith      shift = 0, ishift = 1, for indices starting at 0
71*86bacbd2SBarry Smith      ------------------------------------------------------------
72*86bacbd2SBarry Smith   */
73*86bacbd2SBarry Smith 
74*86bacbd2SBarry Smith int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
75*86bacbd2SBarry Smith {
76*86bacbd2SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
77*86bacbd2SBarry Smith   IS         iscolf, isrowf, isicol, isirow;
78*86bacbd2SBarry Smith   PetscTruth reorder;
79*86bacbd2SBarry Smith   int        *c,*r,*ic,*ir, ierr, i, n = a->m;
80*86bacbd2SBarry Smith   int        *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j;
81*86bacbd2SBarry Smith   int        *ordcol, *ordrow,*iwk,*iperm, *jw;
82*86bacbd2SBarry Smith   int        ishift = !a->indexshift,shift = -a->indexshift;
83*86bacbd2SBarry Smith   int        jmax,lfill,job;
84*86bacbd2SBarry Smith   Scalar     *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0;
85*86bacbd2SBarry Smith 
86*86bacbd2SBarry Smith   PetscFunctionBegin;
87*86bacbd2SBarry Smith 
88*86bacbd2SBarry Smith   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
89*86bacbd2SBarry Smith   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax);
90*86bacbd2SBarry Smith   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
91*86bacbd2SBarry Smith   if (info->fill == PETSC_DEFAULT)    info->fill    = (n*info->dtcount)/a->nz;
92*86bacbd2SBarry Smith   lfill   = info->dtcount/2;
93*86bacbd2SBarry Smith   jmax    = info->fill*a->nz;
94*86bacbd2SBarry Smith   permtol = info->dtcol;
95*86bacbd2SBarry Smith 
96*86bacbd2SBarry Smith   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
97*86bacbd2SBarry Smith   ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr);
98*86bacbd2SBarry Smith 
99*86bacbd2SBarry Smith 
100*86bacbd2SBarry Smith   /* ------------------------------------------------------------
101*86bacbd2SBarry Smith      If reorder=.TRUE., then the original matrix has to be
102*86bacbd2SBarry Smith      reordered to reflect the user selected ordering scheme, and
103*86bacbd2SBarry Smith      then de-reordered so it is in it's original format.
104*86bacbd2SBarry Smith      Because Saad's dperm() is NOT in place, we have to copy
105*86bacbd2SBarry Smith      the original matrix and allocate more storage. . .
106*86bacbd2SBarry Smith      ------------------------------------------------------------
107*86bacbd2SBarry Smith   */
108*86bacbd2SBarry Smith 
109*86bacbd2SBarry Smith   /* set reorder to true if either isrow or iscol is not identity */
110*86bacbd2SBarry Smith   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
111*86bacbd2SBarry Smith   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
112*86bacbd2SBarry Smith   reorder = PetscNot(reorder);
113*86bacbd2SBarry Smith 
114*86bacbd2SBarry Smith   ierr = ISGetIndices(iscol,&c);           CHKERRQ(ierr);
115*86bacbd2SBarry Smith   ierr = ISGetIndices(isrow,&r);           CHKERRQ(ierr);
116*86bacbd2SBarry Smith   ierr = ISGetIndices(isicol,&ic);          CHKERRQ(ierr);
117*86bacbd2SBarry Smith   ierr = ISGetIndices(isirow,&ir);          CHKERRQ(ierr);
118*86bacbd2SBarry Smith 
119*86bacbd2SBarry Smith   /* storage for ilu factor */
120*86bacbd2SBarry Smith   new_i = (int *)    PetscMalloc((n+1)*sizeof(int));   CHKPTRQ(new_i);
121*86bacbd2SBarry Smith   new_j = (int *)    PetscMalloc(jmax*sizeof(int));    CHKPTRQ(new_j);
122*86bacbd2SBarry Smith   new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a);
123*86bacbd2SBarry Smith 
124*86bacbd2SBarry Smith   if (reorder) {
125*86bacbd2SBarry Smith     old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2);
126*86bacbd2SBarry Smith     old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2);
127*86bacbd2SBarry Smith     old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2);
128*86bacbd2SBarry Smith   }
129*86bacbd2SBarry Smith 
130*86bacbd2SBarry Smith   ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol);
131*86bacbd2SBarry Smith   ordrow = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordrow);
132*86bacbd2SBarry Smith 
133*86bacbd2SBarry Smith   /* ------------------------------------------------------------
134*86bacbd2SBarry Smith      Make sure that everything is Fortran formatted (1-Based)
135*86bacbd2SBarry Smith      ------------------------------------------------------------
136*86bacbd2SBarry Smith   */
137*86bacbd2SBarry Smith   for (i=old_i[0]-shift;i<old_i[n]-shift;i++) {
138*86bacbd2SBarry Smith     old_j[i] = old_j[i]+ishift;
139*86bacbd2SBarry Smith     if (reorder) {
140*86bacbd2SBarry Smith       old_j2[i] = old_j[i];
141*86bacbd2SBarry Smith       old_a2[i] = old_a[i];
142*86bacbd2SBarry Smith     }
143*86bacbd2SBarry Smith   };
144*86bacbd2SBarry Smith 
145*86bacbd2SBarry Smith   for(i=0;i<n+1;i++) {
146*86bacbd2SBarry Smith     old_i[i] = old_i[i]+ishift;
147*86bacbd2SBarry Smith     if (reorder) old_i2[i]=old_i[i];
148*86bacbd2SBarry Smith   };
149*86bacbd2SBarry Smith 
150*86bacbd2SBarry Smith   for(i=0;i<n;i++) {
151*86bacbd2SBarry Smith     r[i]  = r[i]+1;
152*86bacbd2SBarry Smith     c[i]  = c[i]+1;
153*86bacbd2SBarry Smith     ir[i] = ir[i]+1;
154*86bacbd2SBarry Smith     ic[i] = ic[i]+1;
155*86bacbd2SBarry Smith   }
156*86bacbd2SBarry Smith 
157*86bacbd2SBarry Smith   if (reorder) {
158*86bacbd2SBarry Smith     job = 3;
159*86bacbd2SBarry Smith     dperm_(&n,old_a2,old_j2,old_i2,old_a,old_j,old_i,r,c,&job);
160*86bacbd2SBarry Smith   }
161*86bacbd2SBarry Smith 
162*86bacbd2SBarry Smith   /* ------------------------------------------------------------
163*86bacbd2SBarry Smith      Call Saad's ilutp() routine to generate the factorization
164*86bacbd2SBarry Smith      ------------------------------------------------------------
165*86bacbd2SBarry Smith   */
166*86bacbd2SBarry Smith 
167*86bacbd2SBarry Smith   iperm   = (int *)    PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm);
168*86bacbd2SBarry Smith   jw      = (int *)    PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw);
169*86bacbd2SBarry Smith   w       = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w);
170*86bacbd2SBarry Smith 
171*86bacbd2SBarry Smith 
172*86bacbd2SBarry Smith   ilutp_(&n,old_a,old_j,old_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
173*86bacbd2SBarry Smith   if (ierr) {
174*86bacbd2SBarry Smith     switch (ierr) {
175*86bacbd2SBarry Smith       case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax);
176*86bacbd2SBarry Smith                break;
177*86bacbd2SBarry Smith       case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax);
178*86bacbd2SBarry Smith                break;
179*86bacbd2SBarry Smith       case -5: SETERRQ(1,1,"ilutp(), zero row encountered");
180*86bacbd2SBarry Smith                break;
181*86bacbd2SBarry Smith       case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong");
182*86bacbd2SBarry Smith                break;
183*86bacbd2SBarry Smith       case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax);
184*86bacbd2SBarry Smith                break;
185*86bacbd2SBarry Smith       default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr);
186*86bacbd2SBarry Smith     }
187*86bacbd2SBarry Smith   }
188*86bacbd2SBarry Smith 
189*86bacbd2SBarry Smith   ierr = PetscFree(w);CHKERRQ(ierr);
190*86bacbd2SBarry Smith   ierr = PetscFree(jw);CHKERRQ(ierr);
191*86bacbd2SBarry Smith 
192*86bacbd2SBarry Smith 
193*86bacbd2SBarry Smith   /* ------------------------------------------------------------
194*86bacbd2SBarry Smith      Saad's routine gives the result in Modified Sparse Row (msr)
195*86bacbd2SBarry Smith      Convert to Compressed Sparse Row format (csr)
196*86bacbd2SBarry Smith      ------------------------------------------------------------
197*86bacbd2SBarry Smith   */
198*86bacbd2SBarry Smith 
199*86bacbd2SBarry Smith   wk  = (Scalar *)    PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk);
200*86bacbd2SBarry Smith   iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk);
201*86bacbd2SBarry Smith 
202*86bacbd2SBarry Smith   msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
203*86bacbd2SBarry Smith 
204*86bacbd2SBarry Smith   ierr = PetscFree(iwk);CHKERRQ(ierr);
205*86bacbd2SBarry Smith   ierr = PetscFree(wk);CHKERRQ(ierr);
206*86bacbd2SBarry Smith 
207*86bacbd2SBarry Smith   for(i=old_i[0]; i<=old_i[n]; i++) {
208*86bacbd2SBarry Smith     old_j[i-1] = iperm[old_j[i-1]-1];
209*86bacbd2SBarry Smith     if (reorder) {
210*86bacbd2SBarry Smith       old_j2[i-1] = old_j[i-1];
211*86bacbd2SBarry Smith       old_a2[i-1] = old_a[i-1];
212*86bacbd2SBarry Smith     }
213*86bacbd2SBarry Smith   };
214*86bacbd2SBarry Smith 
215*86bacbd2SBarry Smith   if (reorder) {
216*86bacbd2SBarry Smith     for(i=0; i<n+1; i++) {
217*86bacbd2SBarry Smith       old_i2[i]=old_i[i];
218*86bacbd2SBarry Smith     };
219*86bacbd2SBarry Smith 
220*86bacbd2SBarry Smith     job = 3;
221*86bacbd2SBarry Smith     dperm_(&n,old_a2,old_j2,old_i2,old_a,old_j,old_i,ir,ic,&job);
222*86bacbd2SBarry Smith     ierr = PetscFree(old_a2);CHKERRQ(ierr);
223*86bacbd2SBarry Smith     ierr = PetscFree(old_j2);CHKERRQ(ierr);
224*86bacbd2SBarry Smith     ierr = PetscFree(old_i2);CHKERRQ(ierr);
225*86bacbd2SBarry Smith   }
226*86bacbd2SBarry Smith 
227*86bacbd2SBarry Smith   /* get rid of the shift to incies statrting at 1 */
228*86bacbd2SBarry Smith   for (i=0; i<n+1; i++) {
229*86bacbd2SBarry Smith     old_i[i] = old_i[i]-ishift;
230*86bacbd2SBarry Smith   }
231*86bacbd2SBarry Smith 
232*86bacbd2SBarry Smith   for (i=old_i[0]-shift;i<=old_i[n]-shift;i++) {
233*86bacbd2SBarry Smith     old_j[i-1] = old_j[i-1]-ishift;
234*86bacbd2SBarry Smith   }
235*86bacbd2SBarry Smith 
236*86bacbd2SBarry Smith   /* Make the return matrix 0-based */
237*86bacbd2SBarry Smith 
238*86bacbd2SBarry Smith   for (i=new_i[0]-shift;i<=new_i[n]-shift;i++) {
239*86bacbd2SBarry Smith     new_j[i-1] = new_j[i-1]-ishift;
240*86bacbd2SBarry Smith   }
241*86bacbd2SBarry Smith 
242*86bacbd2SBarry Smith   for (i=0; i<n+1; i++) {
243*86bacbd2SBarry Smith     new_i[i] = new_i[i]-ishift;
244*86bacbd2SBarry Smith   }
245*86bacbd2SBarry Smith 
246*86bacbd2SBarry Smith   for (i=0;i<n;i++) {
247*86bacbd2SBarry Smith     r[i]  = r[i]-1;
248*86bacbd2SBarry Smith     c[i]  = c[i]-1;
249*86bacbd2SBarry Smith     ir[i] = ir[i]-1;
250*86bacbd2SBarry Smith     ic[i] = ic[i]-1;
251*86bacbd2SBarry Smith   }
252*86bacbd2SBarry Smith 
253*86bacbd2SBarry Smith   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
254*86bacbd2SBarry Smith   /*-- permute the right-hand-side and solution vectors           --*/
255*86bacbd2SBarry Smith   for(i=0; i<n; i++) {
256*86bacbd2SBarry Smith     ordcol[i] = ic[iperm[i]-1];
257*86bacbd2SBarry Smith     ordrow[i] = ir[i];
258*86bacbd2SBarry Smith   };
259*86bacbd2SBarry Smith 
260*86bacbd2SBarry Smith   ierr = PetscFree(iperm);CHKERRQ(ierr);
261*86bacbd2SBarry Smith 
262*86bacbd2SBarry Smith   ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr);
263*86bacbd2SBarry Smith   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
264*86bacbd2SBarry Smith 
265*86bacbd2SBarry Smith   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
266*86bacbd2SBarry Smith   ierr = ISRestoreIndices(isirow,&ir); CHKERRQ(ierr);
267*86bacbd2SBarry Smith 
268*86bacbd2SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf);
269*86bacbd2SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordrow,&isrowf);
270*86bacbd2SBarry Smith 
271*86bacbd2SBarry Smith   /*----- put together the new matrix -----*/
272*86bacbd2SBarry Smith 
273*86bacbd2SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr);
274*86bacbd2SBarry Smith   (*fact)->factor    = FACTOR_LU;
275*86bacbd2SBarry Smith   (*fact)->assembled = PETSC_TRUE;
276*86bacbd2SBarry Smith 
277*86bacbd2SBarry Smith   b = (Mat_SeqAIJ *) (*fact)->data;
278*86bacbd2SBarry Smith   ierr = PetscFree(b->imax);CHKERRQ(ierr);
279*86bacbd2SBarry Smith   b->sorted        = PETSC_FALSE;
280*86bacbd2SBarry Smith   b->singlemalloc  = PETSC_TRUE;
281*86bacbd2SBarry Smith   /* the next line frees the default space generated by the Create() */
282*86bacbd2SBarry Smith   ierr             = PetscFree(b->a);CHKERRQ(ierr);
283*86bacbd2SBarry Smith   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
284*86bacbd2SBarry Smith   b->a             = new_a;
285*86bacbd2SBarry Smith   b->j             = new_j;
286*86bacbd2SBarry Smith   b->i             = new_i;
287*86bacbd2SBarry Smith   b->ilen          = 0;
288*86bacbd2SBarry Smith   b->imax          = 0;
289*86bacbd2SBarry Smith   b->row           = isrowf;
290*86bacbd2SBarry Smith   b->col           = iscolf;
291*86bacbd2SBarry Smith   b->solve_work    =  (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
292*86bacbd2SBarry Smith   b->maxnz = b->nz = new_i[n];
293*86bacbd2SBarry Smith   b->indexshift    = a->indexshift;
294*86bacbd2SBarry Smith   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
295*86bacbd2SBarry Smith   (*fact)->info.factor_mallocs = 0;
296*86bacbd2SBarry Smith 
297*86bacbd2SBarry Smith   PLogFlops(b->n);
298*86bacbd2SBarry Smith 
299*86bacbd2SBarry Smith   a->j      = old_j;
300*86bacbd2SBarry Smith   a->i      = old_i;
301*86bacbd2SBarry Smith   a->a      = old_a;
302*86bacbd2SBarry Smith   a->sorted = PETSC_FALSE;
303*86bacbd2SBarry Smith   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
304*86bacbd2SBarry Smith 
305*86bacbd2SBarry Smith   /* check out for identical nodes. If found, use inode functions */
306*86bacbd2SBarry Smith   ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr);
307*86bacbd2SBarry Smith 
308*86bacbd2SBarry Smith   PetscFunctionReturn(0);
309*86bacbd2SBarry Smith }
310*86bacbd2SBarry Smith #else
311*86bacbd2SBarry Smith #undef __FUNC__
312*86bacbd2SBarry Smith #define __FUNC__ "MatILUDTFactor_SeqAIJ"
313*86bacbd2SBarry Smith int MatILUDTFactor_SeqAIJ(Mat A,double dt,int maxnz,IS isrow,IS iscol,Mat *fact)
314*86bacbd2SBarry Smith {
315*86bacbd2SBarry Smith   PetscFunctionBegin;
316*86bacbd2SBarry Smith   SETERRQ(1,1,"You must install Saad's ILUDT to use this");
317*86bacbd2SBarry Smith }
318*86bacbd2SBarry Smith #endif
319*86bacbd2SBarry Smith 
320289bc588SBarry Smith /*
321289bc588SBarry Smith     Factorization code for AIJ format.
322289bc588SBarry Smith */
3235615d1e5SSatish Balay #undef __FUNC__
3245615d1e5SSatish Balay #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ"
325416022c9SBarry Smith int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
326289bc588SBarry Smith {
327416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
328289bc588SBarry Smith   IS         isicol;
329416022c9SBarry Smith   int        *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j;
330416022c9SBarry Smith   int        *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift;
33191bf9adeSBarry Smith   int        *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im;
332289bc588SBarry Smith 
3333a40ed3dSBarry Smith   PetscFunctionBegin;
334d3cbd50cSLois Curfman McInnes   PetscValidHeaderSpecific(isrow,IS_COOKIE);
335d3cbd50cSLois Curfman McInnes   PetscValidHeaderSpecific(iscol,IS_COOKIE);
336f1af5d2fSBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square");
3373b2fbd54SBarry Smith 
33878b31e54SBarry Smith   ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr);
339ac121b3dSBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
340ac121b3dSBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
341289bc588SBarry Smith 
342289bc588SBarry Smith   /* get new row pointers */
3430452661fSBarry Smith   ainew    = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew);
344dbb450caSBarry Smith   ainew[0] = -shift;
345289bc588SBarry Smith   /* don't know how many column pointers are needed so estimate */
346dbb450caSBarry Smith   jmax  = (int) (f*ai[n]+(!shift));
3470452661fSBarry Smith   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew);
348289bc588SBarry Smith   /* fill is a linked list of nonzeros in active row */
3490452661fSBarry Smith   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill);
3502fb3ed76SBarry Smith   im   = fill + n + 1;
351289bc588SBarry Smith   /* idnew is location of diagonal in factor */
3520452661fSBarry Smith   idnew    = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew);
353dbb450caSBarry Smith   idnew[0] = -shift;
354289bc588SBarry Smith 
355289bc588SBarry Smith   for ( i=0; i<n; i++ ) {
356289bc588SBarry Smith     /* first copy previous fill into linked list */
357289bc588SBarry Smith     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
3586b96ef82SSatish Balay     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
359dbb450caSBarry Smith     ajtmp   = aj + ai[r[i]] + shift;
360289bc588SBarry Smith     fill[n] = n;
361289bc588SBarry Smith     while (nz--) {
362289bc588SBarry Smith       fm  = n;
363dbb450caSBarry Smith       idx = ic[*ajtmp++ + shift];
364289bc588SBarry Smith       do {
365289bc588SBarry Smith         m  = fm;
366289bc588SBarry Smith         fm = fill[m];
367289bc588SBarry Smith       } while (fm < idx);
368289bc588SBarry Smith       fill[m]   = idx;
369289bc588SBarry Smith       fill[idx] = fm;
370289bc588SBarry Smith     }
371289bc588SBarry Smith     row = fill[n];
372289bc588SBarry Smith     while ( row < i ) {
373dbb450caSBarry Smith       ajtmp = ajnew + idnew[row] + (!shift);
3742fb3ed76SBarry Smith       nzbd  = 1 + idnew[row] - ainew[row];
3752fb3ed76SBarry Smith       nz    = im[row] - nzbd;
376289bc588SBarry Smith       fm    = row;
3772fb3ed76SBarry Smith       while (nz-- > 0) {
378dbb450caSBarry Smith         idx = *ajtmp++ + shift;
3792fb3ed76SBarry Smith         nzbd++;
3802fb3ed76SBarry Smith         if (idx == i) im[row] = nzbd;
381289bc588SBarry Smith         do {
382289bc588SBarry Smith           m  = fm;
383289bc588SBarry Smith           fm = fill[m];
384289bc588SBarry Smith         } while (fm < idx);
385289bc588SBarry Smith         if (fm != idx) {
386289bc588SBarry Smith           fill[m]   = idx;
387289bc588SBarry Smith           fill[idx] = fm;
388289bc588SBarry Smith           fm        = idx;
389289bc588SBarry Smith           nnz++;
390289bc588SBarry Smith         }
391289bc588SBarry Smith       }
392289bc588SBarry Smith       row = fill[row];
393289bc588SBarry Smith     }
394289bc588SBarry Smith     /* copy new filled row into permanent storage */
395289bc588SBarry Smith     ainew[i+1] = ainew[i] + nnz;
396496e697eSBarry Smith     if (ainew[i+1] > jmax) {
397ecf371e4SBarry Smith 
398ecf371e4SBarry Smith       /* estimate how much additional space we will need */
399ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
400ecf371e4SBarry Smith       /* just double the memory each time */
401ecf371e4SBarry Smith       int maxadd = jmax;
402ecf371e4SBarry Smith       /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */
403bbb6d6a8SBarry Smith       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
4042fb3ed76SBarry Smith       jmax += maxadd;
405ecf371e4SBarry Smith 
406ecf371e4SBarry Smith       /* allocate a longer ajnew */
4070452661fSBarry Smith       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
408549d3d68SSatish Balay       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr);
409606d414cSSatish Balay       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
410289bc588SBarry Smith       ajnew = ajtmp;
4112fb3ed76SBarry Smith       realloc++; /* count how many times we realloc */
412289bc588SBarry Smith     }
413dbb450caSBarry Smith     ajtmp = ajnew + ainew[i] + shift;
414289bc588SBarry Smith     fm    = fill[n];
415289bc588SBarry Smith     nzi   = 0;
4162fb3ed76SBarry Smith     im[i] = nnz;
417289bc588SBarry Smith     while (nnz--) {
418289bc588SBarry Smith       if (fm < i) nzi++;
419dbb450caSBarry Smith       *ajtmp++ = fm - shift;
420289bc588SBarry Smith       fm       = fill[fm];
421289bc588SBarry Smith     }
422289bc588SBarry Smith     idnew[i] = ainew[i] + nzi;
423289bc588SBarry Smith   }
424464e8d44SSatish Balay   if (ai[n] != 0) {
425464e8d44SSatish Balay     double af = ((double)ainew[n])/((double)ai[n]);
426c38d4ed2SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
427981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
428981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
429981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
43005bf559cSSatish Balay   } else {
431981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
43205bf559cSSatish Balay   }
4332fb3ed76SBarry Smith 
434898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
435898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
4361987afe7SBarry Smith 
437606d414cSSatish Balay   ierr = PetscFree(fill);CHKERRQ(ierr);
438289bc588SBarry Smith 
439289bc588SBarry Smith   /* put together the new matrix */
440b4fd4287SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr);
4411987afe7SBarry Smith   PLogObjectParent(*B,isicol);
442416022c9SBarry Smith   b = (Mat_SeqAIJ *) (*B)->data;
443606d414cSSatish Balay   ierr = PetscFree(b->imax);CHKERRQ(ierr);
4447c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
445e8d4e0b9SBarry Smith   /* the next line frees the default space generated by the Create() */
446606d414cSSatish Balay   ierr = PetscFree(b->a);CHKERRQ(ierr);
447606d414cSSatish Balay   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
44891bf9adeSBarry Smith   b->a          = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a);
449416022c9SBarry Smith   b->j          = ajnew;
450416022c9SBarry Smith   b->i          = ainew;
451416022c9SBarry Smith   b->diag       = idnew;
452416022c9SBarry Smith   b->ilen       = 0;
453416022c9SBarry Smith   b->imax       = 0;
454416022c9SBarry Smith   b->row        = isrow;
455416022c9SBarry Smith   b->col        = iscol;
456c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
457c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
45882bf6240SBarry Smith   b->icol       = isicol;
45991bf9adeSBarry Smith   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
460416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
4617fd98bd6SLois Curfman McInnes      Allocate idnew, solve_work, new a, new j */
462416022c9SBarry Smith   PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
463416022c9SBarry Smith   b->maxnz = b->nz = ainew[n] + shift;
4647fd98bd6SLois Curfman McInnes 
465e93ccc4dSBarry Smith   (*B)->factor                 =  FACTOR_LU;;
466ae068f58SLois Curfman McInnes   (*B)->info.factor_mallocs    = realloc;
467ae068f58SLois Curfman McInnes   (*B)->info.fill_ratio_given  = f;
468703deb49SSatish Balay   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant if A has inodes */
469703deb49SSatish Balay 
470e93ccc4dSBarry Smith   if (ai[n] != 0) {
471e93ccc4dSBarry Smith     (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]);
472eea2913aSSatish Balay   } else {
473eea2913aSSatish Balay     (*B)->info.fill_ratio_needed = 0.0;
474eea2913aSSatish Balay   }
4753a40ed3dSBarry Smith   PetscFunctionReturn(0);
476289bc588SBarry Smith }
4770a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
478184914b5SBarry Smith extern int Mat_AIJ_CheckInode(Mat);
47941c01911SSatish Balay 
4805615d1e5SSatish Balay #undef __FUNC__
4815615d1e5SSatish Balay #define __FUNC__ "MatLUFactorNumeric_SeqAIJ"
482416022c9SBarry Smith int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
483289bc588SBarry Smith {
484416022c9SBarry Smith   Mat        C = *B;
485416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data;
48682bf6240SBarry Smith   IS         isrow = b->row, isicol = b->icol;
487416022c9SBarry Smith   int        *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j;
4881987afe7SBarry Smith   int        *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift;
489f2582111SSatish Balay   int        *diag_offset = b->diag,diag,k;
49035aab85fSBarry Smith   int        preserve_row_sums = (int) a->ilu_preserve_row_sums;
4913a40ed3dSBarry Smith   register   int    *pj;
4928ecf7165SBarry Smith   Scalar     *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0;
49335aab85fSBarry Smith   double     ssum;
49435aab85fSBarry Smith   register   Scalar *pv, *rtmps,*u_values;
495289bc588SBarry Smith 
4963a40ed3dSBarry Smith   PetscFunctionBegin;
49782bf6240SBarry Smith 
49878b31e54SBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
49978b31e54SBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
5000452661fSBarry Smith   rtmp  = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp);
501549d3d68SSatish Balay   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr);
5020cf60383SBarry Smith   rtmps = rtmp + shift; ics = ic + shift;
503289bc588SBarry Smith 
5046cf997b0SBarry Smith   /* precalculate row sums */
50535aab85fSBarry Smith   if (preserve_row_sums) {
50635aab85fSBarry Smith     rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums);
50735aab85fSBarry Smith     for ( i=0; i<n; i++ ) {
50835aab85fSBarry Smith       nz  = a->i[r[i]+1] - a->i[r[i]];
50935aab85fSBarry Smith       v   = a->a + a->i[r[i]] + shift;
51035aab85fSBarry Smith       sum = 0.0;
51135aab85fSBarry Smith       for ( j=0; j<nz; j++ ) sum += v[j];
51235aab85fSBarry Smith       rowsums[i] = sum;
51335aab85fSBarry Smith     }
51435aab85fSBarry Smith   }
51535aab85fSBarry Smith 
516289bc588SBarry Smith   for ( i=0; i<n; i++ ) {
517289bc588SBarry Smith     nz    = ai[i+1] - ai[i];
518dbb450caSBarry Smith     ajtmp = aj + ai[i] + shift;
51948e94559SBarry Smith     for  ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0;
520289bc588SBarry Smith 
521289bc588SBarry Smith     /* load in initial (unfactored row) */
522416022c9SBarry Smith     nz       = a->i[r[i]+1] - a->i[r[i]];
523416022c9SBarry Smith     ajtmpold = a->j + a->i[r[i]] + shift;
524416022c9SBarry Smith     v        = a->a + a->i[r[i]] + shift;
5250cf60383SBarry Smith     for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] =  v[j];
526289bc588SBarry Smith 
527dbb450caSBarry Smith     row = *ajtmp++ + shift;
528f2582111SSatish Balay     while  (row < i ) {
5298c37ef55SBarry Smith       pc = rtmp + row;
5308c37ef55SBarry Smith       if (*pc != 0.0) {
5311987afe7SBarry Smith         pv         = b->a + diag_offset[row] + shift;
5321987afe7SBarry Smith         pj         = b->j + diag_offset[row] + (!shift);
53335aab85fSBarry Smith         multiplier = *pc / *pv++;
5348c37ef55SBarry Smith         *pc        = multiplier;
535f2582111SSatish Balay         nz         = ai[row+1] - diag_offset[row] - 1;
536f2582111SSatish Balay         for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
537f2582111SSatish Balay         PLogFlops(2*nz);
538289bc588SBarry Smith       }
539dbb450caSBarry Smith       row = *ajtmp++ + shift;
540289bc588SBarry Smith     }
541416022c9SBarry Smith     /* finished row so stick it into b->a */
542416022c9SBarry Smith     pv = b->a + ai[i] + shift;
543416022c9SBarry Smith     pj = b->j + ai[i] + shift;
5448c37ef55SBarry Smith     nz = ai[i+1] - ai[i];
545416022c9SBarry Smith     for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];}
54635aab85fSBarry Smith     diag = diag_offset[i] - ai[i];
54735aab85fSBarry Smith     /*
54835aab85fSBarry Smith           Possibly adjust diagonal entry on current row to force
54935aab85fSBarry Smith         LU matrix to have same row sum as initial matrix.
55035aab85fSBarry Smith     */
551d708749eSBarry Smith     if (pv[diag] == 0.0) {
5526cf997b0SBarry Smith       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i);
553d708749eSBarry Smith     }
55435aab85fSBarry Smith     if (preserve_row_sums) {
55535aab85fSBarry Smith       pj  = b->j + ai[i] + shift;
55635aab85fSBarry Smith       sum = rowsums[i];
55735aab85fSBarry Smith       for ( j=0; j<diag; j++ ) {
55835aab85fSBarry Smith         u_values  = b->a + diag_offset[pj[j]] + shift;
55935aab85fSBarry Smith         nz        = ai[pj[j]+1] - diag_offset[pj[j]];
56035aab85fSBarry Smith         inner_sum = 0.0;
56135aab85fSBarry Smith         for ( k=0; k<nz; k++ ) {
56235aab85fSBarry Smith           inner_sum += u_values[k];
56335aab85fSBarry Smith         }
56435aab85fSBarry Smith         sum -= pv[j]*inner_sum;
56535aab85fSBarry Smith 
56635aab85fSBarry Smith       }
56735aab85fSBarry Smith       nz       = ai[i+1] - diag_offset[i] - 1;
56835aab85fSBarry Smith       u_values = b->a + diag_offset[i] + 1 + shift;
56935aab85fSBarry Smith       for ( k=0; k<nz; k++ ) {
57035aab85fSBarry Smith         sum -= u_values[k];
57135aab85fSBarry Smith       }
57235aab85fSBarry Smith       ssum = PetscAbsScalar(sum/pv[diag]);
57335aab85fSBarry Smith       if (ssum < 1000. && ssum > .001) pv[diag] = sum;
57435aab85fSBarry Smith     }
57535aab85fSBarry Smith     /* check pivot entry for current row */
5768c37ef55SBarry Smith   }
5770f11f9aeSSatish Balay 
57835aab85fSBarry Smith   /* invert diagonal entries for simplier triangular solves */
57935aab85fSBarry Smith   for ( i=0; i<n; i++ ) {
58035aab85fSBarry Smith     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
58135aab85fSBarry Smith   }
58235aab85fSBarry Smith 
583606d414cSSatish Balay   if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);}
584606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
58578b31e54SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
58678b31e54SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
587416022c9SBarry Smith   C->factor = FACTOR_LU;
58841c01911SSatish Balay   ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr);
589c456f294SBarry Smith   C->assembled = PETSC_TRUE;
590416022c9SBarry Smith   PLogFlops(b->n);
5913a40ed3dSBarry Smith   PetscFunctionReturn(0);
592289bc588SBarry Smith }
5930a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
5945615d1e5SSatish Balay #undef __FUNC__
5955615d1e5SSatish Balay #define __FUNC__ "MatLUFactor_SeqAIJ"
596416022c9SBarry Smith int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f)
597da3a660dSBarry Smith {
598416022c9SBarry Smith   Mat_SeqAIJ     *mat = (Mat_SeqAIJ *) A->data;
599*86bacbd2SBarry Smith   int            ierr,refct;
600416022c9SBarry Smith   Mat            C;
601f830108cSBarry Smith   PetscOps       *Abops;
6020a6ffc59SBarry Smith   MatOps         Aops;
603416022c9SBarry Smith 
6043a40ed3dSBarry Smith   PetscFunctionBegin;
605f2582111SSatish Balay   ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr);
606f2582111SSatish Balay   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
607da3a660dSBarry Smith 
608da3a660dSBarry Smith   /* free all the data structures from mat */
609606d414cSSatish Balay   ierr = PetscFree(mat->a);CHKERRQ(ierr);
610606d414cSSatish Balay   if (!mat->singlemalloc) {
611606d414cSSatish Balay     ierr = PetscFree(mat->i);CHKERRQ(ierr);
612606d414cSSatish Balay     ierr = PetscFree(mat->j);CHKERRQ(ierr);
613606d414cSSatish Balay   }
614606d414cSSatish Balay   if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);}
615606d414cSSatish Balay   if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);}
616606d414cSSatish Balay   if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);}
617606d414cSSatish Balay   if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);}
618606d414cSSatish Balay   if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);}
6190f0aacb9SBarry Smith   if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);}
620606d414cSSatish Balay   ierr = PetscFree(mat);CHKERRQ(ierr);
621da3a660dSBarry Smith 
62217642b18SBarry Smith   ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
62317642b18SBarry Smith   ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
62417642b18SBarry Smith 
625f830108cSBarry Smith   /*
626f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
627f830108cSBarry Smith     A pointers for the bops and ops but copy everything
628f830108cSBarry Smith     else from C.
629f830108cSBarry Smith   */
630f830108cSBarry Smith   Abops = A->bops;
631f830108cSBarry Smith   Aops  = A->ops;
632*86bacbd2SBarry Smith   refct = A->refct;
633549d3d68SSatish Balay   ierr  = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
634451c4af8SSatish Balay   mat   = (Mat_SeqAIJ *) A->data;
635451c4af8SSatish Balay   PLogObjectParent(A,mat->icol);
636451c4af8SSatish Balay 
637f830108cSBarry Smith   A->bops  = Abops;
638f830108cSBarry Smith   A->ops   = Aops;
639bef8e0ddSBarry Smith   A->qlist = 0;
640*86bacbd2SBarry Smith   A->refct = refct;
641c38d4ed2SBarry Smith   /* copy over the type_name and name */
642c38d4ed2SBarry Smith   ierr     = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr);
643c38d4ed2SBarry Smith   ierr     = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr);
644f830108cSBarry Smith 
6450452661fSBarry Smith   PetscHeaderDestroy(C);
646c38d4ed2SBarry Smith 
6473a40ed3dSBarry Smith   PetscFunctionReturn(0);
648da3a660dSBarry Smith }
6490a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
6505615d1e5SSatish Balay #undef __FUNC__
6515615d1e5SSatish Balay #define __FUNC__ "MatSolve_SeqAIJ"
652416022c9SBarry Smith int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx)
6538c37ef55SBarry Smith {
654416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
655416022c9SBarry Smith   IS         iscol = a->col, isrow = a->row;
656416022c9SBarry Smith   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
6573b2fbd54SBarry Smith   int        nz,shift = a->indexshift,*rout,*cout;
658416022c9SBarry Smith   Scalar     *x,*b,*tmp, *tmps, *aa = a->a, sum, *v;
6598c37ef55SBarry Smith 
6603a40ed3dSBarry Smith   PetscFunctionBegin;
6613a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
66291bf9adeSBarry Smith 
663e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
664e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
665416022c9SBarry Smith   tmp  = a->solve_work;
6668c37ef55SBarry Smith 
6673b2fbd54SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
6683b2fbd54SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
6698c37ef55SBarry Smith 
6708c37ef55SBarry Smith   /* forward solve the lower triangular */
6718c37ef55SBarry Smith   tmp[0] = b[*r++];
6720cf60383SBarry Smith   tmps   = tmp + shift;
6738c37ef55SBarry Smith   for ( i=1; i<n; i++ ) {
674dbb450caSBarry Smith     v   = aa + ai[i] + shift;
675dbb450caSBarry Smith     vi  = aj + ai[i] + shift;
676416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
6778c37ef55SBarry Smith     sum = b[*r++];
6780cf60383SBarry Smith     while (nz--) sum -= *v++ * tmps[*vi++];
6798c37ef55SBarry Smith     tmp[i] = sum;
6808c37ef55SBarry Smith   }
6818c37ef55SBarry Smith 
6828c37ef55SBarry Smith   /* backward solve the upper triangular */
6838c37ef55SBarry Smith   for ( i=n-1; i>=0; i-- ){
684416022c9SBarry Smith     v   = aa + a->diag[i] + (!shift);
685416022c9SBarry Smith     vi  = aj + a->diag[i] + (!shift);
686416022c9SBarry Smith     nz  = ai[i+1] - a->diag[i] - 1;
6878c37ef55SBarry Smith     sum = tmp[i];
6880cf60383SBarry Smith     while (nz--) sum -= *v++ * tmps[*vi++];
689416022c9SBarry Smith     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
6908c37ef55SBarry Smith   }
6918c37ef55SBarry Smith 
6923b2fbd54SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
6933b2fbd54SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
694cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
695cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
696416022c9SBarry Smith   PLogFlops(2*a->nz - a->n);
6973a40ed3dSBarry Smith   PetscFunctionReturn(0);
6988c37ef55SBarry Smith }
699026e39d0SSatish Balay 
700930ae53cSBarry Smith /* ----------------------------------------------------------- */
701930ae53cSBarry Smith #undef __FUNC__
702930ae53cSBarry Smith #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering"
703930ae53cSBarry Smith int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx)
704930ae53cSBarry Smith {
705930ae53cSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
706d85afc42SSatish Balay   int        n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr;
707d85afc42SSatish Balay   Scalar     *x,*b, *aa = a->a, sum;
708aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
709d85afc42SSatish Balay   int        adiag_i,i,*vi,nz,ai_i;
710d85afc42SSatish Balay   Scalar     *v;
711d85afc42SSatish Balay #endif
712930ae53cSBarry Smith 
7133a40ed3dSBarry Smith   PetscFunctionBegin;
7143a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
715930ae53cSBarry Smith   if (a->indexshift) {
7163a40ed3dSBarry Smith      ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr);
7173a40ed3dSBarry Smith      PetscFunctionReturn(0);
718930ae53cSBarry Smith   }
719930ae53cSBarry Smith 
720e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
721e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
722930ae53cSBarry Smith 
723aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
7241c4feecaSSatish Balay   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
7251c4feecaSSatish Balay #else
726930ae53cSBarry Smith   /* forward solve the lower triangular */
727930ae53cSBarry Smith   x[0] = b[0];
728930ae53cSBarry Smith   for ( i=1; i<n; i++ ) {
729930ae53cSBarry Smith     ai_i = ai[i];
730930ae53cSBarry Smith     v    = aa + ai_i;
731930ae53cSBarry Smith     vi   = aj + ai_i;
732930ae53cSBarry Smith     nz   = adiag[i] - ai_i;
733930ae53cSBarry Smith     sum  = b[i];
734930ae53cSBarry Smith     while (nz--) sum -= *v++ * x[*vi++];
735930ae53cSBarry Smith     x[i] = sum;
736930ae53cSBarry Smith   }
737930ae53cSBarry Smith 
738930ae53cSBarry Smith   /* backward solve the upper triangular */
739930ae53cSBarry Smith   for ( i=n-1; i>=0; i-- ){
740930ae53cSBarry Smith     adiag_i = adiag[i];
741d708749eSBarry Smith     v       = aa + adiag_i + 1;
742d708749eSBarry Smith     vi      = aj + adiag_i + 1;
743930ae53cSBarry Smith     nz      = ai[i+1] - adiag_i - 1;
744930ae53cSBarry Smith     sum     = x[i];
745930ae53cSBarry Smith     while (nz--) sum -= *v++ * x[*vi++];
746930ae53cSBarry Smith     x[i]    = sum*aa[adiag_i];
747930ae53cSBarry Smith   }
7481c4feecaSSatish Balay #endif
749930ae53cSBarry Smith   PLogFlops(2*a->nz - a->n);
750cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
751cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7523a40ed3dSBarry Smith   PetscFunctionReturn(0);
753930ae53cSBarry Smith }
754930ae53cSBarry Smith 
7555615d1e5SSatish Balay #undef __FUNC__
7565615d1e5SSatish Balay #define __FUNC__ "MatSolveAdd_SeqAIJ"
757416022c9SBarry Smith int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx)
758da3a660dSBarry Smith {
759416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
760416022c9SBarry Smith   IS         iscol = a->col, isrow = a->row;
761416022c9SBarry Smith   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
7623b2fbd54SBarry Smith   int        nz, shift = a->indexshift,*rout,*cout;
763416022c9SBarry Smith   Scalar     *x,*b,*tmp, *aa = a->a, sum, *v;
764da3a660dSBarry Smith 
7653a40ed3dSBarry Smith   PetscFunctionBegin;
76678b31e54SBarry Smith   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
767da3a660dSBarry Smith 
768e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
769e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
770416022c9SBarry Smith   tmp  = a->solve_work;
771da3a660dSBarry Smith 
7723b2fbd54SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
7733b2fbd54SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
774da3a660dSBarry Smith 
775da3a660dSBarry Smith   /* forward solve the lower triangular */
776da3a660dSBarry Smith   tmp[0] = b[*r++];
777da3a660dSBarry Smith   for ( i=1; i<n; i++ ) {
778dbb450caSBarry Smith     v   = aa + ai[i] + shift;
779dbb450caSBarry Smith     vi  = aj + ai[i] + shift;
780416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
781da3a660dSBarry Smith     sum = b[*r++];
782dbb450caSBarry Smith     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
783da3a660dSBarry Smith     tmp[i] = sum;
784da3a660dSBarry Smith   }
785da3a660dSBarry Smith 
786da3a660dSBarry Smith   /* backward solve the upper triangular */
787da3a660dSBarry Smith   for ( i=n-1; i>=0; i-- ){
788416022c9SBarry Smith     v   = aa + a->diag[i] + (!shift);
789416022c9SBarry Smith     vi  = aj + a->diag[i] + (!shift);
790416022c9SBarry Smith     nz  = ai[i+1] - a->diag[i] - 1;
791da3a660dSBarry Smith     sum = tmp[i];
792dbb450caSBarry Smith     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
793416022c9SBarry Smith     tmp[i] = sum*aa[a->diag[i]+shift];
794da3a660dSBarry Smith     x[*c--] += tmp[i];
795da3a660dSBarry Smith   }
796da3a660dSBarry Smith 
7973b2fbd54SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
7983b2fbd54SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
799cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
800cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
801416022c9SBarry Smith   PLogFlops(2*a->nz);
802898183ecSLois Curfman McInnes 
8033a40ed3dSBarry Smith   PetscFunctionReturn(0);
804da3a660dSBarry Smith }
805da3a660dSBarry Smith /* -------------------------------------------------------------------*/
8065615d1e5SSatish Balay #undef __FUNC__
8077c922b88SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqAIJ"
8087c922b88SBarry Smith int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx)
809da3a660dSBarry Smith {
810416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
8112235e667SBarry Smith   IS         iscol = a->col, isrow = a->row;
812416022c9SBarry Smith   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
813f1af5d2fSBarry Smith   int        nz,shift = a->indexshift,*rout,*cout, *diag = a->diag;
814f1af5d2fSBarry Smith   Scalar     *x,*b,*tmp, *aa = a->a, *v, s1;
815da3a660dSBarry Smith 
8163a40ed3dSBarry Smith   PetscFunctionBegin;
817e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
818e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
819416022c9SBarry Smith   tmp  = a->solve_work;
820da3a660dSBarry Smith 
8212235e667SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
8222235e667SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
823da3a660dSBarry Smith 
824da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
8252235e667SBarry Smith   for ( i=0; i<n; i++ ) tmp[i] = b[c[i]];
826da3a660dSBarry Smith 
827da3a660dSBarry Smith   /* forward solve the U^T */
828da3a660dSBarry Smith   for ( i=0; i<n; i++ ) {
829f1af5d2fSBarry Smith     v   = aa + diag[i] + shift;
830f1af5d2fSBarry Smith     vi  = aj + diag[i] + (!shift);
831f1af5d2fSBarry Smith     nz  = ai[i+1] - diag[i] - 1;
832c38d4ed2SBarry Smith     s1  = tmp[i];
833c38d4ed2SBarry Smith     s1 *= *(v++);  /* multiply by inverse of diagonal entry */
834da3a660dSBarry Smith     while (nz--) {
835f1af5d2fSBarry Smith       tmp[*vi++ + shift] -= (*v++)*s1;
836da3a660dSBarry Smith     }
837c38d4ed2SBarry Smith     tmp[i] = s1;
838da3a660dSBarry Smith   }
839da3a660dSBarry Smith 
840da3a660dSBarry Smith   /* backward solve the L^T */
841da3a660dSBarry Smith   for ( i=n-1; i>=0; i-- ){
842f1af5d2fSBarry Smith     v   = aa + diag[i] - 1 + shift;
843f1af5d2fSBarry Smith     vi  = aj + diag[i] - 1 + shift;
844f1af5d2fSBarry Smith     nz  = diag[i] - ai[i];
845f1af5d2fSBarry Smith     s1  = tmp[i];
846da3a660dSBarry Smith     while (nz--) {
847f1af5d2fSBarry Smith       tmp[*vi-- + shift] -= (*v--)*s1;
848da3a660dSBarry Smith     }
849da3a660dSBarry Smith   }
850da3a660dSBarry Smith 
851da3a660dSBarry Smith   /* copy tmp into x according to permutation */
852da3a660dSBarry Smith   for ( i=0; i<n; i++ ) x[r[i]] = tmp[i];
853da3a660dSBarry Smith 
8542235e667SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
8552235e667SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
856cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
857cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
858da3a660dSBarry Smith 
859416022c9SBarry Smith   PLogFlops(2*a->nz-a->n);
8603a40ed3dSBarry Smith   PetscFunctionReturn(0);
861da3a660dSBarry Smith }
862da3a660dSBarry Smith 
8635615d1e5SSatish Balay #undef __FUNC__
8647c922b88SBarry Smith #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ"
8657c922b88SBarry Smith int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx)
866da3a660dSBarry Smith {
867416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
8682235e667SBarry Smith   IS         iscol = a->col, isrow = a->row;
869416022c9SBarry Smith   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
870f1af5d2fSBarry Smith   int        nz,shift = a->indexshift, *rout, *cout, *diag = a->diag;
871416022c9SBarry Smith   Scalar     *x,*b,*tmp, *aa = a->a, *v;
8726abc6512SBarry Smith 
8733a40ed3dSBarry Smith   PetscFunctionBegin;
8746abc6512SBarry Smith   if (zz != xx) VecCopy(zz,xx);
8756abc6512SBarry Smith 
876e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
877e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
878416022c9SBarry Smith   tmp = a->solve_work;
8796abc6512SBarry Smith 
8802235e667SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
8812235e667SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
8826abc6512SBarry Smith 
8836abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
8842235e667SBarry Smith   for ( i=0; i<n; i++ ) tmp[i] = b[c[i]];
8856abc6512SBarry Smith 
8866abc6512SBarry Smith   /* forward solve the U^T */
8876abc6512SBarry Smith   for ( i=0; i<n; i++ ) {
888f1af5d2fSBarry Smith     v   = aa + diag[i] + shift;
889f1af5d2fSBarry Smith     vi  = aj + diag[i] + (!shift);
890f1af5d2fSBarry Smith     nz  = ai[i+1] - diag[i] - 1;
8916abc6512SBarry Smith     tmp[i] *= *v++;
8926abc6512SBarry Smith     while (nz--) {
893dbb450caSBarry Smith       tmp[*vi++ + shift] -= (*v++)*tmp[i];
8946abc6512SBarry Smith     }
8956abc6512SBarry Smith   }
8966abc6512SBarry Smith 
8976abc6512SBarry Smith   /* backward solve the L^T */
8986abc6512SBarry Smith   for ( i=n-1; i>=0; i-- ){
899f1af5d2fSBarry Smith     v   = aa + diag[i] - 1 + shift;
900f1af5d2fSBarry Smith     vi  = aj + diag[i] - 1 + shift;
901f1af5d2fSBarry Smith     nz  = diag[i] - ai[i];
9026abc6512SBarry Smith     while (nz--) {
903dbb450caSBarry Smith       tmp[*vi-- + shift] -= (*v--)*tmp[i];
9046abc6512SBarry Smith     }
9056abc6512SBarry Smith   }
9066abc6512SBarry Smith 
9076abc6512SBarry Smith   /* copy tmp into x according to permutation */
9086abc6512SBarry Smith   for ( i=0; i<n; i++ ) x[r[i]] += tmp[i];
9096abc6512SBarry Smith 
9102235e667SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
9112235e667SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
912cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
913cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
9146abc6512SBarry Smith 
915416022c9SBarry Smith   PLogFlops(2*a->nz);
9163a40ed3dSBarry Smith   PetscFunctionReturn(0);
917da3a660dSBarry Smith }
9189e25ed09SBarry Smith /* ----------------------------------------------------------------*/
9197c922b88SBarry Smith extern int MatMissingDiagonal_SeqAIJ(Mat);
92008480c60SBarry Smith 
9215615d1e5SSatish Balay #undef __FUNC__
9225615d1e5SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ"
9236cf997b0SBarry Smith int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
9249e25ed09SBarry Smith {
925416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
9269e25ed09SBarry Smith   IS         isicol;
927416022c9SBarry Smith   int        *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j;
92856beaf04SBarry Smith   int        *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
929335d9088SBarry Smith   int        *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0, dcount = 0;
9306cf997b0SBarry Smith   int        incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill;
93177c4ece6SBarry Smith   PetscTruth col_identity, row_identity;
9326cf997b0SBarry Smith   double     f;
9339e25ed09SBarry Smith 
9343a40ed3dSBarry Smith   PetscFunctionBegin;
9356cf997b0SBarry Smith   if (info) {
9366cf997b0SBarry Smith     f             = info->fill;
9370cd17293SBarry Smith     levels        = (int) info->levels;
9380cd17293SBarry Smith     diagonal_fill = (int) info->diagonal_fill;
9396cf997b0SBarry Smith   } else {
9406cf997b0SBarry Smith     f             = 1.0;
9416cf997b0SBarry Smith     levels        = 0;
9426cf997b0SBarry Smith     diagonal_fill = 0;
9436cf997b0SBarry Smith   }
94482bf6240SBarry Smith   ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr);
94582bf6240SBarry Smith 
94608480c60SBarry Smith   /* special case that simply copies fill pattern */
947be0abb6dSBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
948be0abb6dSBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
949*86bacbd2SBarry Smith   if (!levels && row_identity && col_identity) {
9502e8a6d31SBarry Smith     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
95108480c60SBarry Smith     (*fact)->factor = FACTOR_LU;
95208480c60SBarry Smith     b               = (Mat_SeqAIJ *) (*fact)->data;
95308480c60SBarry Smith     if (!b->diag) {
9547c922b88SBarry Smith       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
95508480c60SBarry Smith     }
9567c922b88SBarry Smith     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
95708480c60SBarry Smith     b->row              = isrow;
95808480c60SBarry Smith     b->col              = iscol;
95982bf6240SBarry Smith     b->icol             = isicol;
9600452661fSBarry Smith     b->solve_work       = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
961f830108cSBarry Smith     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
962c38d4ed2SBarry Smith     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
963c38d4ed2SBarry Smith     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
9643a40ed3dSBarry Smith     PetscFunctionReturn(0);
96508480c60SBarry Smith   }
96608480c60SBarry Smith 
967898183ecSLois Curfman McInnes   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
968898183ecSLois Curfman McInnes   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
9699e25ed09SBarry Smith 
9709e25ed09SBarry Smith   /* get new row pointers */
9710452661fSBarry Smith   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew);
972dbb450caSBarry Smith   ainew[0] = -shift;
9739e25ed09SBarry Smith   /* don't know how many column pointers are needed so estimate */
974dbb450caSBarry Smith   jmax = (int) (f*(ai[n]+!shift));
9750452661fSBarry Smith   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew);
9769e25ed09SBarry Smith   /* ajfill is level of fill for each fill entry */
9770452661fSBarry Smith   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill);
9789e25ed09SBarry Smith   /* fill is a linked list of nonzeros in active row */
9790452661fSBarry Smith   fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill);
98056beaf04SBarry Smith   /* im is level for each filled value */
9810452661fSBarry Smith   im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im);
98256beaf04SBarry Smith   /* dloc is location of diagonal in factor */
9830452661fSBarry Smith   dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc);
98456beaf04SBarry Smith   dloc[0]  = 0;
98556beaf04SBarry Smith   for ( prow=0; prow<n; prow++ ) {
9866cf997b0SBarry Smith 
9876cf997b0SBarry Smith     /* copy current row into linked list */
98856beaf04SBarry Smith     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
989385f7a74SSatish Balay     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
990dbb450caSBarry Smith     xi      = aj + ai[r[prow]] + shift;
9919e25ed09SBarry Smith     fill[n]    = n;
992435faa5fSBarry Smith     fill[prow] = -1; /* marker to indicate if diagonal exists */
9939e25ed09SBarry Smith     while (nz--) {
9949e25ed09SBarry Smith       fm  = n;
995dbb450caSBarry Smith       idx = ic[*xi++ + shift];
9969e25ed09SBarry Smith       do {
9979e25ed09SBarry Smith         m  = fm;
9989e25ed09SBarry Smith         fm = fill[m];
9999e25ed09SBarry Smith       } while (fm < idx);
10009e25ed09SBarry Smith       fill[m]   = idx;
10019e25ed09SBarry Smith       fill[idx] = fm;
100256beaf04SBarry Smith       im[idx]   = 0;
10039e25ed09SBarry Smith     }
10046cf997b0SBarry Smith 
10056cf997b0SBarry Smith     /* make sure diagonal entry is included */
1006435faa5fSBarry Smith     if (diagonal_fill && fill[prow] == -1) {
10076cf997b0SBarry Smith       fm = n;
1008435faa5fSBarry Smith       while (fill[fm] < prow) fm = fill[fm];
1009435faa5fSBarry Smith       fill[prow] = fill[fm]; /* insert diagonal into linked list */
1010435faa5fSBarry Smith       fill[fm]   = prow;
10116cf997b0SBarry Smith       im[prow]   = 0;
10126cf997b0SBarry Smith       nzf++;
1013335d9088SBarry Smith       dcount++;
10146cf997b0SBarry Smith     }
10156cf997b0SBarry Smith 
101656beaf04SBarry Smith     nzi = 0;
10179e25ed09SBarry Smith     row = fill[n];
101856beaf04SBarry Smith     while ( row < prow ) {
101956beaf04SBarry Smith       incrlev = im[row] + 1;
102056beaf04SBarry Smith       nz      = dloc[row];
10216cf997b0SBarry Smith       xi      = ajnew  + ainew[row] + shift + nz + 1;
1022dbb450caSBarry Smith       flev    = ajfill + ainew[row] + shift + nz + 1;
1023416022c9SBarry Smith       nnz     = ainew[row+1] - ainew[row] - nz - 1;
102456beaf04SBarry Smith       fm      = row;
102556beaf04SBarry Smith       while (nnz-- > 0) {
1026dbb450caSBarry Smith         idx = *xi++ + shift;
102756beaf04SBarry Smith         if (*flev + incrlev > levels) {
102856beaf04SBarry Smith           flev++;
102956beaf04SBarry Smith           continue;
103056beaf04SBarry Smith         }
103156beaf04SBarry Smith         do {
10329e25ed09SBarry Smith           m  = fm;
10339e25ed09SBarry Smith           fm = fill[m];
103456beaf04SBarry Smith         } while (fm < idx);
10359e25ed09SBarry Smith         if (fm != idx) {
103656beaf04SBarry Smith           im[idx]   = *flev + incrlev;
10379e25ed09SBarry Smith           fill[m]   = idx;
10389e25ed09SBarry Smith           fill[idx] = fm;
10399e25ed09SBarry Smith           fm        = idx;
104056beaf04SBarry Smith           nzf++;
1041ecf371e4SBarry Smith         } else {
104256beaf04SBarry Smith           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
10439e25ed09SBarry Smith         }
104456beaf04SBarry Smith         flev++;
10459e25ed09SBarry Smith       }
10469e25ed09SBarry Smith       row = fill[row];
104756beaf04SBarry Smith       nzi++;
10489e25ed09SBarry Smith     }
10499e25ed09SBarry Smith     /* copy new filled row into permanent storage */
105056beaf04SBarry Smith     ainew[prow+1] = ainew[prow] + nzf;
1051d7e8b826SBarry Smith     if (ainew[prow+1] > jmax-shift) {
1052ecf371e4SBarry Smith 
1053ecf371e4SBarry Smith       /* estimate how much additional space we will need */
1054ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1055ecf371e4SBarry Smith       /* just double the memory each time */
1056ecf371e4SBarry Smith       /*  maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */
1057ecf371e4SBarry Smith       int maxadd = jmax;
105856beaf04SBarry Smith       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1059bbb6d6a8SBarry Smith       jmax += maxadd;
1060ecf371e4SBarry Smith 
1061ecf371e4SBarry Smith       /* allocate a longer ajnew and ajfill */
10620452661fSBarry Smith       xi     = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
1063549d3d68SSatish Balay       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr);
1064606d414cSSatish Balay       ierr = PetscFree(ajnew);CHKERRQ(ierr);
106556beaf04SBarry Smith       ajnew  = xi;
10660452661fSBarry Smith       xi     = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
1067549d3d68SSatish Balay       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr);
1068606d414cSSatish Balay       ierr = PetscFree(ajfill);CHKERRQ(ierr);
106956beaf04SBarry Smith       ajfill = xi;
1070ecf371e4SBarry Smith       realloc++; /* count how many times we realloc */
10719e25ed09SBarry Smith     }
1072dbb450caSBarry Smith     xi          = ajnew + ainew[prow] + shift;
1073dbb450caSBarry Smith     flev        = ajfill + ainew[prow] + shift;
107456beaf04SBarry Smith     dloc[prow]  = nzi;
10759e25ed09SBarry Smith     fm          = fill[n];
107656beaf04SBarry Smith     while (nzf--) {
1077dbb450caSBarry Smith       *xi++   = fm - shift;
107856beaf04SBarry Smith       *flev++ = im[fm];
10799e25ed09SBarry Smith       fm      = fill[fm];
10809e25ed09SBarry Smith     }
10816cf997b0SBarry Smith     /* make sure row has diagonal entry */
10826cf997b0SBarry Smith     if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) {
10836cf997b0SBarry Smith       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\
10846cf997b0SBarry Smith     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
10856cf997b0SBarry Smith     }
10869e25ed09SBarry Smith   }
1087606d414cSSatish Balay   ierr = PetscFree(ajfill); CHKERRQ(ierr);
1088898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1089898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1090606d414cSSatish Balay   ierr = PetscFree(fill);CHKERRQ(ierr);
1091606d414cSSatish Balay   ierr = PetscFree(im);CHKERRQ(ierr);
10929e25ed09SBarry Smith 
1093f64065bbSBarry Smith   {
1094464e8d44SSatish Balay     double af = ((double)ainew[n])/((double)ai[n]);
1095c38d4ed2SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1096981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
1097981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
1098981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1099335d9088SBarry Smith     if (diagonal_fill) {
1100335d9088SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount);
1101335d9088SBarry Smith     }
1102f64065bbSBarry Smith   }
1103bbb6d6a8SBarry Smith 
11049e25ed09SBarry Smith   /* put together the new matrix */
1105b4fd4287SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr);
1106fa6007c9SSatish Balay   PLogObjectParent(*fact,isicol);
1107416022c9SBarry Smith   b = (Mat_SeqAIJ *) (*fact)->data;
1108606d414cSSatish Balay   ierr = PetscFree(b->imax);CHKERRQ(ierr);
11097c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
1110dbb450caSBarry Smith   len = (ainew[n] + shift)*sizeof(Scalar);
11119e25ed09SBarry Smith   /* the next line frees the default space generated by the Create() */
1112606d414cSSatish Balay   ierr = PetscFree(b->a);CHKERRQ(ierr);
1113606d414cSSatish Balay   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
11146b96ef82SSatish Balay   b->a          = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a);
1115416022c9SBarry Smith   b->j          = ajnew;
1116416022c9SBarry Smith   b->i          = ainew;
111756beaf04SBarry Smith   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
1118416022c9SBarry Smith   b->diag       = dloc;
1119416022c9SBarry Smith   b->ilen       = 0;
1120416022c9SBarry Smith   b->imax       = 0;
1121416022c9SBarry Smith   b->row        = isrow;
1122416022c9SBarry Smith   b->col        = iscol;
1123c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1124c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
112582bf6240SBarry Smith   b->icol       = isicol;
112682bf6240SBarry Smith   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
1127416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
112856beaf04SBarry Smith      Allocate dloc, solve_work, new a, new j */
1129dbb450caSBarry Smith   PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
1130416022c9SBarry Smith   b->maxnz          = b->nz = ainew[n] + shift;
11319e25ed09SBarry Smith   (*fact)->factor   = FACTOR_LU;
1132ae068f58SLois Curfman McInnes 
1133ae068f58SLois Curfman McInnes   (*fact)->info.factor_mallocs    = realloc;
1134ae068f58SLois Curfman McInnes   (*fact)->info.fill_ratio_given  = f;
1135ae068f58SLois Curfman McInnes   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
1136e93ccc4dSBarry Smith   (*fact)->factor                 =  FACTOR_LU;;
1137ae068f58SLois Curfman McInnes 
11383a40ed3dSBarry Smith   PetscFunctionReturn(0);
11399e25ed09SBarry Smith }
1140d5d45c9bSBarry Smith 
1141d5d45c9bSBarry Smith 
1142d5d45c9bSBarry Smith 
1143d5d45c9bSBarry Smith 
1144