xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 07d2056ac2235beccfc0693ec2621f29d3ddd04a)
1*07d2056aSBarry Smith /*$Id: aijfact.c,v 1.133 1999/12/16 23:25:12 bsmith Exp bsmith $*/
2289bc588SBarry Smith 
370f55243SBarry Smith #include "src/mat/impls/aij/seq/aij.h"
490f02eecSBarry Smith #include "src/vec/vecimpl.h"
51c4feecaSSatish Balay #include "src/inline/dot.h"
63b2fbd54SBarry Smith 
75615d1e5SSatish Balay #undef __FUNC__
891e9ee9fSBarry Smith #define __FUNC__ "MatOrdering_Flow_SeqAIJ"
991e9ee9fSBarry Smith int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
103b2fbd54SBarry Smith {
113a40ed3dSBarry Smith   PetscFunctionBegin;
123a40ed3dSBarry Smith 
13e3372554SBarry Smith   SETERRQ(PETSC_ERR_SUP,0,"Code not written");
14aa482453SBarry Smith #if !defined(PETSC_USE_DEBUG)
15d64ed03dSBarry Smith   PetscFunctionReturn(0);
16d64ed03dSBarry Smith #endif
173b2fbd54SBarry Smith }
183b2fbd54SBarry Smith 
1986bacbd2SBarry Smith 
2086bacbd2SBarry Smith extern int MatMarkDiagonal_SeqAIJ(Mat);
2186bacbd2SBarry Smith extern int Mat_AIJ_CheckInode(Mat);
2286bacbd2SBarry Smith 
2386bacbd2SBarry Smith #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT)
2486bacbd2SBarry Smith 
2586bacbd2SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
2686bacbd2SBarry Smith #define dperm_  DPERM
2786bacbd2SBarry Smith #define ilutp_  ILUTP
2886bacbd2SBarry Smith #define ilu0_   ILU0
2986bacbd2SBarry Smith #define msrcsr_ MSRCSR
3086bacbd2SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3186bacbd2SBarry Smith #define dperm_  dperm
3286bacbd2SBarry Smith #define ilutp_  ilutp
3386bacbd2SBarry Smith #define ilu0_   ilu0
3486bacbd2SBarry Smith #define msrcsr_ msrcsr
3586bacbd2SBarry Smith #endif
3686bacbd2SBarry Smith 
3786bacbd2SBarry Smith EXTERN_C_BEGIN
3886bacbd2SBarry Smith extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*);
3986bacbd2SBarry Smith extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*);
4086bacbd2SBarry Smith extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*);
4186bacbd2SBarry Smith extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *);
4286bacbd2SBarry Smith EXTERN_C_END
4386bacbd2SBarry Smith 
4486bacbd2SBarry Smith #undef __FUNC__
4586bacbd2SBarry Smith #define __FUNC__ "MatILUDTFactor_SeqAIJ"
4686bacbd2SBarry Smith   /* ------------------------------------------------------------
4786bacbd2SBarry Smith 
4886bacbd2SBarry Smith           This interface was contribed by Tony Caola
4986bacbd2SBarry Smith 
5086bacbd2SBarry Smith      This routine is an interface to the pivoting drop-tolerance
5186bacbd2SBarry Smith      ILU routine written by Yousef Saad (saad@cs.umn.edu).  It
5286bacbd2SBarry Smith      was inspired by the non-pivoting iludt written by David
5386bacbd2SBarry Smith      Hysom (hysom@cs.odu.edu).
5486bacbd2SBarry Smith 
5586bacbd2SBarry Smith      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
5686bacbd2SBarry Smith      help in getting this routine ironed out.
5786bacbd2SBarry Smith 
5886bacbd2SBarry Smith      As of right now, there are a couple of things that could
5986bacbd2SBarry Smith      be, uh, better.
6086bacbd2SBarry Smith 
6186bacbd2SBarry Smith      1 - Since Saad's routine is Fortran based, memory cannot be
6286bacbd2SBarry Smith      malloc'd.  I was trying to get the expected fill from the
6386bacbd2SBarry Smith      preconditioner and use this number as the multiplier in
6486bacbd2SBarry Smith      the equation for jmax, below, but couldn't figure it out.
6586bacbd2SBarry Smith      Anyway, perhaps a better solution is to run SPARSKIT through
6686bacbd2SBarry Smith      f2c and incorporate mallocs(), but I want to graduate so I'll
6786bacbd2SBarry Smith      just rebuild Petsc. . .
6886bacbd2SBarry Smith 
6986bacbd2SBarry Smith      shift = 1, ishift = 0, for indices start at 1
7086bacbd2SBarry Smith      shift = 0, ishift = 1, for indices starting at 0
7186bacbd2SBarry Smith      ------------------------------------------------------------
7286bacbd2SBarry Smith   */
7386bacbd2SBarry Smith 
7486bacbd2SBarry Smith int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
7586bacbd2SBarry Smith {
7686bacbd2SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
77*07d2056aSBarry Smith   IS         iscolf, isicol, isirow;
7886bacbd2SBarry Smith   PetscTruth reorder;
7986bacbd2SBarry Smith   int        *c,*r,*ic,*ir, ierr, i, n = a->m;
8086bacbd2SBarry Smith   int        *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j;
81313ae024SBarry Smith   int        *ordcol, *iwk,*iperm, *jw;
8286bacbd2SBarry Smith   int        ishift = !a->indexshift,shift = -a->indexshift;
83b19713cbSBarry Smith   int        jmax,lfill,job,*o_i,*o_j;
84b19713cbSBarry Smith   Scalar     *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0,*o_a;
8586bacbd2SBarry Smith 
8686bacbd2SBarry Smith   PetscFunctionBegin;
8786bacbd2SBarry Smith 
8886bacbd2SBarry Smith   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
8986bacbd2SBarry Smith   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax);
9086bacbd2SBarry Smith   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
9186bacbd2SBarry Smith   if (info->fill == PETSC_DEFAULT)    info->fill    = (n*info->dtcount)/a->nz;
9286bacbd2SBarry Smith   lfill   = info->dtcount/2;
9386bacbd2SBarry Smith   jmax    = info->fill*a->nz;
9486bacbd2SBarry Smith   permtol = info->dtcol;
9586bacbd2SBarry Smith 
9686bacbd2SBarry Smith   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
9786bacbd2SBarry Smith   ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr);
9886bacbd2SBarry Smith 
9986bacbd2SBarry Smith 
10086bacbd2SBarry Smith   /* ------------------------------------------------------------
10186bacbd2SBarry Smith      If reorder=.TRUE., then the original matrix has to be
10286bacbd2SBarry Smith      reordered to reflect the user selected ordering scheme, and
10386bacbd2SBarry Smith      then de-reordered so it is in it's original format.
10486bacbd2SBarry Smith      Because Saad's dperm() is NOT in place, we have to copy
10586bacbd2SBarry Smith      the original matrix and allocate more storage. . .
10686bacbd2SBarry Smith      ------------------------------------------------------------
10786bacbd2SBarry Smith   */
10886bacbd2SBarry Smith 
10986bacbd2SBarry Smith   /* set reorder to true if either isrow or iscol is not identity */
11086bacbd2SBarry Smith   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
11186bacbd2SBarry Smith   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
11286bacbd2SBarry Smith   reorder = PetscNot(reorder);
11386bacbd2SBarry Smith 
11486bacbd2SBarry Smith 
11586bacbd2SBarry Smith   /* storage for ilu factor */
11686bacbd2SBarry Smith   new_i = (int *)    PetscMalloc((n+1)*sizeof(int));   CHKPTRQ(new_i);
11786bacbd2SBarry Smith   new_j = (int *)    PetscMalloc(jmax*sizeof(int));    CHKPTRQ(new_j);
11886bacbd2SBarry Smith   new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a);
11986bacbd2SBarry Smith 
12086bacbd2SBarry Smith   ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol);
12186bacbd2SBarry Smith 
12286bacbd2SBarry Smith   /* ------------------------------------------------------------
12386bacbd2SBarry Smith      Make sure that everything is Fortran formatted (1-Based)
12486bacbd2SBarry Smith      ------------------------------------------------------------
12586bacbd2SBarry Smith   */
126b19713cbSBarry Smith   if (ishift) {
127b19713cbSBarry Smith     for (i=old_i[0];i<old_i[n];i++) {
128b19713cbSBarry Smith       old_j[i]++;
12986bacbd2SBarry Smith     }
130b19713cbSBarry Smith     for(i=0;i<n+1;i++) {
131b19713cbSBarry Smith       old_i[i]++;
132b19713cbSBarry Smith     };
13386bacbd2SBarry Smith   };
13486bacbd2SBarry Smith 
13586bacbd2SBarry Smith   if (reorder) {
136c0c2c81eSBarry Smith     ierr = ISGetIndices(iscol,&c);           CHKERRQ(ierr);
137c0c2c81eSBarry Smith     ierr = ISGetIndices(isrow,&r);           CHKERRQ(ierr);
138c0c2c81eSBarry Smith     for(i=0;i<n;i++) {
139c0c2c81eSBarry Smith       r[i]  = r[i]+1;
140c0c2c81eSBarry Smith       c[i]  = c[i]+1;
141c0c2c81eSBarry Smith     }
142313ae024SBarry Smith     old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2);
143313ae024SBarry Smith     old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2);
144313ae024SBarry Smith     old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2);
145313ae024SBarry Smith     job = 3; dperm_(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
146c0c2c81eSBarry Smith     for (i=0;i<n;i++) {
147c0c2c81eSBarry Smith       r[i]  = r[i]-1;
148c0c2c81eSBarry Smith       c[i]  = c[i]-1;
149c0c2c81eSBarry Smith     }
150c0c2c81eSBarry Smith     ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr);
151c0c2c81eSBarry Smith     ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
152b19713cbSBarry Smith     o_a = old_a2;
153b19713cbSBarry Smith     o_j = old_j2;
154b19713cbSBarry Smith     o_i = old_i2;
155b19713cbSBarry Smith   } else {
156b19713cbSBarry Smith     o_a = old_a;
157b19713cbSBarry Smith     o_j = old_j;
158b19713cbSBarry Smith     o_i = old_i;
15986bacbd2SBarry Smith   }
16086bacbd2SBarry Smith 
16186bacbd2SBarry Smith   /* ------------------------------------------------------------
16286bacbd2SBarry Smith      Call Saad's ilutp() routine to generate the factorization
16386bacbd2SBarry Smith      ------------------------------------------------------------
16486bacbd2SBarry Smith   */
16586bacbd2SBarry Smith 
16686bacbd2SBarry Smith   iperm   = (int *)    PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm);
16786bacbd2SBarry Smith   jw      = (int *)    PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw);
16886bacbd2SBarry Smith   w       = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w);
16986bacbd2SBarry Smith 
170b19713cbSBarry Smith   ilutp_(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
17186bacbd2SBarry Smith   if (ierr) {
17286bacbd2SBarry Smith     switch (ierr) {
17386bacbd2SBarry Smith       case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax);
17486bacbd2SBarry Smith                break;
17586bacbd2SBarry Smith       case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax);
17686bacbd2SBarry Smith                break;
17786bacbd2SBarry Smith       case -5: SETERRQ(1,1,"ilutp(), zero row encountered");
17886bacbd2SBarry Smith                break;
17986bacbd2SBarry Smith       case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong");
18086bacbd2SBarry Smith                break;
18186bacbd2SBarry Smith       case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax);
18286bacbd2SBarry Smith                break;
18386bacbd2SBarry Smith       default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr);
18486bacbd2SBarry Smith     }
18586bacbd2SBarry Smith   }
18686bacbd2SBarry Smith 
18786bacbd2SBarry Smith   ierr = PetscFree(w);CHKERRQ(ierr);
18886bacbd2SBarry Smith   ierr = PetscFree(jw);CHKERRQ(ierr);
18986bacbd2SBarry Smith 
19086bacbd2SBarry Smith   /* ------------------------------------------------------------
19186bacbd2SBarry Smith      Saad's routine gives the result in Modified Sparse Row (msr)
19286bacbd2SBarry Smith      Convert to Compressed Sparse Row format (csr)
19386bacbd2SBarry Smith      ------------------------------------------------------------
19486bacbd2SBarry Smith   */
19586bacbd2SBarry Smith 
19686bacbd2SBarry Smith   wk  = (Scalar *)    PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk);
19786bacbd2SBarry Smith   iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk);
19886bacbd2SBarry Smith 
19986bacbd2SBarry Smith   msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
20086bacbd2SBarry Smith 
20186bacbd2SBarry Smith   ierr = PetscFree(iwk);CHKERRQ(ierr);
20286bacbd2SBarry Smith   ierr = PetscFree(wk);CHKERRQ(ierr);
20386bacbd2SBarry Smith 
20486bacbd2SBarry Smith   if (reorder) {
20586bacbd2SBarry Smith     ierr = PetscFree(old_a2);CHKERRQ(ierr);
20686bacbd2SBarry Smith     ierr = PetscFree(old_j2);CHKERRQ(ierr);
20786bacbd2SBarry Smith     ierr = PetscFree(old_i2);CHKERRQ(ierr);
208313ae024SBarry Smith   } else {
209b19713cbSBarry Smith     /* fix permutation of old_j that the factorization introduced */
210b19713cbSBarry Smith     for (i=old_i[0]; i<=old_i[n]; i++) {
211b19713cbSBarry Smith       old_j[i-1] = iperm[old_j[i-1]-1];
212b19713cbSBarry Smith     }
213b19713cbSBarry Smith   }
214b19713cbSBarry Smith 
215b801d0f9SBarry Smith   /* get rid of the shift to indices starting at 1 */
216b19713cbSBarry Smith   if (ishift) {
21786bacbd2SBarry Smith     for (i=0; i<n+1; i++) {
218b19713cbSBarry Smith       old_i[i]--;
219b19713cbSBarry Smith     }
220b19713cbSBarry Smith     for (i=old_i[0];i<old_i[n];i++) {
221b19713cbSBarry Smith       old_j[i]--;
222b19713cbSBarry Smith     }
22386bacbd2SBarry Smith   }
22486bacbd2SBarry Smith 
225b19713cbSBarry Smith   /* Make the factored matrix 0-based */
226b19713cbSBarry Smith   if (ishift) {
22786bacbd2SBarry Smith     for (i=0; i<n+1; i++) {
228b19713cbSBarry Smith       new_i[i]--;
229b19713cbSBarry Smith     }
230b19713cbSBarry Smith     for (i=new_i[0];i<new_i[n];i++) {
231b19713cbSBarry Smith       new_j[i]--;
232b19713cbSBarry Smith     }
23386bacbd2SBarry Smith   }
23486bacbd2SBarry Smith 
23586bacbd2SBarry Smith   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
23686bacbd2SBarry Smith   /*-- permute the right-hand-side and solution vectors           --*/
237c0c2c81eSBarry Smith   ierr = ISGetIndices(isicol,&ic);          CHKERRQ(ierr);
23886bacbd2SBarry Smith   for(i=0; i<n; i++) {
23986bacbd2SBarry Smith     ordcol[i] = ic[iperm[i]-1];
24086bacbd2SBarry Smith   };
24186bacbd2SBarry Smith   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
242*07d2056aSBarry Smith   ierr = ISDestroy(isicol);CHKERRQ(ierr);
24386bacbd2SBarry Smith 
244c0c2c81eSBarry Smith   ierr = PetscFree(iperm);CHKERRQ(ierr);
245c0c2c81eSBarry Smith 
24686bacbd2SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf);
247*07d2056aSBarry Smith   ierr = PetscFree(ordcol);CHKERRQ(ierr);
24886bacbd2SBarry Smith 
24986bacbd2SBarry Smith   /*----- put together the new matrix -----*/
25086bacbd2SBarry Smith 
25186bacbd2SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr);
25286bacbd2SBarry Smith   (*fact)->factor    = FACTOR_LU;
25386bacbd2SBarry Smith   (*fact)->assembled = PETSC_TRUE;
25486bacbd2SBarry Smith 
25586bacbd2SBarry Smith   b = (Mat_SeqAIJ *) (*fact)->data;
25686bacbd2SBarry Smith   ierr = PetscFree(b->imax);CHKERRQ(ierr);
25786bacbd2SBarry Smith   b->sorted        = PETSC_FALSE;
258*07d2056aSBarry Smith   b->singlemalloc  = PETSC_FALSE;
259b19713cbSBarry Smith   /* the next line frees the default space generated by the MatCreate() */
26086bacbd2SBarry Smith   ierr             = PetscFree(b->a);CHKERRQ(ierr);
26186bacbd2SBarry Smith   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
26286bacbd2SBarry Smith   b->a             = new_a;
26386bacbd2SBarry Smith   b->j             = new_j;
26486bacbd2SBarry Smith   b->i             = new_i;
26586bacbd2SBarry Smith   b->ilen          = 0;
26686bacbd2SBarry Smith   b->imax          = 0;
267*07d2056aSBarry Smith   /*  I am not sure why these are the inverse are the row and column permutations */
268313ae024SBarry Smith   b->row           = isirow;
26986bacbd2SBarry Smith   b->col           = iscolf;
27086bacbd2SBarry Smith   b->solve_work    =  (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
27186bacbd2SBarry Smith   b->maxnz = b->nz = new_i[n];
27286bacbd2SBarry Smith   b->indexshift    = a->indexshift;
27386bacbd2SBarry Smith   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
27486bacbd2SBarry Smith   (*fact)->info.factor_mallocs = 0;
27586bacbd2SBarry Smith 
27686bacbd2SBarry Smith   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
27786bacbd2SBarry Smith 
27886bacbd2SBarry Smith   /* check out for identical nodes. If found, use inode functions */
27986bacbd2SBarry Smith   ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr);
28086bacbd2SBarry Smith 
28186bacbd2SBarry Smith   PetscFunctionReturn(0);
28286bacbd2SBarry Smith }
28386bacbd2SBarry Smith #else
28486bacbd2SBarry Smith #undef __FUNC__
28586bacbd2SBarry Smith #define __FUNC__ "MatILUDTFactor_SeqAIJ"
286c3b2863dSBarry Smith int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
28786bacbd2SBarry Smith {
28886bacbd2SBarry Smith   PetscFunctionBegin;
28986bacbd2SBarry Smith   SETERRQ(1,1,"You must install Saad's ILUDT to use this");
29086bacbd2SBarry Smith }
29186bacbd2SBarry Smith #endif
29286bacbd2SBarry Smith 
293289bc588SBarry Smith /*
294289bc588SBarry Smith     Factorization code for AIJ format.
295289bc588SBarry Smith */
2965615d1e5SSatish Balay #undef __FUNC__
2975615d1e5SSatish Balay #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ"
298416022c9SBarry Smith int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
299289bc588SBarry Smith {
300416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
301289bc588SBarry Smith   IS         isicol;
302416022c9SBarry Smith   int        *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j;
303416022c9SBarry Smith   int        *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift;
30491bf9adeSBarry Smith   int        *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im;
305289bc588SBarry Smith 
3063a40ed3dSBarry Smith   PetscFunctionBegin;
307d3cbd50cSLois Curfman McInnes   PetscValidHeaderSpecific(isrow,IS_COOKIE);
308d3cbd50cSLois Curfman McInnes   PetscValidHeaderSpecific(iscol,IS_COOKIE);
309f1af5d2fSBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square");
3103b2fbd54SBarry Smith 
31178b31e54SBarry Smith   ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr);
312ac121b3dSBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
313ac121b3dSBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
314289bc588SBarry Smith 
315289bc588SBarry Smith   /* get new row pointers */
3160452661fSBarry Smith   ainew    = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew);
317dbb450caSBarry Smith   ainew[0] = -shift;
318289bc588SBarry Smith   /* don't know how many column pointers are needed so estimate */
319dbb450caSBarry Smith   jmax  = (int) (f*ai[n]+(!shift));
3200452661fSBarry Smith   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew);
321289bc588SBarry Smith   /* fill is a linked list of nonzeros in active row */
3220452661fSBarry Smith   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill);
3232fb3ed76SBarry Smith   im   = fill + n + 1;
324289bc588SBarry Smith   /* idnew is location of diagonal in factor */
3250452661fSBarry Smith   idnew    = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew);
326dbb450caSBarry Smith   idnew[0] = -shift;
327289bc588SBarry Smith 
328289bc588SBarry Smith   for ( i=0; i<n; i++ ) {
329289bc588SBarry Smith     /* first copy previous fill into linked list */
330289bc588SBarry Smith     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
3316b96ef82SSatish Balay     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
332dbb450caSBarry Smith     ajtmp   = aj + ai[r[i]] + shift;
333289bc588SBarry Smith     fill[n] = n;
334289bc588SBarry Smith     while (nz--) {
335289bc588SBarry Smith       fm  = n;
336dbb450caSBarry Smith       idx = ic[*ajtmp++ + shift];
337289bc588SBarry Smith       do {
338289bc588SBarry Smith         m  = fm;
339289bc588SBarry Smith         fm = fill[m];
340289bc588SBarry Smith       } while (fm < idx);
341289bc588SBarry Smith       fill[m]   = idx;
342289bc588SBarry Smith       fill[idx] = fm;
343289bc588SBarry Smith     }
344289bc588SBarry Smith     row = fill[n];
345289bc588SBarry Smith     while ( row < i ) {
346dbb450caSBarry Smith       ajtmp = ajnew + idnew[row] + (!shift);
3472fb3ed76SBarry Smith       nzbd  = 1 + idnew[row] - ainew[row];
3482fb3ed76SBarry Smith       nz    = im[row] - nzbd;
349289bc588SBarry Smith       fm    = row;
3502fb3ed76SBarry Smith       while (nz-- > 0) {
351dbb450caSBarry Smith         idx = *ajtmp++ + shift;
3522fb3ed76SBarry Smith         nzbd++;
3532fb3ed76SBarry Smith         if (idx == i) im[row] = nzbd;
354289bc588SBarry Smith         do {
355289bc588SBarry Smith           m  = fm;
356289bc588SBarry Smith           fm = fill[m];
357289bc588SBarry Smith         } while (fm < idx);
358289bc588SBarry Smith         if (fm != idx) {
359289bc588SBarry Smith           fill[m]   = idx;
360289bc588SBarry Smith           fill[idx] = fm;
361289bc588SBarry Smith           fm        = idx;
362289bc588SBarry Smith           nnz++;
363289bc588SBarry Smith         }
364289bc588SBarry Smith       }
365289bc588SBarry Smith       row = fill[row];
366289bc588SBarry Smith     }
367289bc588SBarry Smith     /* copy new filled row into permanent storage */
368289bc588SBarry Smith     ainew[i+1] = ainew[i] + nnz;
369496e697eSBarry Smith     if (ainew[i+1] > jmax) {
370ecf371e4SBarry Smith 
371ecf371e4SBarry Smith       /* estimate how much additional space we will need */
372ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
373ecf371e4SBarry Smith       /* just double the memory each time */
374ecf371e4SBarry Smith       int maxadd = jmax;
375ecf371e4SBarry Smith       /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */
376bbb6d6a8SBarry Smith       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
3772fb3ed76SBarry Smith       jmax += maxadd;
378ecf371e4SBarry Smith 
379ecf371e4SBarry Smith       /* allocate a longer ajnew */
3800452661fSBarry Smith       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
381549d3d68SSatish Balay       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr);
382606d414cSSatish Balay       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
383289bc588SBarry Smith       ajnew = ajtmp;
3842fb3ed76SBarry Smith       realloc++; /* count how many times we realloc */
385289bc588SBarry Smith     }
386dbb450caSBarry Smith     ajtmp = ajnew + ainew[i] + shift;
387289bc588SBarry Smith     fm    = fill[n];
388289bc588SBarry Smith     nzi   = 0;
3892fb3ed76SBarry Smith     im[i] = nnz;
390289bc588SBarry Smith     while (nnz--) {
391289bc588SBarry Smith       if (fm < i) nzi++;
392dbb450caSBarry Smith       *ajtmp++ = fm - shift;
393289bc588SBarry Smith       fm       = fill[fm];
394289bc588SBarry Smith     }
395289bc588SBarry Smith     idnew[i] = ainew[i] + nzi;
396289bc588SBarry Smith   }
397464e8d44SSatish Balay   if (ai[n] != 0) {
398464e8d44SSatish Balay     double af = ((double)ainew[n])/((double)ai[n]);
399c38d4ed2SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
400981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
401981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
402981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
40305bf559cSSatish Balay   } else {
404981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
40505bf559cSSatish Balay   }
4062fb3ed76SBarry Smith 
407898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
408898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
4091987afe7SBarry Smith 
410606d414cSSatish Balay   ierr = PetscFree(fill);CHKERRQ(ierr);
411289bc588SBarry Smith 
412289bc588SBarry Smith   /* put together the new matrix */
413b4fd4287SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr);
4141987afe7SBarry Smith   PLogObjectParent(*B,isicol);
415416022c9SBarry Smith   b = (Mat_SeqAIJ *) (*B)->data;
416606d414cSSatish Balay   ierr = PetscFree(b->imax);CHKERRQ(ierr);
4177c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
418e8d4e0b9SBarry Smith   /* the next line frees the default space generated by the Create() */
419606d414cSSatish Balay   ierr = PetscFree(b->a);CHKERRQ(ierr);
420606d414cSSatish Balay   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
42191bf9adeSBarry Smith   b->a          = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a);
422416022c9SBarry Smith   b->j          = ajnew;
423416022c9SBarry Smith   b->i          = ainew;
424416022c9SBarry Smith   b->diag       = idnew;
425416022c9SBarry Smith   b->ilen       = 0;
426416022c9SBarry Smith   b->imax       = 0;
427416022c9SBarry Smith   b->row        = isrow;
428416022c9SBarry Smith   b->col        = iscol;
429c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
430c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
43182bf6240SBarry Smith   b->icol       = isicol;
43291bf9adeSBarry Smith   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
433416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
4347fd98bd6SLois Curfman McInnes      Allocate idnew, solve_work, new a, new j */
435416022c9SBarry Smith   PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
436416022c9SBarry Smith   b->maxnz = b->nz = ainew[n] + shift;
4377fd98bd6SLois Curfman McInnes 
438e93ccc4dSBarry Smith   (*B)->factor                 =  FACTOR_LU;;
439ae068f58SLois Curfman McInnes   (*B)->info.factor_mallocs    = realloc;
440ae068f58SLois Curfman McInnes   (*B)->info.fill_ratio_given  = f;
441703deb49SSatish Balay   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant if A has inodes */
442703deb49SSatish Balay 
443e93ccc4dSBarry Smith   if (ai[n] != 0) {
444e93ccc4dSBarry Smith     (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]);
445eea2913aSSatish Balay   } else {
446eea2913aSSatish Balay     (*B)->info.fill_ratio_needed = 0.0;
447eea2913aSSatish Balay   }
4483a40ed3dSBarry Smith   PetscFunctionReturn(0);
449289bc588SBarry Smith }
4500a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
451184914b5SBarry Smith extern int Mat_AIJ_CheckInode(Mat);
45241c01911SSatish Balay 
4535615d1e5SSatish Balay #undef __FUNC__
4545615d1e5SSatish Balay #define __FUNC__ "MatLUFactorNumeric_SeqAIJ"
455416022c9SBarry Smith int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
456289bc588SBarry Smith {
457416022c9SBarry Smith   Mat        C = *B;
458416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data;
45982bf6240SBarry Smith   IS         isrow = b->row, isicol = b->icol;
460416022c9SBarry Smith   int        *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j;
4611987afe7SBarry Smith   int        *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift;
462f2582111SSatish Balay   int        *diag_offset = b->diag,diag,k;
46335aab85fSBarry Smith   int        preserve_row_sums = (int) a->ilu_preserve_row_sums;
4643a40ed3dSBarry Smith   register   int    *pj;
4658ecf7165SBarry Smith   Scalar     *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0;
46635aab85fSBarry Smith   double     ssum;
46735aab85fSBarry Smith   register   Scalar *pv, *rtmps,*u_values;
468289bc588SBarry Smith 
4693a40ed3dSBarry Smith   PetscFunctionBegin;
47082bf6240SBarry Smith 
47178b31e54SBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
47278b31e54SBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
4730452661fSBarry Smith   rtmp  = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp);
474549d3d68SSatish Balay   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr);
4750cf60383SBarry Smith   rtmps = rtmp + shift; ics = ic + shift;
476289bc588SBarry Smith 
4776cf997b0SBarry Smith   /* precalculate row sums */
47835aab85fSBarry Smith   if (preserve_row_sums) {
47935aab85fSBarry Smith     rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums);
48035aab85fSBarry Smith     for ( i=0; i<n; i++ ) {
48135aab85fSBarry Smith       nz  = a->i[r[i]+1] - a->i[r[i]];
48235aab85fSBarry Smith       v   = a->a + a->i[r[i]] + shift;
48335aab85fSBarry Smith       sum = 0.0;
48435aab85fSBarry Smith       for ( j=0; j<nz; j++ ) sum += v[j];
48535aab85fSBarry Smith       rowsums[i] = sum;
48635aab85fSBarry Smith     }
48735aab85fSBarry Smith   }
48835aab85fSBarry Smith 
489289bc588SBarry Smith   for ( i=0; i<n; i++ ) {
490289bc588SBarry Smith     nz    = ai[i+1] - ai[i];
491dbb450caSBarry Smith     ajtmp = aj + ai[i] + shift;
49248e94559SBarry Smith     for  ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0;
493289bc588SBarry Smith 
494289bc588SBarry Smith     /* load in initial (unfactored row) */
495416022c9SBarry Smith     nz       = a->i[r[i]+1] - a->i[r[i]];
496416022c9SBarry Smith     ajtmpold = a->j + a->i[r[i]] + shift;
497416022c9SBarry Smith     v        = a->a + a->i[r[i]] + shift;
4980cf60383SBarry Smith     for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] =  v[j];
499289bc588SBarry Smith 
500dbb450caSBarry Smith     row = *ajtmp++ + shift;
501f2582111SSatish Balay     while  (row < i ) {
5028c37ef55SBarry Smith       pc = rtmp + row;
5038c37ef55SBarry Smith       if (*pc != 0.0) {
5041987afe7SBarry Smith         pv         = b->a + diag_offset[row] + shift;
5051987afe7SBarry Smith         pj         = b->j + diag_offset[row] + (!shift);
50635aab85fSBarry Smith         multiplier = *pc / *pv++;
5078c37ef55SBarry Smith         *pc        = multiplier;
508f2582111SSatish Balay         nz         = ai[row+1] - diag_offset[row] - 1;
509f2582111SSatish Balay         for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
510f2582111SSatish Balay         PLogFlops(2*nz);
511289bc588SBarry Smith       }
512dbb450caSBarry Smith       row = *ajtmp++ + shift;
513289bc588SBarry Smith     }
514416022c9SBarry Smith     /* finished row so stick it into b->a */
515416022c9SBarry Smith     pv = b->a + ai[i] + shift;
516416022c9SBarry Smith     pj = b->j + ai[i] + shift;
5178c37ef55SBarry Smith     nz = ai[i+1] - ai[i];
518416022c9SBarry Smith     for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];}
51935aab85fSBarry Smith     diag = diag_offset[i] - ai[i];
52035aab85fSBarry Smith     /*
52135aab85fSBarry Smith           Possibly adjust diagonal entry on current row to force
52235aab85fSBarry Smith         LU matrix to have same row sum as initial matrix.
52335aab85fSBarry Smith     */
524d708749eSBarry Smith     if (pv[diag] == 0.0) {
5256cf997b0SBarry Smith       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i);
526d708749eSBarry Smith     }
52735aab85fSBarry Smith     if (preserve_row_sums) {
52835aab85fSBarry Smith       pj  = b->j + ai[i] + shift;
52935aab85fSBarry Smith       sum = rowsums[i];
53035aab85fSBarry Smith       for ( j=0; j<diag; j++ ) {
53135aab85fSBarry Smith         u_values  = b->a + diag_offset[pj[j]] + shift;
53235aab85fSBarry Smith         nz        = ai[pj[j]+1] - diag_offset[pj[j]];
53335aab85fSBarry Smith         inner_sum = 0.0;
53435aab85fSBarry Smith         for ( k=0; k<nz; k++ ) {
53535aab85fSBarry Smith           inner_sum += u_values[k];
53635aab85fSBarry Smith         }
53735aab85fSBarry Smith         sum -= pv[j]*inner_sum;
53835aab85fSBarry Smith 
53935aab85fSBarry Smith       }
54035aab85fSBarry Smith       nz       = ai[i+1] - diag_offset[i] - 1;
54135aab85fSBarry Smith       u_values = b->a + diag_offset[i] + 1 + shift;
54235aab85fSBarry Smith       for ( k=0; k<nz; k++ ) {
54335aab85fSBarry Smith         sum -= u_values[k];
54435aab85fSBarry Smith       }
54535aab85fSBarry Smith       ssum = PetscAbsScalar(sum/pv[diag]);
54635aab85fSBarry Smith       if (ssum < 1000. && ssum > .001) pv[diag] = sum;
54735aab85fSBarry Smith     }
54835aab85fSBarry Smith     /* check pivot entry for current row */
5498c37ef55SBarry Smith   }
5500f11f9aeSSatish Balay 
55135aab85fSBarry Smith   /* invert diagonal entries for simplier triangular solves */
55235aab85fSBarry Smith   for ( i=0; i<n; i++ ) {
55335aab85fSBarry Smith     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
55435aab85fSBarry Smith   }
55535aab85fSBarry Smith 
556606d414cSSatish Balay   if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);}
557606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
55878b31e54SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
55978b31e54SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
560416022c9SBarry Smith   C->factor = FACTOR_LU;
56141c01911SSatish Balay   ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr);
562c456f294SBarry Smith   C->assembled = PETSC_TRUE;
563416022c9SBarry Smith   PLogFlops(b->n);
5643a40ed3dSBarry Smith   PetscFunctionReturn(0);
565289bc588SBarry Smith }
5660a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
5675615d1e5SSatish Balay #undef __FUNC__
5685615d1e5SSatish Balay #define __FUNC__ "MatLUFactor_SeqAIJ"
569416022c9SBarry Smith int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f)
570da3a660dSBarry Smith {
571416022c9SBarry Smith   Mat_SeqAIJ     *mat = (Mat_SeqAIJ *) A->data;
57286bacbd2SBarry Smith   int            ierr,refct;
573416022c9SBarry Smith   Mat            C;
574f830108cSBarry Smith   PetscOps       *Abops;
5750a6ffc59SBarry Smith   MatOps         Aops;
576416022c9SBarry Smith 
5773a40ed3dSBarry Smith   PetscFunctionBegin;
578f2582111SSatish Balay   ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr);
579f2582111SSatish Balay   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
580da3a660dSBarry Smith 
581da3a660dSBarry Smith   /* free all the data structures from mat */
582606d414cSSatish Balay   ierr = PetscFree(mat->a);CHKERRQ(ierr);
583606d414cSSatish Balay   if (!mat->singlemalloc) {
584606d414cSSatish Balay     ierr = PetscFree(mat->i);CHKERRQ(ierr);
585606d414cSSatish Balay     ierr = PetscFree(mat->j);CHKERRQ(ierr);
586606d414cSSatish Balay   }
587606d414cSSatish Balay   if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);}
588606d414cSSatish Balay   if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);}
589606d414cSSatish Balay   if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);}
590606d414cSSatish Balay   if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);}
591606d414cSSatish Balay   if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);}
5920f0aacb9SBarry Smith   if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);}
593606d414cSSatish Balay   ierr = PetscFree(mat);CHKERRQ(ierr);
594da3a660dSBarry Smith 
59517642b18SBarry Smith   ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
59617642b18SBarry Smith   ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
59717642b18SBarry Smith 
598f830108cSBarry Smith   /*
599f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
600f830108cSBarry Smith     A pointers for the bops and ops but copy everything
601f830108cSBarry Smith     else from C.
602f830108cSBarry Smith   */
603f830108cSBarry Smith   Abops = A->bops;
604f830108cSBarry Smith   Aops  = A->ops;
60586bacbd2SBarry Smith   refct = A->refct;
606549d3d68SSatish Balay   ierr  = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
607451c4af8SSatish Balay   mat   = (Mat_SeqAIJ *) A->data;
608451c4af8SSatish Balay   PLogObjectParent(A,mat->icol);
609451c4af8SSatish Balay 
610f830108cSBarry Smith   A->bops  = Abops;
611f830108cSBarry Smith   A->ops   = Aops;
612bef8e0ddSBarry Smith   A->qlist = 0;
61386bacbd2SBarry Smith   A->refct = refct;
614c38d4ed2SBarry Smith   /* copy over the type_name and name */
615c38d4ed2SBarry Smith   ierr     = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr);
616c38d4ed2SBarry Smith   ierr     = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr);
617f830108cSBarry Smith 
6180452661fSBarry Smith   PetscHeaderDestroy(C);
619c38d4ed2SBarry Smith 
6203a40ed3dSBarry Smith   PetscFunctionReturn(0);
621da3a660dSBarry Smith }
6220a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
6235615d1e5SSatish Balay #undef __FUNC__
6245615d1e5SSatish Balay #define __FUNC__ "MatSolve_SeqAIJ"
625416022c9SBarry Smith int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx)
6268c37ef55SBarry Smith {
627416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
628416022c9SBarry Smith   IS         iscol = a->col, isrow = a->row;
629416022c9SBarry Smith   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
6303b2fbd54SBarry Smith   int        nz,shift = a->indexshift,*rout,*cout;
631416022c9SBarry Smith   Scalar     *x,*b,*tmp, *tmps, *aa = a->a, sum, *v;
6328c37ef55SBarry Smith 
6333a40ed3dSBarry Smith   PetscFunctionBegin;
6343a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
63591bf9adeSBarry Smith 
636e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
637e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
638416022c9SBarry Smith   tmp  = a->solve_work;
6398c37ef55SBarry Smith 
6403b2fbd54SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
6413b2fbd54SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
6428c37ef55SBarry Smith 
6438c37ef55SBarry Smith   /* forward solve the lower triangular */
6448c37ef55SBarry Smith   tmp[0] = b[*r++];
6450cf60383SBarry Smith   tmps   = tmp + shift;
6468c37ef55SBarry Smith   for ( i=1; i<n; i++ ) {
647dbb450caSBarry Smith     v   = aa + ai[i] + shift;
648dbb450caSBarry Smith     vi  = aj + ai[i] + shift;
649416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
6508c37ef55SBarry Smith     sum = b[*r++];
6510cf60383SBarry Smith     while (nz--) sum -= *v++ * tmps[*vi++];
6528c37ef55SBarry Smith     tmp[i] = sum;
6538c37ef55SBarry Smith   }
6548c37ef55SBarry Smith 
6558c37ef55SBarry Smith   /* backward solve the upper triangular */
6568c37ef55SBarry Smith   for ( i=n-1; i>=0; i-- ){
657416022c9SBarry Smith     v   = aa + a->diag[i] + (!shift);
658416022c9SBarry Smith     vi  = aj + a->diag[i] + (!shift);
659416022c9SBarry Smith     nz  = ai[i+1] - a->diag[i] - 1;
6608c37ef55SBarry Smith     sum = tmp[i];
6610cf60383SBarry Smith     while (nz--) sum -= *v++ * tmps[*vi++];
662416022c9SBarry Smith     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
6638c37ef55SBarry Smith   }
6648c37ef55SBarry Smith 
6653b2fbd54SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
6663b2fbd54SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
667cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
668cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
669416022c9SBarry Smith   PLogFlops(2*a->nz - a->n);
6703a40ed3dSBarry Smith   PetscFunctionReturn(0);
6718c37ef55SBarry Smith }
672026e39d0SSatish Balay 
673930ae53cSBarry Smith /* ----------------------------------------------------------- */
674930ae53cSBarry Smith #undef __FUNC__
675930ae53cSBarry Smith #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering"
676930ae53cSBarry Smith int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx)
677930ae53cSBarry Smith {
678930ae53cSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
679d85afc42SSatish Balay   int        n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr;
680d85afc42SSatish Balay   Scalar     *x,*b, *aa = a->a, sum;
681aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
682d85afc42SSatish Balay   int        adiag_i,i,*vi,nz,ai_i;
683d85afc42SSatish Balay   Scalar     *v;
684d85afc42SSatish Balay #endif
685930ae53cSBarry Smith 
6863a40ed3dSBarry Smith   PetscFunctionBegin;
6873a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
688930ae53cSBarry Smith   if (a->indexshift) {
6893a40ed3dSBarry Smith      ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr);
6903a40ed3dSBarry Smith      PetscFunctionReturn(0);
691930ae53cSBarry Smith   }
692930ae53cSBarry Smith 
693e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
694e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
695930ae53cSBarry Smith 
696aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
6971c4feecaSSatish Balay   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
6981c4feecaSSatish Balay #else
699930ae53cSBarry Smith   /* forward solve the lower triangular */
700930ae53cSBarry Smith   x[0] = b[0];
701930ae53cSBarry Smith   for ( i=1; i<n; i++ ) {
702930ae53cSBarry Smith     ai_i = ai[i];
703930ae53cSBarry Smith     v    = aa + ai_i;
704930ae53cSBarry Smith     vi   = aj + ai_i;
705930ae53cSBarry Smith     nz   = adiag[i] - ai_i;
706930ae53cSBarry Smith     sum  = b[i];
707930ae53cSBarry Smith     while (nz--) sum -= *v++ * x[*vi++];
708930ae53cSBarry Smith     x[i] = sum;
709930ae53cSBarry Smith   }
710930ae53cSBarry Smith 
711930ae53cSBarry Smith   /* backward solve the upper triangular */
712930ae53cSBarry Smith   for ( i=n-1; i>=0; i-- ){
713930ae53cSBarry Smith     adiag_i = adiag[i];
714d708749eSBarry Smith     v       = aa + adiag_i + 1;
715d708749eSBarry Smith     vi      = aj + adiag_i + 1;
716930ae53cSBarry Smith     nz      = ai[i+1] - adiag_i - 1;
717930ae53cSBarry Smith     sum     = x[i];
718930ae53cSBarry Smith     while (nz--) sum -= *v++ * x[*vi++];
719930ae53cSBarry Smith     x[i]    = sum*aa[adiag_i];
720930ae53cSBarry Smith   }
7211c4feecaSSatish Balay #endif
722930ae53cSBarry Smith   PLogFlops(2*a->nz - a->n);
723cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
724cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7253a40ed3dSBarry Smith   PetscFunctionReturn(0);
726930ae53cSBarry Smith }
727930ae53cSBarry Smith 
7285615d1e5SSatish Balay #undef __FUNC__
7295615d1e5SSatish Balay #define __FUNC__ "MatSolveAdd_SeqAIJ"
730416022c9SBarry Smith int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx)
731da3a660dSBarry Smith {
732416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
733416022c9SBarry Smith   IS         iscol = a->col, isrow = a->row;
734416022c9SBarry Smith   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
7353b2fbd54SBarry Smith   int        nz, shift = a->indexshift,*rout,*cout;
736416022c9SBarry Smith   Scalar     *x,*b,*tmp, *aa = a->a, sum, *v;
737da3a660dSBarry Smith 
7383a40ed3dSBarry Smith   PetscFunctionBegin;
73978b31e54SBarry Smith   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
740da3a660dSBarry Smith 
741e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
742e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
743416022c9SBarry Smith   tmp  = a->solve_work;
744da3a660dSBarry Smith 
7453b2fbd54SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
7463b2fbd54SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
747da3a660dSBarry Smith 
748da3a660dSBarry Smith   /* forward solve the lower triangular */
749da3a660dSBarry Smith   tmp[0] = b[*r++];
750da3a660dSBarry Smith   for ( i=1; i<n; i++ ) {
751dbb450caSBarry Smith     v   = aa + ai[i] + shift;
752dbb450caSBarry Smith     vi  = aj + ai[i] + shift;
753416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
754da3a660dSBarry Smith     sum = b[*r++];
755dbb450caSBarry Smith     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
756da3a660dSBarry Smith     tmp[i] = sum;
757da3a660dSBarry Smith   }
758da3a660dSBarry Smith 
759da3a660dSBarry Smith   /* backward solve the upper triangular */
760da3a660dSBarry Smith   for ( i=n-1; i>=0; i-- ){
761416022c9SBarry Smith     v   = aa + a->diag[i] + (!shift);
762416022c9SBarry Smith     vi  = aj + a->diag[i] + (!shift);
763416022c9SBarry Smith     nz  = ai[i+1] - a->diag[i] - 1;
764da3a660dSBarry Smith     sum = tmp[i];
765dbb450caSBarry Smith     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
766416022c9SBarry Smith     tmp[i] = sum*aa[a->diag[i]+shift];
767da3a660dSBarry Smith     x[*c--] += tmp[i];
768da3a660dSBarry Smith   }
769da3a660dSBarry Smith 
7703b2fbd54SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
7713b2fbd54SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
772cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
773cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
774416022c9SBarry Smith   PLogFlops(2*a->nz);
775898183ecSLois Curfman McInnes 
7763a40ed3dSBarry Smith   PetscFunctionReturn(0);
777da3a660dSBarry Smith }
778da3a660dSBarry Smith /* -------------------------------------------------------------------*/
7795615d1e5SSatish Balay #undef __FUNC__
7807c922b88SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqAIJ"
7817c922b88SBarry Smith int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx)
782da3a660dSBarry Smith {
783416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
7842235e667SBarry Smith   IS         iscol = a->col, isrow = a->row;
785416022c9SBarry Smith   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
786f1af5d2fSBarry Smith   int        nz,shift = a->indexshift,*rout,*cout, *diag = a->diag;
787f1af5d2fSBarry Smith   Scalar     *x,*b,*tmp, *aa = a->a, *v, s1;
788da3a660dSBarry Smith 
7893a40ed3dSBarry Smith   PetscFunctionBegin;
790e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
791e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
792416022c9SBarry Smith   tmp  = a->solve_work;
793da3a660dSBarry Smith 
7942235e667SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
7952235e667SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
796da3a660dSBarry Smith 
797da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
7982235e667SBarry Smith   for ( i=0; i<n; i++ ) tmp[i] = b[c[i]];
799da3a660dSBarry Smith 
800da3a660dSBarry Smith   /* forward solve the U^T */
801da3a660dSBarry Smith   for ( i=0; i<n; i++ ) {
802f1af5d2fSBarry Smith     v   = aa + diag[i] + shift;
803f1af5d2fSBarry Smith     vi  = aj + diag[i] + (!shift);
804f1af5d2fSBarry Smith     nz  = ai[i+1] - diag[i] - 1;
805c38d4ed2SBarry Smith     s1  = tmp[i];
806c38d4ed2SBarry Smith     s1 *= *(v++);  /* multiply by inverse of diagonal entry */
807da3a660dSBarry Smith     while (nz--) {
808f1af5d2fSBarry Smith       tmp[*vi++ + shift] -= (*v++)*s1;
809da3a660dSBarry Smith     }
810c38d4ed2SBarry Smith     tmp[i] = s1;
811da3a660dSBarry Smith   }
812da3a660dSBarry Smith 
813da3a660dSBarry Smith   /* backward solve the L^T */
814da3a660dSBarry Smith   for ( i=n-1; i>=0; i-- ){
815f1af5d2fSBarry Smith     v   = aa + diag[i] - 1 + shift;
816f1af5d2fSBarry Smith     vi  = aj + diag[i] - 1 + shift;
817f1af5d2fSBarry Smith     nz  = diag[i] - ai[i];
818f1af5d2fSBarry Smith     s1  = tmp[i];
819da3a660dSBarry Smith     while (nz--) {
820f1af5d2fSBarry Smith       tmp[*vi-- + shift] -= (*v--)*s1;
821da3a660dSBarry Smith     }
822da3a660dSBarry Smith   }
823da3a660dSBarry Smith 
824da3a660dSBarry Smith   /* copy tmp into x according to permutation */
825da3a660dSBarry Smith   for ( i=0; i<n; i++ ) x[r[i]] = tmp[i];
826da3a660dSBarry Smith 
8272235e667SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
8282235e667SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
829cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
830cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
831da3a660dSBarry Smith 
832416022c9SBarry Smith   PLogFlops(2*a->nz-a->n);
8333a40ed3dSBarry Smith   PetscFunctionReturn(0);
834da3a660dSBarry Smith }
835da3a660dSBarry Smith 
8365615d1e5SSatish Balay #undef __FUNC__
8377c922b88SBarry Smith #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ"
8387c922b88SBarry Smith int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx)
839da3a660dSBarry Smith {
840416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
8412235e667SBarry Smith   IS         iscol = a->col, isrow = a->row;
842416022c9SBarry Smith   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
843f1af5d2fSBarry Smith   int        nz,shift = a->indexshift, *rout, *cout, *diag = a->diag;
844416022c9SBarry Smith   Scalar     *x,*b,*tmp, *aa = a->a, *v;
8456abc6512SBarry Smith 
8463a40ed3dSBarry Smith   PetscFunctionBegin;
8476abc6512SBarry Smith   if (zz != xx) VecCopy(zz,xx);
8486abc6512SBarry Smith 
849e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
850e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
851416022c9SBarry Smith   tmp = a->solve_work;
8526abc6512SBarry Smith 
8532235e667SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
8542235e667SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
8556abc6512SBarry Smith 
8566abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
8572235e667SBarry Smith   for ( i=0; i<n; i++ ) tmp[i] = b[c[i]];
8586abc6512SBarry Smith 
8596abc6512SBarry Smith   /* forward solve the U^T */
8606abc6512SBarry Smith   for ( i=0; i<n; i++ ) {
861f1af5d2fSBarry Smith     v   = aa + diag[i] + shift;
862f1af5d2fSBarry Smith     vi  = aj + diag[i] + (!shift);
863f1af5d2fSBarry Smith     nz  = ai[i+1] - diag[i] - 1;
8646abc6512SBarry Smith     tmp[i] *= *v++;
8656abc6512SBarry Smith     while (nz--) {
866dbb450caSBarry Smith       tmp[*vi++ + shift] -= (*v++)*tmp[i];
8676abc6512SBarry Smith     }
8686abc6512SBarry Smith   }
8696abc6512SBarry Smith 
8706abc6512SBarry Smith   /* backward solve the L^T */
8716abc6512SBarry Smith   for ( i=n-1; i>=0; i-- ){
872f1af5d2fSBarry Smith     v   = aa + diag[i] - 1 + shift;
873f1af5d2fSBarry Smith     vi  = aj + diag[i] - 1 + shift;
874f1af5d2fSBarry Smith     nz  = diag[i] - ai[i];
8756abc6512SBarry Smith     while (nz--) {
876dbb450caSBarry Smith       tmp[*vi-- + shift] -= (*v--)*tmp[i];
8776abc6512SBarry Smith     }
8786abc6512SBarry Smith   }
8796abc6512SBarry Smith 
8806abc6512SBarry Smith   /* copy tmp into x according to permutation */
8816abc6512SBarry Smith   for ( i=0; i<n; i++ ) x[r[i]] += tmp[i];
8826abc6512SBarry Smith 
8832235e667SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
8842235e667SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
885cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
886cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8876abc6512SBarry Smith 
888416022c9SBarry Smith   PLogFlops(2*a->nz);
8893a40ed3dSBarry Smith   PetscFunctionReturn(0);
890da3a660dSBarry Smith }
8919e25ed09SBarry Smith /* ----------------------------------------------------------------*/
8927c922b88SBarry Smith extern int MatMissingDiagonal_SeqAIJ(Mat);
89308480c60SBarry Smith 
8945615d1e5SSatish Balay #undef __FUNC__
8955615d1e5SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ"
8966cf997b0SBarry Smith int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
8979e25ed09SBarry Smith {
898416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
8999e25ed09SBarry Smith   IS         isicol;
900416022c9SBarry Smith   int        *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j;
90156beaf04SBarry Smith   int        *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
902335d9088SBarry Smith   int        *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0, dcount = 0;
9036cf997b0SBarry Smith   int        incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill;
90477c4ece6SBarry Smith   PetscTruth col_identity, row_identity;
9056cf997b0SBarry Smith   double     f;
9069e25ed09SBarry Smith 
9073a40ed3dSBarry Smith   PetscFunctionBegin;
9086cf997b0SBarry Smith   if (info) {
9096cf997b0SBarry Smith     f             = info->fill;
9100cd17293SBarry Smith     levels        = (int) info->levels;
9110cd17293SBarry Smith     diagonal_fill = (int) info->diagonal_fill;
9126cf997b0SBarry Smith   } else {
9136cf997b0SBarry Smith     f             = 1.0;
9146cf997b0SBarry Smith     levels        = 0;
9156cf997b0SBarry Smith     diagonal_fill = 0;
9166cf997b0SBarry Smith   }
91782bf6240SBarry Smith   ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr);
91882bf6240SBarry Smith 
91908480c60SBarry Smith   /* special case that simply copies fill pattern */
920be0abb6dSBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
921be0abb6dSBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
92286bacbd2SBarry Smith   if (!levels && row_identity && col_identity) {
9232e8a6d31SBarry Smith     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
92408480c60SBarry Smith     (*fact)->factor = FACTOR_LU;
92508480c60SBarry Smith     b               = (Mat_SeqAIJ *) (*fact)->data;
92608480c60SBarry Smith     if (!b->diag) {
9277c922b88SBarry Smith       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
92808480c60SBarry Smith     }
9297c922b88SBarry Smith     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
93008480c60SBarry Smith     b->row              = isrow;
93108480c60SBarry Smith     b->col              = iscol;
93282bf6240SBarry Smith     b->icol             = isicol;
9330452661fSBarry Smith     b->solve_work       = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
934f830108cSBarry Smith     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
935c38d4ed2SBarry Smith     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
936c38d4ed2SBarry Smith     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
9373a40ed3dSBarry Smith     PetscFunctionReturn(0);
93808480c60SBarry Smith   }
93908480c60SBarry Smith 
940898183ecSLois Curfman McInnes   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
941898183ecSLois Curfman McInnes   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
9429e25ed09SBarry Smith 
9439e25ed09SBarry Smith   /* get new row pointers */
9440452661fSBarry Smith   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew);
945dbb450caSBarry Smith   ainew[0] = -shift;
9469e25ed09SBarry Smith   /* don't know how many column pointers are needed so estimate */
947dbb450caSBarry Smith   jmax = (int) (f*(ai[n]+!shift));
9480452661fSBarry Smith   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew);
9499e25ed09SBarry Smith   /* ajfill is level of fill for each fill entry */
9500452661fSBarry Smith   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill);
9519e25ed09SBarry Smith   /* fill is a linked list of nonzeros in active row */
9520452661fSBarry Smith   fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill);
95356beaf04SBarry Smith   /* im is level for each filled value */
9540452661fSBarry Smith   im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im);
95556beaf04SBarry Smith   /* dloc is location of diagonal in factor */
9560452661fSBarry Smith   dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc);
95756beaf04SBarry Smith   dloc[0]  = 0;
95856beaf04SBarry Smith   for ( prow=0; prow<n; prow++ ) {
9596cf997b0SBarry Smith 
9606cf997b0SBarry Smith     /* copy current row into linked list */
96156beaf04SBarry Smith     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
962385f7a74SSatish Balay     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
963dbb450caSBarry Smith     xi      = aj + ai[r[prow]] + shift;
9649e25ed09SBarry Smith     fill[n]    = n;
965435faa5fSBarry Smith     fill[prow] = -1; /* marker to indicate if diagonal exists */
9669e25ed09SBarry Smith     while (nz--) {
9679e25ed09SBarry Smith       fm  = n;
968dbb450caSBarry Smith       idx = ic[*xi++ + shift];
9699e25ed09SBarry Smith       do {
9709e25ed09SBarry Smith         m  = fm;
9719e25ed09SBarry Smith         fm = fill[m];
9729e25ed09SBarry Smith       } while (fm < idx);
9739e25ed09SBarry Smith       fill[m]   = idx;
9749e25ed09SBarry Smith       fill[idx] = fm;
97556beaf04SBarry Smith       im[idx]   = 0;
9769e25ed09SBarry Smith     }
9776cf997b0SBarry Smith 
9786cf997b0SBarry Smith     /* make sure diagonal entry is included */
979435faa5fSBarry Smith     if (diagonal_fill && fill[prow] == -1) {
9806cf997b0SBarry Smith       fm = n;
981435faa5fSBarry Smith       while (fill[fm] < prow) fm = fill[fm];
982435faa5fSBarry Smith       fill[prow] = fill[fm]; /* insert diagonal into linked list */
983435faa5fSBarry Smith       fill[fm]   = prow;
9846cf997b0SBarry Smith       im[prow]   = 0;
9856cf997b0SBarry Smith       nzf++;
986335d9088SBarry Smith       dcount++;
9876cf997b0SBarry Smith     }
9886cf997b0SBarry Smith 
98956beaf04SBarry Smith     nzi = 0;
9909e25ed09SBarry Smith     row = fill[n];
99156beaf04SBarry Smith     while ( row < prow ) {
99256beaf04SBarry Smith       incrlev = im[row] + 1;
99356beaf04SBarry Smith       nz      = dloc[row];
9946cf997b0SBarry Smith       xi      = ajnew  + ainew[row] + shift + nz + 1;
995dbb450caSBarry Smith       flev    = ajfill + ainew[row] + shift + nz + 1;
996416022c9SBarry Smith       nnz     = ainew[row+1] - ainew[row] - nz - 1;
99756beaf04SBarry Smith       fm      = row;
99856beaf04SBarry Smith       while (nnz-- > 0) {
999dbb450caSBarry Smith         idx = *xi++ + shift;
100056beaf04SBarry Smith         if (*flev + incrlev > levels) {
100156beaf04SBarry Smith           flev++;
100256beaf04SBarry Smith           continue;
100356beaf04SBarry Smith         }
100456beaf04SBarry Smith         do {
10059e25ed09SBarry Smith           m  = fm;
10069e25ed09SBarry Smith           fm = fill[m];
100756beaf04SBarry Smith         } while (fm < idx);
10089e25ed09SBarry Smith         if (fm != idx) {
100956beaf04SBarry Smith           im[idx]   = *flev + incrlev;
10109e25ed09SBarry Smith           fill[m]   = idx;
10119e25ed09SBarry Smith           fill[idx] = fm;
10129e25ed09SBarry Smith           fm        = idx;
101356beaf04SBarry Smith           nzf++;
1014ecf371e4SBarry Smith         } else {
101556beaf04SBarry Smith           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
10169e25ed09SBarry Smith         }
101756beaf04SBarry Smith         flev++;
10189e25ed09SBarry Smith       }
10199e25ed09SBarry Smith       row = fill[row];
102056beaf04SBarry Smith       nzi++;
10219e25ed09SBarry Smith     }
10229e25ed09SBarry Smith     /* copy new filled row into permanent storage */
102356beaf04SBarry Smith     ainew[prow+1] = ainew[prow] + nzf;
1024d7e8b826SBarry Smith     if (ainew[prow+1] > jmax-shift) {
1025ecf371e4SBarry Smith 
1026ecf371e4SBarry Smith       /* estimate how much additional space we will need */
1027ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1028ecf371e4SBarry Smith       /* just double the memory each time */
1029ecf371e4SBarry Smith       /*  maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */
1030ecf371e4SBarry Smith       int maxadd = jmax;
103156beaf04SBarry Smith       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1032bbb6d6a8SBarry Smith       jmax += maxadd;
1033ecf371e4SBarry Smith 
1034ecf371e4SBarry Smith       /* allocate a longer ajnew and ajfill */
10350452661fSBarry Smith       xi     = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
1036549d3d68SSatish Balay       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr);
1037606d414cSSatish Balay       ierr = PetscFree(ajnew);CHKERRQ(ierr);
103856beaf04SBarry Smith       ajnew  = xi;
10390452661fSBarry Smith       xi     = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
1040549d3d68SSatish Balay       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr);
1041606d414cSSatish Balay       ierr = PetscFree(ajfill);CHKERRQ(ierr);
104256beaf04SBarry Smith       ajfill = xi;
1043ecf371e4SBarry Smith       realloc++; /* count how many times we realloc */
10449e25ed09SBarry Smith     }
1045dbb450caSBarry Smith     xi          = ajnew + ainew[prow] + shift;
1046dbb450caSBarry Smith     flev        = ajfill + ainew[prow] + shift;
104756beaf04SBarry Smith     dloc[prow]  = nzi;
10489e25ed09SBarry Smith     fm          = fill[n];
104956beaf04SBarry Smith     while (nzf--) {
1050dbb450caSBarry Smith       *xi++   = fm - shift;
105156beaf04SBarry Smith       *flev++ = im[fm];
10529e25ed09SBarry Smith       fm      = fill[fm];
10539e25ed09SBarry Smith     }
10546cf997b0SBarry Smith     /* make sure row has diagonal entry */
10556cf997b0SBarry Smith     if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) {
10566cf997b0SBarry Smith       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\
10576cf997b0SBarry Smith     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
10586cf997b0SBarry Smith     }
10599e25ed09SBarry Smith   }
1060606d414cSSatish Balay   ierr = PetscFree(ajfill); CHKERRQ(ierr);
1061898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1062898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1063606d414cSSatish Balay   ierr = PetscFree(fill);CHKERRQ(ierr);
1064606d414cSSatish Balay   ierr = PetscFree(im);CHKERRQ(ierr);
10659e25ed09SBarry Smith 
1066f64065bbSBarry Smith   {
1067464e8d44SSatish Balay     double af = ((double)ainew[n])/((double)ai[n]);
1068c38d4ed2SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1069981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
1070981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
1071981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1072335d9088SBarry Smith     if (diagonal_fill) {
1073335d9088SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount);
1074335d9088SBarry Smith     }
1075f64065bbSBarry Smith   }
1076bbb6d6a8SBarry Smith 
10779e25ed09SBarry Smith   /* put together the new matrix */
1078b4fd4287SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr);
1079fa6007c9SSatish Balay   PLogObjectParent(*fact,isicol);
1080416022c9SBarry Smith   b = (Mat_SeqAIJ *) (*fact)->data;
1081606d414cSSatish Balay   ierr = PetscFree(b->imax);CHKERRQ(ierr);
10827c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
1083dbb450caSBarry Smith   len = (ainew[n] + shift)*sizeof(Scalar);
10849e25ed09SBarry Smith   /* the next line frees the default space generated by the Create() */
1085606d414cSSatish Balay   ierr = PetscFree(b->a);CHKERRQ(ierr);
1086606d414cSSatish Balay   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
10876b96ef82SSatish Balay   b->a          = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a);
1088416022c9SBarry Smith   b->j          = ajnew;
1089416022c9SBarry Smith   b->i          = ainew;
109056beaf04SBarry Smith   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
1091416022c9SBarry Smith   b->diag       = dloc;
1092416022c9SBarry Smith   b->ilen       = 0;
1093416022c9SBarry Smith   b->imax       = 0;
1094416022c9SBarry Smith   b->row        = isrow;
1095416022c9SBarry Smith   b->col        = iscol;
1096c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1097c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
109882bf6240SBarry Smith   b->icol       = isicol;
109982bf6240SBarry Smith   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
1100416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
110156beaf04SBarry Smith      Allocate dloc, solve_work, new a, new j */
1102dbb450caSBarry Smith   PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
1103416022c9SBarry Smith   b->maxnz          = b->nz = ainew[n] + shift;
11049e25ed09SBarry Smith   (*fact)->factor   = FACTOR_LU;
1105ae068f58SLois Curfman McInnes 
1106ae068f58SLois Curfman McInnes   (*fact)->info.factor_mallocs    = realloc;
1107ae068f58SLois Curfman McInnes   (*fact)->info.fill_ratio_given  = f;
1108ae068f58SLois Curfman McInnes   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
1109e93ccc4dSBarry Smith   (*fact)->factor                 =  FACTOR_LU;;
1110ae068f58SLois Curfman McInnes 
11113a40ed3dSBarry Smith   PetscFunctionReturn(0);
11129e25ed09SBarry Smith }
1113d5d45c9bSBarry Smith 
1114d5d45c9bSBarry Smith 
1115d5d45c9bSBarry Smith 
1116d5d45c9bSBarry Smith 
1117