xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision c0c2c81e95b2c5027789f2235812afc5b7df827f)
1*c0c2c81eSBarry Smith /*$Id: aijfact.c,v 1.131 1999/12/16 23:06:22 bsmith Exp bsmith $*/
2289bc588SBarry Smith 
370f55243SBarry Smith #include "src/mat/impls/aij/seq/aij.h"
490f02eecSBarry Smith #include "src/vec/vecimpl.h"
51c4feecaSSatish Balay #include "src/inline/dot.h"
63b2fbd54SBarry Smith 
75615d1e5SSatish Balay #undef __FUNC__
891e9ee9fSBarry Smith #define __FUNC__ "MatOrdering_Flow_SeqAIJ"
991e9ee9fSBarry Smith int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
103b2fbd54SBarry Smith {
113a40ed3dSBarry Smith   PetscFunctionBegin;
123a40ed3dSBarry Smith 
13e3372554SBarry Smith   SETERRQ(PETSC_ERR_SUP,0,"Code not written");
14aa482453SBarry Smith #if !defined(PETSC_USE_DEBUG)
15d64ed03dSBarry Smith   PetscFunctionReturn(0);
16d64ed03dSBarry Smith #endif
173b2fbd54SBarry Smith }
183b2fbd54SBarry Smith 
1986bacbd2SBarry Smith 
2086bacbd2SBarry Smith extern int MatMarkDiagonal_SeqAIJ(Mat);
2186bacbd2SBarry Smith extern int Mat_AIJ_CheckInode(Mat);
2286bacbd2SBarry Smith 
2386bacbd2SBarry Smith #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT)
2486bacbd2SBarry Smith 
2586bacbd2SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
2686bacbd2SBarry Smith #define dperm_  DPERM
2786bacbd2SBarry Smith #define ilutp_  ILUTP
2886bacbd2SBarry Smith #define ilu0_   ILU0
2986bacbd2SBarry Smith #define msrcsr_ MSRCSR
3086bacbd2SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3186bacbd2SBarry Smith #define dperm_  dperm
3286bacbd2SBarry Smith #define ilutp_  ilutp
3386bacbd2SBarry Smith #define ilu0_   ilu0
3486bacbd2SBarry Smith #define msrcsr_ msrcsr
3586bacbd2SBarry Smith #endif
3686bacbd2SBarry Smith 
3786bacbd2SBarry Smith EXTERN_C_BEGIN
3886bacbd2SBarry Smith extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*);
3986bacbd2SBarry Smith extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*);
4086bacbd2SBarry Smith extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*);
4186bacbd2SBarry Smith extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *);
4286bacbd2SBarry Smith EXTERN_C_END
4386bacbd2SBarry Smith 
4486bacbd2SBarry Smith #undef __FUNC__
4586bacbd2SBarry Smith #define __FUNC__ "MatILUDTFactor_SeqAIJ"
4686bacbd2SBarry Smith   /* ------------------------------------------------------------
4786bacbd2SBarry Smith 
4886bacbd2SBarry Smith           This interface was contribed by Tony Caola
4986bacbd2SBarry Smith 
5086bacbd2SBarry Smith      This routine is an interface to the pivoting drop-tolerance
5186bacbd2SBarry Smith      ILU routine written by Yousef Saad (saad@cs.umn.edu).  It
5286bacbd2SBarry Smith      was inspired by the non-pivoting iludt written by David
5386bacbd2SBarry Smith      Hysom (hysom@cs.odu.edu).
5486bacbd2SBarry Smith 
5586bacbd2SBarry Smith      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
5686bacbd2SBarry Smith      help in getting this routine ironed out.
5786bacbd2SBarry Smith 
5886bacbd2SBarry Smith      As of right now, there are a couple of things that could
5986bacbd2SBarry Smith      be, uh, better.
6086bacbd2SBarry Smith 
6186bacbd2SBarry Smith      1 - Since Saad's routine is Fortran based, memory cannot be
6286bacbd2SBarry Smith      malloc'd.  I was trying to get the expected fill from the
6386bacbd2SBarry Smith      preconditioner and use this number as the multiplier in
6486bacbd2SBarry Smith      the equation for jmax, below, but couldn't figure it out.
6586bacbd2SBarry Smith      Anyway, perhaps a better solution is to run SPARSKIT through
6686bacbd2SBarry Smith      f2c and incorporate mallocs(), but I want to graduate so I'll
6786bacbd2SBarry Smith      just rebuild Petsc. . .
6886bacbd2SBarry Smith 
6986bacbd2SBarry Smith      shift = 1, ishift = 0, for indices start at 1
7086bacbd2SBarry Smith      shift = 0, ishift = 1, for indices starting at 0
7186bacbd2SBarry Smith      ------------------------------------------------------------
7286bacbd2SBarry Smith   */
7386bacbd2SBarry Smith 
7486bacbd2SBarry Smith int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
7586bacbd2SBarry Smith {
7686bacbd2SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
7786bacbd2SBarry Smith   IS         iscolf, isrowf, isicol, isirow;
7886bacbd2SBarry Smith   PetscTruth reorder;
7986bacbd2SBarry Smith   int        *c,*r,*ic,*ir, ierr, i, n = a->m;
8086bacbd2SBarry Smith   int        *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j;
8186bacbd2SBarry Smith   int        *ordcol, *ordrow,*iwk,*iperm, *jw;
8286bacbd2SBarry Smith   int        ishift = !a->indexshift,shift = -a->indexshift;
83b19713cbSBarry Smith   int        jmax,lfill,job,*o_i,*o_j;
84b19713cbSBarry Smith   Scalar     *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0,*o_a;
8586bacbd2SBarry Smith 
8686bacbd2SBarry Smith   PetscFunctionBegin;
8786bacbd2SBarry Smith 
8886bacbd2SBarry Smith   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
8986bacbd2SBarry Smith   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax);
9086bacbd2SBarry Smith   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
9186bacbd2SBarry Smith   if (info->fill == PETSC_DEFAULT)    info->fill    = (n*info->dtcount)/a->nz;
9286bacbd2SBarry Smith   lfill   = info->dtcount/2;
9386bacbd2SBarry Smith   jmax    = info->fill*a->nz;
9486bacbd2SBarry Smith   permtol = info->dtcol;
9586bacbd2SBarry Smith 
9686bacbd2SBarry Smith   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
9786bacbd2SBarry Smith   ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr);
9886bacbd2SBarry Smith 
9986bacbd2SBarry Smith 
10086bacbd2SBarry Smith   /* ------------------------------------------------------------
10186bacbd2SBarry Smith      If reorder=.TRUE., then the original matrix has to be
10286bacbd2SBarry Smith      reordered to reflect the user selected ordering scheme, and
10386bacbd2SBarry Smith      then de-reordered so it is in it's original format.
10486bacbd2SBarry Smith      Because Saad's dperm() is NOT in place, we have to copy
10586bacbd2SBarry Smith      the original matrix and allocate more storage. . .
10686bacbd2SBarry Smith      ------------------------------------------------------------
10786bacbd2SBarry Smith   */
10886bacbd2SBarry Smith 
10986bacbd2SBarry Smith   /* set reorder to true if either isrow or iscol is not identity */
11086bacbd2SBarry Smith   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
11186bacbd2SBarry Smith   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
11286bacbd2SBarry Smith   reorder = PetscNot(reorder);
11386bacbd2SBarry Smith 
11486bacbd2SBarry Smith 
11586bacbd2SBarry Smith   /* storage for ilu factor */
11686bacbd2SBarry Smith   new_i = (int *)    PetscMalloc((n+1)*sizeof(int));   CHKPTRQ(new_i);
11786bacbd2SBarry Smith   new_j = (int *)    PetscMalloc(jmax*sizeof(int));    CHKPTRQ(new_j);
11886bacbd2SBarry Smith   new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a);
11986bacbd2SBarry Smith 
12086bacbd2SBarry Smith   if (reorder) {
12186bacbd2SBarry Smith     old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2);
12286bacbd2SBarry Smith     old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2);
12386bacbd2SBarry Smith     old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2);
12486bacbd2SBarry Smith   }
12586bacbd2SBarry Smith 
12686bacbd2SBarry Smith   ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol);
12786bacbd2SBarry Smith   ordrow = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordrow);
12886bacbd2SBarry Smith 
12986bacbd2SBarry Smith   /* ------------------------------------------------------------
13086bacbd2SBarry Smith      Make sure that everything is Fortran formatted (1-Based)
13186bacbd2SBarry Smith      ------------------------------------------------------------
13286bacbd2SBarry Smith   */
133b19713cbSBarry Smith   if (ishift) {
134b19713cbSBarry Smith     for (i=old_i[0];i<old_i[n];i++) {
135b19713cbSBarry Smith       old_j[i]++;
13686bacbd2SBarry Smith     }
137b19713cbSBarry Smith     for(i=0;i<n+1;i++) {
138b19713cbSBarry Smith       old_i[i]++;
139b19713cbSBarry Smith     };
14086bacbd2SBarry Smith   };
14186bacbd2SBarry Smith 
14286bacbd2SBarry Smith 
14386bacbd2SBarry Smith 
14486bacbd2SBarry Smith   if (reorder) {
14586bacbd2SBarry Smith     job = 3;
146*c0c2c81eSBarry Smith     ierr = ISGetIndices(iscol,&c);           CHKERRQ(ierr);
147*c0c2c81eSBarry Smith     ierr = ISGetIndices(isrow,&r);           CHKERRQ(ierr);
148*c0c2c81eSBarry Smith     for(i=0;i<n;i++) {
149*c0c2c81eSBarry Smith       r[i]  = r[i]+1;
150*c0c2c81eSBarry Smith       c[i]  = c[i]+1;
151*c0c2c81eSBarry Smith     }
152b19713cbSBarry Smith     dperm_(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
153*c0c2c81eSBarry Smith     for (i=0;i<n;i++) {
154*c0c2c81eSBarry Smith       r[i]  = r[i]-1;
155*c0c2c81eSBarry Smith       c[i]  = c[i]-1;
156*c0c2c81eSBarry Smith     }
157*c0c2c81eSBarry Smith     ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr);
158*c0c2c81eSBarry Smith     ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
159b19713cbSBarry Smith     o_a = old_a2;
160b19713cbSBarry Smith     o_j = old_j2;
161b19713cbSBarry Smith     o_i = old_i2;
162b19713cbSBarry Smith   } else {
163b19713cbSBarry Smith     o_a = old_a;
164b19713cbSBarry Smith     o_j = old_j;
165b19713cbSBarry Smith     o_i = old_i;
16686bacbd2SBarry Smith   }
16786bacbd2SBarry Smith 
16886bacbd2SBarry Smith   /* ------------------------------------------------------------
16986bacbd2SBarry Smith      Call Saad's ilutp() routine to generate the factorization
17086bacbd2SBarry Smith      ------------------------------------------------------------
17186bacbd2SBarry Smith   */
17286bacbd2SBarry Smith 
17386bacbd2SBarry Smith   iperm   = (int *)    PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm);
17486bacbd2SBarry Smith   jw      = (int *)    PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw);
17586bacbd2SBarry Smith   w       = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w);
17686bacbd2SBarry Smith 
177b19713cbSBarry Smith   ilutp_(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
17886bacbd2SBarry Smith   if (ierr) {
17986bacbd2SBarry Smith     switch (ierr) {
18086bacbd2SBarry Smith       case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax);
18186bacbd2SBarry Smith                break;
18286bacbd2SBarry Smith       case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax);
18386bacbd2SBarry Smith                break;
18486bacbd2SBarry Smith       case -5: SETERRQ(1,1,"ilutp(), zero row encountered");
18586bacbd2SBarry Smith                break;
18686bacbd2SBarry Smith       case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong");
18786bacbd2SBarry Smith                break;
18886bacbd2SBarry Smith       case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax);
18986bacbd2SBarry Smith                break;
19086bacbd2SBarry Smith       default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr);
19186bacbd2SBarry Smith     }
19286bacbd2SBarry Smith   }
19386bacbd2SBarry Smith 
19486bacbd2SBarry Smith   ierr = PetscFree(w);CHKERRQ(ierr);
19586bacbd2SBarry Smith   ierr = PetscFree(jw);CHKERRQ(ierr);
19686bacbd2SBarry Smith 
19786bacbd2SBarry Smith 
19886bacbd2SBarry Smith   /* ------------------------------------------------------------
19986bacbd2SBarry Smith      Saad's routine gives the result in Modified Sparse Row (msr)
20086bacbd2SBarry Smith      Convert to Compressed Sparse Row format (csr)
20186bacbd2SBarry Smith      ------------------------------------------------------------
20286bacbd2SBarry Smith   */
20386bacbd2SBarry Smith 
20486bacbd2SBarry Smith   wk  = (Scalar *)    PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk);
20586bacbd2SBarry Smith   iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk);
20686bacbd2SBarry Smith 
20786bacbd2SBarry Smith   msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
20886bacbd2SBarry Smith 
20986bacbd2SBarry Smith   ierr = PetscFree(iwk);CHKERRQ(ierr);
21086bacbd2SBarry Smith   ierr = PetscFree(wk);CHKERRQ(ierr);
21186bacbd2SBarry Smith 
21286bacbd2SBarry Smith   if (reorder) {
21386bacbd2SBarry Smith     ierr = PetscFree(old_a2);CHKERRQ(ierr);
21486bacbd2SBarry Smith     ierr = PetscFree(old_j2);CHKERRQ(ierr);
21586bacbd2SBarry Smith     ierr = PetscFree(old_i2);CHKERRQ(ierr);
21686bacbd2SBarry Smith   }
21786bacbd2SBarry Smith 
218b19713cbSBarry Smith   /* fix permutation of old_j that the factorization introduced */
219b19713cbSBarry Smith   if (!reorder) {
220b19713cbSBarry Smith     for (i=old_i[0]; i<=old_i[n]; i++) {
221b19713cbSBarry Smith       old_j[i-1] = iperm[old_j[i-1]-1];
222b19713cbSBarry Smith     }
223b19713cbSBarry Smith   }
224b19713cbSBarry Smith 
225b801d0f9SBarry Smith   /* get rid of the shift to indices starting at 1 */
226b19713cbSBarry Smith   if (ishift) {
22786bacbd2SBarry Smith     for (i=0; i<n+1; i++) {
228b19713cbSBarry Smith       old_i[i]--;
229b19713cbSBarry Smith     }
230b19713cbSBarry Smith     for (i=old_i[0];i<old_i[n];i++) {
231b19713cbSBarry Smith       old_j[i]--;
232b19713cbSBarry Smith     }
23386bacbd2SBarry Smith   }
23486bacbd2SBarry Smith 
235b19713cbSBarry Smith   /* Make the factored matrix 0-based */
236b19713cbSBarry Smith   if (ishift) {
23786bacbd2SBarry Smith     for (i=0; i<n+1; i++) {
238b19713cbSBarry Smith       new_i[i]--;
239b19713cbSBarry Smith     }
240b19713cbSBarry Smith     for (i=new_i[0];i<new_i[n];i++) {
241b19713cbSBarry Smith       new_j[i]--;
242b19713cbSBarry Smith     }
24386bacbd2SBarry Smith   }
24486bacbd2SBarry Smith 
24586bacbd2SBarry Smith 
24686bacbd2SBarry Smith   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
24786bacbd2SBarry Smith   /*-- permute the right-hand-side and solution vectors           --*/
248*c0c2c81eSBarry Smith   ierr = ISGetIndices(isicol,&ic);          CHKERRQ(ierr);
249*c0c2c81eSBarry Smith   ierr = ISGetIndices(isirow,&ir);          CHKERRQ(ierr);
25086bacbd2SBarry Smith   for(i=0; i<n; i++) {
25186bacbd2SBarry Smith     ordcol[i] = ic[iperm[i]-1];
25286bacbd2SBarry Smith     ordrow[i] = ir[i];
25386bacbd2SBarry Smith   };
25486bacbd2SBarry Smith   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
25586bacbd2SBarry Smith   ierr = ISRestoreIndices(isirow,&ir); CHKERRQ(ierr);
25686bacbd2SBarry Smith 
257*c0c2c81eSBarry Smith   ierr = PetscFree(iperm);CHKERRQ(ierr);
258*c0c2c81eSBarry Smith 
25986bacbd2SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf);
26086bacbd2SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordrow,&isrowf);
26186bacbd2SBarry Smith 
26286bacbd2SBarry Smith   /*----- put together the new matrix -----*/
26386bacbd2SBarry Smith 
26486bacbd2SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr);
26586bacbd2SBarry Smith   (*fact)->factor    = FACTOR_LU;
26686bacbd2SBarry Smith   (*fact)->assembled = PETSC_TRUE;
26786bacbd2SBarry Smith 
26886bacbd2SBarry Smith   b = (Mat_SeqAIJ *) (*fact)->data;
26986bacbd2SBarry Smith   ierr = PetscFree(b->imax);CHKERRQ(ierr);
27086bacbd2SBarry Smith   b->sorted        = PETSC_FALSE;
27186bacbd2SBarry Smith   b->singlemalloc  = PETSC_TRUE;
272b19713cbSBarry Smith   /* the next line frees the default space generated by the MatCreate() */
27386bacbd2SBarry Smith   ierr             = PetscFree(b->a);CHKERRQ(ierr);
27486bacbd2SBarry Smith   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
27586bacbd2SBarry Smith   b->a             = new_a;
27686bacbd2SBarry Smith   b->j             = new_j;
27786bacbd2SBarry Smith   b->i             = new_i;
27886bacbd2SBarry Smith   b->ilen          = 0;
27986bacbd2SBarry Smith   b->imax          = 0;
28086bacbd2SBarry Smith   b->row           = isrowf;
28186bacbd2SBarry Smith   b->col           = iscolf;
28286bacbd2SBarry Smith   b->solve_work    =  (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
28386bacbd2SBarry Smith   b->maxnz = b->nz = new_i[n];
28486bacbd2SBarry Smith   b->indexshift    = a->indexshift;
28586bacbd2SBarry Smith   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
28686bacbd2SBarry Smith   (*fact)->info.factor_mallocs = 0;
28786bacbd2SBarry Smith 
28886bacbd2SBarry Smith   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
28986bacbd2SBarry Smith 
29086bacbd2SBarry Smith   /* check out for identical nodes. If found, use inode functions */
29186bacbd2SBarry Smith   ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr);
29286bacbd2SBarry Smith 
29386bacbd2SBarry Smith   PetscFunctionReturn(0);
29486bacbd2SBarry Smith }
29586bacbd2SBarry Smith #else
29686bacbd2SBarry Smith #undef __FUNC__
29786bacbd2SBarry Smith #define __FUNC__ "MatILUDTFactor_SeqAIJ"
298c3b2863dSBarry Smith int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
29986bacbd2SBarry Smith {
30086bacbd2SBarry Smith   PetscFunctionBegin;
30186bacbd2SBarry Smith   SETERRQ(1,1,"You must install Saad's ILUDT to use this");
30286bacbd2SBarry Smith }
30386bacbd2SBarry Smith #endif
30486bacbd2SBarry Smith 
305289bc588SBarry Smith /*
306289bc588SBarry Smith     Factorization code for AIJ format.
307289bc588SBarry Smith */
3085615d1e5SSatish Balay #undef __FUNC__
3095615d1e5SSatish Balay #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ"
310416022c9SBarry Smith int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B)
311289bc588SBarry Smith {
312416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
313289bc588SBarry Smith   IS         isicol;
314416022c9SBarry Smith   int        *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j;
315416022c9SBarry Smith   int        *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift;
31691bf9adeSBarry Smith   int        *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im;
317289bc588SBarry Smith 
3183a40ed3dSBarry Smith   PetscFunctionBegin;
319d3cbd50cSLois Curfman McInnes   PetscValidHeaderSpecific(isrow,IS_COOKIE);
320d3cbd50cSLois Curfman McInnes   PetscValidHeaderSpecific(iscol,IS_COOKIE);
321f1af5d2fSBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square");
3223b2fbd54SBarry Smith 
32378b31e54SBarry Smith   ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr);
324ac121b3dSBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
325ac121b3dSBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
326289bc588SBarry Smith 
327289bc588SBarry Smith   /* get new row pointers */
3280452661fSBarry Smith   ainew    = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew);
329dbb450caSBarry Smith   ainew[0] = -shift;
330289bc588SBarry Smith   /* don't know how many column pointers are needed so estimate */
331dbb450caSBarry Smith   jmax  = (int) (f*ai[n]+(!shift));
3320452661fSBarry Smith   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew);
333289bc588SBarry Smith   /* fill is a linked list of nonzeros in active row */
3340452661fSBarry Smith   fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill);
3352fb3ed76SBarry Smith   im   = fill + n + 1;
336289bc588SBarry Smith   /* idnew is location of diagonal in factor */
3370452661fSBarry Smith   idnew    = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew);
338dbb450caSBarry Smith   idnew[0] = -shift;
339289bc588SBarry Smith 
340289bc588SBarry Smith   for ( i=0; i<n; i++ ) {
341289bc588SBarry Smith     /* first copy previous fill into linked list */
342289bc588SBarry Smith     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
3436b96ef82SSatish Balay     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
344dbb450caSBarry Smith     ajtmp   = aj + ai[r[i]] + shift;
345289bc588SBarry Smith     fill[n] = n;
346289bc588SBarry Smith     while (nz--) {
347289bc588SBarry Smith       fm  = n;
348dbb450caSBarry Smith       idx = ic[*ajtmp++ + shift];
349289bc588SBarry Smith       do {
350289bc588SBarry Smith         m  = fm;
351289bc588SBarry Smith         fm = fill[m];
352289bc588SBarry Smith       } while (fm < idx);
353289bc588SBarry Smith       fill[m]   = idx;
354289bc588SBarry Smith       fill[idx] = fm;
355289bc588SBarry Smith     }
356289bc588SBarry Smith     row = fill[n];
357289bc588SBarry Smith     while ( row < i ) {
358dbb450caSBarry Smith       ajtmp = ajnew + idnew[row] + (!shift);
3592fb3ed76SBarry Smith       nzbd  = 1 + idnew[row] - ainew[row];
3602fb3ed76SBarry Smith       nz    = im[row] - nzbd;
361289bc588SBarry Smith       fm    = row;
3622fb3ed76SBarry Smith       while (nz-- > 0) {
363dbb450caSBarry Smith         idx = *ajtmp++ + shift;
3642fb3ed76SBarry Smith         nzbd++;
3652fb3ed76SBarry Smith         if (idx == i) im[row] = nzbd;
366289bc588SBarry Smith         do {
367289bc588SBarry Smith           m  = fm;
368289bc588SBarry Smith           fm = fill[m];
369289bc588SBarry Smith         } while (fm < idx);
370289bc588SBarry Smith         if (fm != idx) {
371289bc588SBarry Smith           fill[m]   = idx;
372289bc588SBarry Smith           fill[idx] = fm;
373289bc588SBarry Smith           fm        = idx;
374289bc588SBarry Smith           nnz++;
375289bc588SBarry Smith         }
376289bc588SBarry Smith       }
377289bc588SBarry Smith       row = fill[row];
378289bc588SBarry Smith     }
379289bc588SBarry Smith     /* copy new filled row into permanent storage */
380289bc588SBarry Smith     ainew[i+1] = ainew[i] + nnz;
381496e697eSBarry Smith     if (ainew[i+1] > jmax) {
382ecf371e4SBarry Smith 
383ecf371e4SBarry Smith       /* estimate how much additional space we will need */
384ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
385ecf371e4SBarry Smith       /* just double the memory each time */
386ecf371e4SBarry Smith       int maxadd = jmax;
387ecf371e4SBarry Smith       /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */
388bbb6d6a8SBarry Smith       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
3892fb3ed76SBarry Smith       jmax += maxadd;
390ecf371e4SBarry Smith 
391ecf371e4SBarry Smith       /* allocate a longer ajnew */
3920452661fSBarry Smith       ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp);
393549d3d68SSatish Balay       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr);
394606d414cSSatish Balay       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
395289bc588SBarry Smith       ajnew = ajtmp;
3962fb3ed76SBarry Smith       realloc++; /* count how many times we realloc */
397289bc588SBarry Smith     }
398dbb450caSBarry Smith     ajtmp = ajnew + ainew[i] + shift;
399289bc588SBarry Smith     fm    = fill[n];
400289bc588SBarry Smith     nzi   = 0;
4012fb3ed76SBarry Smith     im[i] = nnz;
402289bc588SBarry Smith     while (nnz--) {
403289bc588SBarry Smith       if (fm < i) nzi++;
404dbb450caSBarry Smith       *ajtmp++ = fm - shift;
405289bc588SBarry Smith       fm       = fill[fm];
406289bc588SBarry Smith     }
407289bc588SBarry Smith     idnew[i] = ainew[i] + nzi;
408289bc588SBarry Smith   }
409464e8d44SSatish Balay   if (ai[n] != 0) {
410464e8d44SSatish Balay     double af = ((double)ainew[n])/((double)ai[n]);
411c38d4ed2SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
412981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
413981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
414981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
41505bf559cSSatish Balay   } else {
416981c4779SBarry Smith     PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
41705bf559cSSatish Balay   }
4182fb3ed76SBarry Smith 
419898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
420898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
4211987afe7SBarry Smith 
422606d414cSSatish Balay   ierr = PetscFree(fill);CHKERRQ(ierr);
423289bc588SBarry Smith 
424289bc588SBarry Smith   /* put together the new matrix */
425b4fd4287SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr);
4261987afe7SBarry Smith   PLogObjectParent(*B,isicol);
427416022c9SBarry Smith   b = (Mat_SeqAIJ *) (*B)->data;
428606d414cSSatish Balay   ierr = PetscFree(b->imax);CHKERRQ(ierr);
4297c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
430e8d4e0b9SBarry Smith   /* the next line frees the default space generated by the Create() */
431606d414cSSatish Balay   ierr = PetscFree(b->a);CHKERRQ(ierr);
432606d414cSSatish Balay   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
43391bf9adeSBarry Smith   b->a          = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a);
434416022c9SBarry Smith   b->j          = ajnew;
435416022c9SBarry Smith   b->i          = ainew;
436416022c9SBarry Smith   b->diag       = idnew;
437416022c9SBarry Smith   b->ilen       = 0;
438416022c9SBarry Smith   b->imax       = 0;
439416022c9SBarry Smith   b->row        = isrow;
440416022c9SBarry Smith   b->col        = iscol;
441c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
442c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
44382bf6240SBarry Smith   b->icol       = isicol;
44491bf9adeSBarry Smith   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
445416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
4467fd98bd6SLois Curfman McInnes      Allocate idnew, solve_work, new a, new j */
447416022c9SBarry Smith   PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
448416022c9SBarry Smith   b->maxnz = b->nz = ainew[n] + shift;
4497fd98bd6SLois Curfman McInnes 
450e93ccc4dSBarry Smith   (*B)->factor                 =  FACTOR_LU;;
451ae068f58SLois Curfman McInnes   (*B)->info.factor_mallocs    = realloc;
452ae068f58SLois Curfman McInnes   (*B)->info.fill_ratio_given  = f;
453703deb49SSatish Balay   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant if A has inodes */
454703deb49SSatish Balay 
455e93ccc4dSBarry Smith   if (ai[n] != 0) {
456e93ccc4dSBarry Smith     (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]);
457eea2913aSSatish Balay   } else {
458eea2913aSSatish Balay     (*B)->info.fill_ratio_needed = 0.0;
459eea2913aSSatish Balay   }
4603a40ed3dSBarry Smith   PetscFunctionReturn(0);
461289bc588SBarry Smith }
4620a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
463184914b5SBarry Smith extern int Mat_AIJ_CheckInode(Mat);
46441c01911SSatish Balay 
4655615d1e5SSatish Balay #undef __FUNC__
4665615d1e5SSatish Balay #define __FUNC__ "MatLUFactorNumeric_SeqAIJ"
467416022c9SBarry Smith int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
468289bc588SBarry Smith {
469416022c9SBarry Smith   Mat        C = *B;
470416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data;
47182bf6240SBarry Smith   IS         isrow = b->row, isicol = b->icol;
472416022c9SBarry Smith   int        *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j;
4731987afe7SBarry Smith   int        *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift;
474f2582111SSatish Balay   int        *diag_offset = b->diag,diag,k;
47535aab85fSBarry Smith   int        preserve_row_sums = (int) a->ilu_preserve_row_sums;
4763a40ed3dSBarry Smith   register   int    *pj;
4778ecf7165SBarry Smith   Scalar     *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0;
47835aab85fSBarry Smith   double     ssum;
47935aab85fSBarry Smith   register   Scalar *pv, *rtmps,*u_values;
480289bc588SBarry Smith 
4813a40ed3dSBarry Smith   PetscFunctionBegin;
48282bf6240SBarry Smith 
48378b31e54SBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
48478b31e54SBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
4850452661fSBarry Smith   rtmp  = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp);
486549d3d68SSatish Balay   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr);
4870cf60383SBarry Smith   rtmps = rtmp + shift; ics = ic + shift;
488289bc588SBarry Smith 
4896cf997b0SBarry Smith   /* precalculate row sums */
49035aab85fSBarry Smith   if (preserve_row_sums) {
49135aab85fSBarry Smith     rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums);
49235aab85fSBarry Smith     for ( i=0; i<n; i++ ) {
49335aab85fSBarry Smith       nz  = a->i[r[i]+1] - a->i[r[i]];
49435aab85fSBarry Smith       v   = a->a + a->i[r[i]] + shift;
49535aab85fSBarry Smith       sum = 0.0;
49635aab85fSBarry Smith       for ( j=0; j<nz; j++ ) sum += v[j];
49735aab85fSBarry Smith       rowsums[i] = sum;
49835aab85fSBarry Smith     }
49935aab85fSBarry Smith   }
50035aab85fSBarry Smith 
501289bc588SBarry Smith   for ( i=0; i<n; i++ ) {
502289bc588SBarry Smith     nz    = ai[i+1] - ai[i];
503dbb450caSBarry Smith     ajtmp = aj + ai[i] + shift;
50448e94559SBarry Smith     for  ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0;
505289bc588SBarry Smith 
506289bc588SBarry Smith     /* load in initial (unfactored row) */
507416022c9SBarry Smith     nz       = a->i[r[i]+1] - a->i[r[i]];
508416022c9SBarry Smith     ajtmpold = a->j + a->i[r[i]] + shift;
509416022c9SBarry Smith     v        = a->a + a->i[r[i]] + shift;
5100cf60383SBarry Smith     for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] =  v[j];
511289bc588SBarry Smith 
512dbb450caSBarry Smith     row = *ajtmp++ + shift;
513f2582111SSatish Balay     while  (row < i ) {
5148c37ef55SBarry Smith       pc = rtmp + row;
5158c37ef55SBarry Smith       if (*pc != 0.0) {
5161987afe7SBarry Smith         pv         = b->a + diag_offset[row] + shift;
5171987afe7SBarry Smith         pj         = b->j + diag_offset[row] + (!shift);
51835aab85fSBarry Smith         multiplier = *pc / *pv++;
5198c37ef55SBarry Smith         *pc        = multiplier;
520f2582111SSatish Balay         nz         = ai[row+1] - diag_offset[row] - 1;
521f2582111SSatish Balay         for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
522f2582111SSatish Balay         PLogFlops(2*nz);
523289bc588SBarry Smith       }
524dbb450caSBarry Smith       row = *ajtmp++ + shift;
525289bc588SBarry Smith     }
526416022c9SBarry Smith     /* finished row so stick it into b->a */
527416022c9SBarry Smith     pv = b->a + ai[i] + shift;
528416022c9SBarry Smith     pj = b->j + ai[i] + shift;
5298c37ef55SBarry Smith     nz = ai[i+1] - ai[i];
530416022c9SBarry Smith     for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];}
53135aab85fSBarry Smith     diag = diag_offset[i] - ai[i];
53235aab85fSBarry Smith     /*
53335aab85fSBarry Smith           Possibly adjust diagonal entry on current row to force
53435aab85fSBarry Smith         LU matrix to have same row sum as initial matrix.
53535aab85fSBarry Smith     */
536d708749eSBarry Smith     if (pv[diag] == 0.0) {
5376cf997b0SBarry Smith       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i);
538d708749eSBarry Smith     }
53935aab85fSBarry Smith     if (preserve_row_sums) {
54035aab85fSBarry Smith       pj  = b->j + ai[i] + shift;
54135aab85fSBarry Smith       sum = rowsums[i];
54235aab85fSBarry Smith       for ( j=0; j<diag; j++ ) {
54335aab85fSBarry Smith         u_values  = b->a + diag_offset[pj[j]] + shift;
54435aab85fSBarry Smith         nz        = ai[pj[j]+1] - diag_offset[pj[j]];
54535aab85fSBarry Smith         inner_sum = 0.0;
54635aab85fSBarry Smith         for ( k=0; k<nz; k++ ) {
54735aab85fSBarry Smith           inner_sum += u_values[k];
54835aab85fSBarry Smith         }
54935aab85fSBarry Smith         sum -= pv[j]*inner_sum;
55035aab85fSBarry Smith 
55135aab85fSBarry Smith       }
55235aab85fSBarry Smith       nz       = ai[i+1] - diag_offset[i] - 1;
55335aab85fSBarry Smith       u_values = b->a + diag_offset[i] + 1 + shift;
55435aab85fSBarry Smith       for ( k=0; k<nz; k++ ) {
55535aab85fSBarry Smith         sum -= u_values[k];
55635aab85fSBarry Smith       }
55735aab85fSBarry Smith       ssum = PetscAbsScalar(sum/pv[diag]);
55835aab85fSBarry Smith       if (ssum < 1000. && ssum > .001) pv[diag] = sum;
55935aab85fSBarry Smith     }
56035aab85fSBarry Smith     /* check pivot entry for current row */
5618c37ef55SBarry Smith   }
5620f11f9aeSSatish Balay 
56335aab85fSBarry Smith   /* invert diagonal entries for simplier triangular solves */
56435aab85fSBarry Smith   for ( i=0; i<n; i++ ) {
56535aab85fSBarry Smith     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
56635aab85fSBarry Smith   }
56735aab85fSBarry Smith 
568606d414cSSatish Balay   if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);}
569606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
57078b31e54SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
57178b31e54SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
572416022c9SBarry Smith   C->factor = FACTOR_LU;
57341c01911SSatish Balay   ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr);
574c456f294SBarry Smith   C->assembled = PETSC_TRUE;
575416022c9SBarry Smith   PLogFlops(b->n);
5763a40ed3dSBarry Smith   PetscFunctionReturn(0);
577289bc588SBarry Smith }
5780a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
5795615d1e5SSatish Balay #undef __FUNC__
5805615d1e5SSatish Balay #define __FUNC__ "MatLUFactor_SeqAIJ"
581416022c9SBarry Smith int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f)
582da3a660dSBarry Smith {
583416022c9SBarry Smith   Mat_SeqAIJ     *mat = (Mat_SeqAIJ *) A->data;
58486bacbd2SBarry Smith   int            ierr,refct;
585416022c9SBarry Smith   Mat            C;
586f830108cSBarry Smith   PetscOps       *Abops;
5870a6ffc59SBarry Smith   MatOps         Aops;
588416022c9SBarry Smith 
5893a40ed3dSBarry Smith   PetscFunctionBegin;
590f2582111SSatish Balay   ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr);
591f2582111SSatish Balay   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
592da3a660dSBarry Smith 
593da3a660dSBarry Smith   /* free all the data structures from mat */
594606d414cSSatish Balay   ierr = PetscFree(mat->a);CHKERRQ(ierr);
595606d414cSSatish Balay   if (!mat->singlemalloc) {
596606d414cSSatish Balay     ierr = PetscFree(mat->i);CHKERRQ(ierr);
597606d414cSSatish Balay     ierr = PetscFree(mat->j);CHKERRQ(ierr);
598606d414cSSatish Balay   }
599606d414cSSatish Balay   if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);}
600606d414cSSatish Balay   if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);}
601606d414cSSatish Balay   if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);}
602606d414cSSatish Balay   if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);}
603606d414cSSatish Balay   if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);}
6040f0aacb9SBarry Smith   if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);}
605606d414cSSatish Balay   ierr = PetscFree(mat);CHKERRQ(ierr);
606da3a660dSBarry Smith 
60717642b18SBarry Smith   ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
60817642b18SBarry Smith   ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
60917642b18SBarry Smith 
610f830108cSBarry Smith   /*
611f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
612f830108cSBarry Smith     A pointers for the bops and ops but copy everything
613f830108cSBarry Smith     else from C.
614f830108cSBarry Smith   */
615f830108cSBarry Smith   Abops = A->bops;
616f830108cSBarry Smith   Aops  = A->ops;
61786bacbd2SBarry Smith   refct = A->refct;
618549d3d68SSatish Balay   ierr  = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
619451c4af8SSatish Balay   mat   = (Mat_SeqAIJ *) A->data;
620451c4af8SSatish Balay   PLogObjectParent(A,mat->icol);
621451c4af8SSatish Balay 
622f830108cSBarry Smith   A->bops  = Abops;
623f830108cSBarry Smith   A->ops   = Aops;
624bef8e0ddSBarry Smith   A->qlist = 0;
62586bacbd2SBarry Smith   A->refct = refct;
626c38d4ed2SBarry Smith   /* copy over the type_name and name */
627c38d4ed2SBarry Smith   ierr     = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr);
628c38d4ed2SBarry Smith   ierr     = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr);
629f830108cSBarry Smith 
6300452661fSBarry Smith   PetscHeaderDestroy(C);
631c38d4ed2SBarry Smith 
6323a40ed3dSBarry Smith   PetscFunctionReturn(0);
633da3a660dSBarry Smith }
6340a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
6355615d1e5SSatish Balay #undef __FUNC__
6365615d1e5SSatish Balay #define __FUNC__ "MatSolve_SeqAIJ"
637416022c9SBarry Smith int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx)
6388c37ef55SBarry Smith {
639416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
640416022c9SBarry Smith   IS         iscol = a->col, isrow = a->row;
641416022c9SBarry Smith   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
6423b2fbd54SBarry Smith   int        nz,shift = a->indexshift,*rout,*cout;
643416022c9SBarry Smith   Scalar     *x,*b,*tmp, *tmps, *aa = a->a, sum, *v;
6448c37ef55SBarry Smith 
6453a40ed3dSBarry Smith   PetscFunctionBegin;
6463a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
64791bf9adeSBarry Smith 
648e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
649e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
650416022c9SBarry Smith   tmp  = a->solve_work;
6518c37ef55SBarry Smith 
6523b2fbd54SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
6533b2fbd54SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
6548c37ef55SBarry Smith 
6558c37ef55SBarry Smith   /* forward solve the lower triangular */
6568c37ef55SBarry Smith   tmp[0] = b[*r++];
6570cf60383SBarry Smith   tmps   = tmp + shift;
6588c37ef55SBarry Smith   for ( i=1; i<n; i++ ) {
659dbb450caSBarry Smith     v   = aa + ai[i] + shift;
660dbb450caSBarry Smith     vi  = aj + ai[i] + shift;
661416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
6628c37ef55SBarry Smith     sum = b[*r++];
6630cf60383SBarry Smith     while (nz--) sum -= *v++ * tmps[*vi++];
6648c37ef55SBarry Smith     tmp[i] = sum;
6658c37ef55SBarry Smith   }
6668c37ef55SBarry Smith 
6678c37ef55SBarry Smith   /* backward solve the upper triangular */
6688c37ef55SBarry Smith   for ( i=n-1; i>=0; i-- ){
669416022c9SBarry Smith     v   = aa + a->diag[i] + (!shift);
670416022c9SBarry Smith     vi  = aj + a->diag[i] + (!shift);
671416022c9SBarry Smith     nz  = ai[i+1] - a->diag[i] - 1;
6728c37ef55SBarry Smith     sum = tmp[i];
6730cf60383SBarry Smith     while (nz--) sum -= *v++ * tmps[*vi++];
674416022c9SBarry Smith     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
6758c37ef55SBarry Smith   }
6768c37ef55SBarry Smith 
6773b2fbd54SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
6783b2fbd54SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
679cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
680cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
681416022c9SBarry Smith   PLogFlops(2*a->nz - a->n);
6823a40ed3dSBarry Smith   PetscFunctionReturn(0);
6838c37ef55SBarry Smith }
684026e39d0SSatish Balay 
685930ae53cSBarry Smith /* ----------------------------------------------------------- */
686930ae53cSBarry Smith #undef __FUNC__
687930ae53cSBarry Smith #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering"
688930ae53cSBarry Smith int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx)
689930ae53cSBarry Smith {
690930ae53cSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
691d85afc42SSatish Balay   int        n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr;
692d85afc42SSatish Balay   Scalar     *x,*b, *aa = a->a, sum;
693aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
694d85afc42SSatish Balay   int        adiag_i,i,*vi,nz,ai_i;
695d85afc42SSatish Balay   Scalar     *v;
696d85afc42SSatish Balay #endif
697930ae53cSBarry Smith 
6983a40ed3dSBarry Smith   PetscFunctionBegin;
6993a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
700930ae53cSBarry Smith   if (a->indexshift) {
7013a40ed3dSBarry Smith      ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr);
7023a40ed3dSBarry Smith      PetscFunctionReturn(0);
703930ae53cSBarry Smith   }
704930ae53cSBarry Smith 
705e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
706e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
707930ae53cSBarry Smith 
708aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
7091c4feecaSSatish Balay   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
7101c4feecaSSatish Balay #else
711930ae53cSBarry Smith   /* forward solve the lower triangular */
712930ae53cSBarry Smith   x[0] = b[0];
713930ae53cSBarry Smith   for ( i=1; i<n; i++ ) {
714930ae53cSBarry Smith     ai_i = ai[i];
715930ae53cSBarry Smith     v    = aa + ai_i;
716930ae53cSBarry Smith     vi   = aj + ai_i;
717930ae53cSBarry Smith     nz   = adiag[i] - ai_i;
718930ae53cSBarry Smith     sum  = b[i];
719930ae53cSBarry Smith     while (nz--) sum -= *v++ * x[*vi++];
720930ae53cSBarry Smith     x[i] = sum;
721930ae53cSBarry Smith   }
722930ae53cSBarry Smith 
723930ae53cSBarry Smith   /* backward solve the upper triangular */
724930ae53cSBarry Smith   for ( i=n-1; i>=0; i-- ){
725930ae53cSBarry Smith     adiag_i = adiag[i];
726d708749eSBarry Smith     v       = aa + adiag_i + 1;
727d708749eSBarry Smith     vi      = aj + adiag_i + 1;
728930ae53cSBarry Smith     nz      = ai[i+1] - adiag_i - 1;
729930ae53cSBarry Smith     sum     = x[i];
730930ae53cSBarry Smith     while (nz--) sum -= *v++ * x[*vi++];
731930ae53cSBarry Smith     x[i]    = sum*aa[adiag_i];
732930ae53cSBarry Smith   }
7331c4feecaSSatish Balay #endif
734930ae53cSBarry Smith   PLogFlops(2*a->nz - a->n);
735cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
736cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7373a40ed3dSBarry Smith   PetscFunctionReturn(0);
738930ae53cSBarry Smith }
739930ae53cSBarry Smith 
7405615d1e5SSatish Balay #undef __FUNC__
7415615d1e5SSatish Balay #define __FUNC__ "MatSolveAdd_SeqAIJ"
742416022c9SBarry Smith int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx)
743da3a660dSBarry Smith {
744416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
745416022c9SBarry Smith   IS         iscol = a->col, isrow = a->row;
746416022c9SBarry Smith   int        *r,*c, ierr, i,  n = a->m, *vi, *ai = a->i, *aj = a->j;
7473b2fbd54SBarry Smith   int        nz, shift = a->indexshift,*rout,*cout;
748416022c9SBarry Smith   Scalar     *x,*b,*tmp, *aa = a->a, sum, *v;
749da3a660dSBarry Smith 
7503a40ed3dSBarry Smith   PetscFunctionBegin;
75178b31e54SBarry Smith   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
752da3a660dSBarry Smith 
753e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
754e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
755416022c9SBarry Smith   tmp  = a->solve_work;
756da3a660dSBarry Smith 
7573b2fbd54SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
7583b2fbd54SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
759da3a660dSBarry Smith 
760da3a660dSBarry Smith   /* forward solve the lower triangular */
761da3a660dSBarry Smith   tmp[0] = b[*r++];
762da3a660dSBarry Smith   for ( i=1; i<n; i++ ) {
763dbb450caSBarry Smith     v   = aa + ai[i] + shift;
764dbb450caSBarry Smith     vi  = aj + ai[i] + shift;
765416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
766da3a660dSBarry Smith     sum = b[*r++];
767dbb450caSBarry Smith     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
768da3a660dSBarry Smith     tmp[i] = sum;
769da3a660dSBarry Smith   }
770da3a660dSBarry Smith 
771da3a660dSBarry Smith   /* backward solve the upper triangular */
772da3a660dSBarry Smith   for ( i=n-1; i>=0; i-- ){
773416022c9SBarry Smith     v   = aa + a->diag[i] + (!shift);
774416022c9SBarry Smith     vi  = aj + a->diag[i] + (!shift);
775416022c9SBarry Smith     nz  = ai[i+1] - a->diag[i] - 1;
776da3a660dSBarry Smith     sum = tmp[i];
777dbb450caSBarry Smith     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
778416022c9SBarry Smith     tmp[i] = sum*aa[a->diag[i]+shift];
779da3a660dSBarry Smith     x[*c--] += tmp[i];
780da3a660dSBarry Smith   }
781da3a660dSBarry Smith 
7823b2fbd54SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
7833b2fbd54SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
784cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
785cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
786416022c9SBarry Smith   PLogFlops(2*a->nz);
787898183ecSLois Curfman McInnes 
7883a40ed3dSBarry Smith   PetscFunctionReturn(0);
789da3a660dSBarry Smith }
790da3a660dSBarry Smith /* -------------------------------------------------------------------*/
7915615d1e5SSatish Balay #undef __FUNC__
7927c922b88SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqAIJ"
7937c922b88SBarry Smith int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx)
794da3a660dSBarry Smith {
795416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
7962235e667SBarry Smith   IS         iscol = a->col, isrow = a->row;
797416022c9SBarry Smith   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
798f1af5d2fSBarry Smith   int        nz,shift = a->indexshift,*rout,*cout, *diag = a->diag;
799f1af5d2fSBarry Smith   Scalar     *x,*b,*tmp, *aa = a->a, *v, s1;
800da3a660dSBarry Smith 
8013a40ed3dSBarry Smith   PetscFunctionBegin;
802e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
803e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
804416022c9SBarry Smith   tmp  = a->solve_work;
805da3a660dSBarry Smith 
8062235e667SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
8072235e667SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
808da3a660dSBarry Smith 
809da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
8102235e667SBarry Smith   for ( i=0; i<n; i++ ) tmp[i] = b[c[i]];
811da3a660dSBarry Smith 
812da3a660dSBarry Smith   /* forward solve the U^T */
813da3a660dSBarry Smith   for ( i=0; i<n; i++ ) {
814f1af5d2fSBarry Smith     v   = aa + diag[i] + shift;
815f1af5d2fSBarry Smith     vi  = aj + diag[i] + (!shift);
816f1af5d2fSBarry Smith     nz  = ai[i+1] - diag[i] - 1;
817c38d4ed2SBarry Smith     s1  = tmp[i];
818c38d4ed2SBarry Smith     s1 *= *(v++);  /* multiply by inverse of diagonal entry */
819da3a660dSBarry Smith     while (nz--) {
820f1af5d2fSBarry Smith       tmp[*vi++ + shift] -= (*v++)*s1;
821da3a660dSBarry Smith     }
822c38d4ed2SBarry Smith     tmp[i] = s1;
823da3a660dSBarry Smith   }
824da3a660dSBarry Smith 
825da3a660dSBarry Smith   /* backward solve the L^T */
826da3a660dSBarry Smith   for ( i=n-1; i>=0; i-- ){
827f1af5d2fSBarry Smith     v   = aa + diag[i] - 1 + shift;
828f1af5d2fSBarry Smith     vi  = aj + diag[i] - 1 + shift;
829f1af5d2fSBarry Smith     nz  = diag[i] - ai[i];
830f1af5d2fSBarry Smith     s1  = tmp[i];
831da3a660dSBarry Smith     while (nz--) {
832f1af5d2fSBarry Smith       tmp[*vi-- + shift] -= (*v--)*s1;
833da3a660dSBarry Smith     }
834da3a660dSBarry Smith   }
835da3a660dSBarry Smith 
836da3a660dSBarry Smith   /* copy tmp into x according to permutation */
837da3a660dSBarry Smith   for ( i=0; i<n; i++ ) x[r[i]] = tmp[i];
838da3a660dSBarry Smith 
8392235e667SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
8402235e667SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
841cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
842cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
843da3a660dSBarry Smith 
844416022c9SBarry Smith   PLogFlops(2*a->nz-a->n);
8453a40ed3dSBarry Smith   PetscFunctionReturn(0);
846da3a660dSBarry Smith }
847da3a660dSBarry Smith 
8485615d1e5SSatish Balay #undef __FUNC__
8497c922b88SBarry Smith #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ"
8507c922b88SBarry Smith int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx)
851da3a660dSBarry Smith {
852416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
8532235e667SBarry Smith   IS         iscol = a->col, isrow = a->row;
854416022c9SBarry Smith   int        *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j;
855f1af5d2fSBarry Smith   int        nz,shift = a->indexshift, *rout, *cout, *diag = a->diag;
856416022c9SBarry Smith   Scalar     *x,*b,*tmp, *aa = a->a, *v;
8576abc6512SBarry Smith 
8583a40ed3dSBarry Smith   PetscFunctionBegin;
8596abc6512SBarry Smith   if (zz != xx) VecCopy(zz,xx);
8606abc6512SBarry Smith 
861e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
862e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
863416022c9SBarry Smith   tmp = a->solve_work;
8646abc6512SBarry Smith 
8652235e667SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
8662235e667SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
8676abc6512SBarry Smith 
8686abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
8692235e667SBarry Smith   for ( i=0; i<n; i++ ) tmp[i] = b[c[i]];
8706abc6512SBarry Smith 
8716abc6512SBarry Smith   /* forward solve the U^T */
8726abc6512SBarry Smith   for ( i=0; i<n; i++ ) {
873f1af5d2fSBarry Smith     v   = aa + diag[i] + shift;
874f1af5d2fSBarry Smith     vi  = aj + diag[i] + (!shift);
875f1af5d2fSBarry Smith     nz  = ai[i+1] - diag[i] - 1;
8766abc6512SBarry Smith     tmp[i] *= *v++;
8776abc6512SBarry Smith     while (nz--) {
878dbb450caSBarry Smith       tmp[*vi++ + shift] -= (*v++)*tmp[i];
8796abc6512SBarry Smith     }
8806abc6512SBarry Smith   }
8816abc6512SBarry Smith 
8826abc6512SBarry Smith   /* backward solve the L^T */
8836abc6512SBarry Smith   for ( i=n-1; i>=0; i-- ){
884f1af5d2fSBarry Smith     v   = aa + diag[i] - 1 + shift;
885f1af5d2fSBarry Smith     vi  = aj + diag[i] - 1 + shift;
886f1af5d2fSBarry Smith     nz  = diag[i] - ai[i];
8876abc6512SBarry Smith     while (nz--) {
888dbb450caSBarry Smith       tmp[*vi-- + shift] -= (*v--)*tmp[i];
8896abc6512SBarry Smith     }
8906abc6512SBarry Smith   }
8916abc6512SBarry Smith 
8926abc6512SBarry Smith   /* copy tmp into x according to permutation */
8936abc6512SBarry Smith   for ( i=0; i<n; i++ ) x[r[i]] += tmp[i];
8946abc6512SBarry Smith 
8952235e667SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
8962235e667SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
897cb5b572fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
898cb5b572fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8996abc6512SBarry Smith 
900416022c9SBarry Smith   PLogFlops(2*a->nz);
9013a40ed3dSBarry Smith   PetscFunctionReturn(0);
902da3a660dSBarry Smith }
9039e25ed09SBarry Smith /* ----------------------------------------------------------------*/
9047c922b88SBarry Smith extern int MatMissingDiagonal_SeqAIJ(Mat);
90508480c60SBarry Smith 
9065615d1e5SSatish Balay #undef __FUNC__
9075615d1e5SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ"
9086cf997b0SBarry Smith int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
9099e25ed09SBarry Smith {
910416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b;
9119e25ed09SBarry Smith   IS         isicol;
912416022c9SBarry Smith   int        *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j;
91356beaf04SBarry Smith   int        *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
914335d9088SBarry Smith   int        *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0, dcount = 0;
9156cf997b0SBarry Smith   int        incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill;
91677c4ece6SBarry Smith   PetscTruth col_identity, row_identity;
9176cf997b0SBarry Smith   double     f;
9189e25ed09SBarry Smith 
9193a40ed3dSBarry Smith   PetscFunctionBegin;
9206cf997b0SBarry Smith   if (info) {
9216cf997b0SBarry Smith     f             = info->fill;
9220cd17293SBarry Smith     levels        = (int) info->levels;
9230cd17293SBarry Smith     diagonal_fill = (int) info->diagonal_fill;
9246cf997b0SBarry Smith   } else {
9256cf997b0SBarry Smith     f             = 1.0;
9266cf997b0SBarry Smith     levels        = 0;
9276cf997b0SBarry Smith     diagonal_fill = 0;
9286cf997b0SBarry Smith   }
92982bf6240SBarry Smith   ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr);
93082bf6240SBarry Smith 
93108480c60SBarry Smith   /* special case that simply copies fill pattern */
932be0abb6dSBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
933be0abb6dSBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
93486bacbd2SBarry Smith   if (!levels && row_identity && col_identity) {
9352e8a6d31SBarry Smith     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
93608480c60SBarry Smith     (*fact)->factor = FACTOR_LU;
93708480c60SBarry Smith     b               = (Mat_SeqAIJ *) (*fact)->data;
93808480c60SBarry Smith     if (!b->diag) {
9397c922b88SBarry Smith       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
94008480c60SBarry Smith     }
9417c922b88SBarry Smith     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
94208480c60SBarry Smith     b->row              = isrow;
94308480c60SBarry Smith     b->col              = iscol;
94482bf6240SBarry Smith     b->icol             = isicol;
9450452661fSBarry Smith     b->solve_work       = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
946f830108cSBarry Smith     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
947c38d4ed2SBarry Smith     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
948c38d4ed2SBarry Smith     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
9493a40ed3dSBarry Smith     PetscFunctionReturn(0);
95008480c60SBarry Smith   }
95108480c60SBarry Smith 
952898183ecSLois Curfman McInnes   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
953898183ecSLois Curfman McInnes   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
9549e25ed09SBarry Smith 
9559e25ed09SBarry Smith   /* get new row pointers */
9560452661fSBarry Smith   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew);
957dbb450caSBarry Smith   ainew[0] = -shift;
9589e25ed09SBarry Smith   /* don't know how many column pointers are needed so estimate */
959dbb450caSBarry Smith   jmax = (int) (f*(ai[n]+!shift));
9600452661fSBarry Smith   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew);
9619e25ed09SBarry Smith   /* ajfill is level of fill for each fill entry */
9620452661fSBarry Smith   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill);
9639e25ed09SBarry Smith   /* fill is a linked list of nonzeros in active row */
9640452661fSBarry Smith   fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill);
96556beaf04SBarry Smith   /* im is level for each filled value */
9660452661fSBarry Smith   im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im);
96756beaf04SBarry Smith   /* dloc is location of diagonal in factor */
9680452661fSBarry Smith   dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc);
96956beaf04SBarry Smith   dloc[0]  = 0;
97056beaf04SBarry Smith   for ( prow=0; prow<n; prow++ ) {
9716cf997b0SBarry Smith 
9726cf997b0SBarry Smith     /* copy current row into linked list */
97356beaf04SBarry Smith     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
974385f7a74SSatish Balay     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
975dbb450caSBarry Smith     xi      = aj + ai[r[prow]] + shift;
9769e25ed09SBarry Smith     fill[n]    = n;
977435faa5fSBarry Smith     fill[prow] = -1; /* marker to indicate if diagonal exists */
9789e25ed09SBarry Smith     while (nz--) {
9799e25ed09SBarry Smith       fm  = n;
980dbb450caSBarry Smith       idx = ic[*xi++ + shift];
9819e25ed09SBarry Smith       do {
9829e25ed09SBarry Smith         m  = fm;
9839e25ed09SBarry Smith         fm = fill[m];
9849e25ed09SBarry Smith       } while (fm < idx);
9859e25ed09SBarry Smith       fill[m]   = idx;
9869e25ed09SBarry Smith       fill[idx] = fm;
98756beaf04SBarry Smith       im[idx]   = 0;
9889e25ed09SBarry Smith     }
9896cf997b0SBarry Smith 
9906cf997b0SBarry Smith     /* make sure diagonal entry is included */
991435faa5fSBarry Smith     if (diagonal_fill && fill[prow] == -1) {
9926cf997b0SBarry Smith       fm = n;
993435faa5fSBarry Smith       while (fill[fm] < prow) fm = fill[fm];
994435faa5fSBarry Smith       fill[prow] = fill[fm]; /* insert diagonal into linked list */
995435faa5fSBarry Smith       fill[fm]   = prow;
9966cf997b0SBarry Smith       im[prow]   = 0;
9976cf997b0SBarry Smith       nzf++;
998335d9088SBarry Smith       dcount++;
9996cf997b0SBarry Smith     }
10006cf997b0SBarry Smith 
100156beaf04SBarry Smith     nzi = 0;
10029e25ed09SBarry Smith     row = fill[n];
100356beaf04SBarry Smith     while ( row < prow ) {
100456beaf04SBarry Smith       incrlev = im[row] + 1;
100556beaf04SBarry Smith       nz      = dloc[row];
10066cf997b0SBarry Smith       xi      = ajnew  + ainew[row] + shift + nz + 1;
1007dbb450caSBarry Smith       flev    = ajfill + ainew[row] + shift + nz + 1;
1008416022c9SBarry Smith       nnz     = ainew[row+1] - ainew[row] - nz - 1;
100956beaf04SBarry Smith       fm      = row;
101056beaf04SBarry Smith       while (nnz-- > 0) {
1011dbb450caSBarry Smith         idx = *xi++ + shift;
101256beaf04SBarry Smith         if (*flev + incrlev > levels) {
101356beaf04SBarry Smith           flev++;
101456beaf04SBarry Smith           continue;
101556beaf04SBarry Smith         }
101656beaf04SBarry Smith         do {
10179e25ed09SBarry Smith           m  = fm;
10189e25ed09SBarry Smith           fm = fill[m];
101956beaf04SBarry Smith         } while (fm < idx);
10209e25ed09SBarry Smith         if (fm != idx) {
102156beaf04SBarry Smith           im[idx]   = *flev + incrlev;
10229e25ed09SBarry Smith           fill[m]   = idx;
10239e25ed09SBarry Smith           fill[idx] = fm;
10249e25ed09SBarry Smith           fm        = idx;
102556beaf04SBarry Smith           nzf++;
1026ecf371e4SBarry Smith         } else {
102756beaf04SBarry Smith           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
10289e25ed09SBarry Smith         }
102956beaf04SBarry Smith         flev++;
10309e25ed09SBarry Smith       }
10319e25ed09SBarry Smith       row = fill[row];
103256beaf04SBarry Smith       nzi++;
10339e25ed09SBarry Smith     }
10349e25ed09SBarry Smith     /* copy new filled row into permanent storage */
103556beaf04SBarry Smith     ainew[prow+1] = ainew[prow] + nzf;
1036d7e8b826SBarry Smith     if (ainew[prow+1] > jmax-shift) {
1037ecf371e4SBarry Smith 
1038ecf371e4SBarry Smith       /* estimate how much additional space we will need */
1039ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1040ecf371e4SBarry Smith       /* just double the memory each time */
1041ecf371e4SBarry Smith       /*  maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */
1042ecf371e4SBarry Smith       int maxadd = jmax;
104356beaf04SBarry Smith       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1044bbb6d6a8SBarry Smith       jmax += maxadd;
1045ecf371e4SBarry Smith 
1046ecf371e4SBarry Smith       /* allocate a longer ajnew and ajfill */
10470452661fSBarry Smith       xi     = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
1048549d3d68SSatish Balay       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr);
1049606d414cSSatish Balay       ierr = PetscFree(ajnew);CHKERRQ(ierr);
105056beaf04SBarry Smith       ajnew  = xi;
10510452661fSBarry Smith       xi     = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
1052549d3d68SSatish Balay       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr);
1053606d414cSSatish Balay       ierr = PetscFree(ajfill);CHKERRQ(ierr);
105456beaf04SBarry Smith       ajfill = xi;
1055ecf371e4SBarry Smith       realloc++; /* count how many times we realloc */
10569e25ed09SBarry Smith     }
1057dbb450caSBarry Smith     xi          = ajnew + ainew[prow] + shift;
1058dbb450caSBarry Smith     flev        = ajfill + ainew[prow] + shift;
105956beaf04SBarry Smith     dloc[prow]  = nzi;
10609e25ed09SBarry Smith     fm          = fill[n];
106156beaf04SBarry Smith     while (nzf--) {
1062dbb450caSBarry Smith       *xi++   = fm - shift;
106356beaf04SBarry Smith       *flev++ = im[fm];
10649e25ed09SBarry Smith       fm      = fill[fm];
10659e25ed09SBarry Smith     }
10666cf997b0SBarry Smith     /* make sure row has diagonal entry */
10676cf997b0SBarry Smith     if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) {
10686cf997b0SBarry Smith       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\
10696cf997b0SBarry Smith     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
10706cf997b0SBarry Smith     }
10719e25ed09SBarry Smith   }
1072606d414cSSatish Balay   ierr = PetscFree(ajfill); CHKERRQ(ierr);
1073898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1074898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1075606d414cSSatish Balay   ierr = PetscFree(fill);CHKERRQ(ierr);
1076606d414cSSatish Balay   ierr = PetscFree(im);CHKERRQ(ierr);
10779e25ed09SBarry Smith 
1078f64065bbSBarry Smith   {
1079464e8d44SSatish Balay     double af = ((double)ainew[n])/((double)ai[n]);
1080c38d4ed2SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1081981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
1082981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
1083981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1084335d9088SBarry Smith     if (diagonal_fill) {
1085335d9088SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount);
1086335d9088SBarry Smith     }
1087f64065bbSBarry Smith   }
1088bbb6d6a8SBarry Smith 
10899e25ed09SBarry Smith   /* put together the new matrix */
1090b4fd4287SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr);
1091fa6007c9SSatish Balay   PLogObjectParent(*fact,isicol);
1092416022c9SBarry Smith   b = (Mat_SeqAIJ *) (*fact)->data;
1093606d414cSSatish Balay   ierr = PetscFree(b->imax);CHKERRQ(ierr);
10947c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
1095dbb450caSBarry Smith   len = (ainew[n] + shift)*sizeof(Scalar);
10969e25ed09SBarry Smith   /* the next line frees the default space generated by the Create() */
1097606d414cSSatish Balay   ierr = PetscFree(b->a);CHKERRQ(ierr);
1098606d414cSSatish Balay   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
10996b96ef82SSatish Balay   b->a          = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a);
1100416022c9SBarry Smith   b->j          = ajnew;
1101416022c9SBarry Smith   b->i          = ainew;
110256beaf04SBarry Smith   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
1103416022c9SBarry Smith   b->diag       = dloc;
1104416022c9SBarry Smith   b->ilen       = 0;
1105416022c9SBarry Smith   b->imax       = 0;
1106416022c9SBarry Smith   b->row        = isrow;
1107416022c9SBarry Smith   b->col        = iscol;
1108c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1109c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
111082bf6240SBarry Smith   b->icol       = isicol;
111182bf6240SBarry Smith   b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work);
1112416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
111356beaf04SBarry Smith      Allocate dloc, solve_work, new a, new j */
1114dbb450caSBarry Smith   PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
1115416022c9SBarry Smith   b->maxnz          = b->nz = ainew[n] + shift;
11169e25ed09SBarry Smith   (*fact)->factor   = FACTOR_LU;
1117ae068f58SLois Curfman McInnes 
1118ae068f58SLois Curfman McInnes   (*fact)->info.factor_mallocs    = realloc;
1119ae068f58SLois Curfman McInnes   (*fact)->info.fill_ratio_given  = f;
1120ae068f58SLois Curfman McInnes   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
1121e93ccc4dSBarry Smith   (*fact)->factor                 =  FACTOR_LU;;
1122ae068f58SLois Curfman McInnes 
11233a40ed3dSBarry Smith   PetscFunctionReturn(0);
11249e25ed09SBarry Smith }
1125d5d45c9bSBarry Smith 
1126d5d45c9bSBarry Smith 
1127d5d45c9bSBarry Smith 
1128d5d45c9bSBarry Smith 
1129