xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision b2da817d11b8f98ec4a15fcfa65f2fa8cfedc826)
173f4d377SMatthew Knepley /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/
2289bc588SBarry Smith 
370f55243SBarry Smith #include "src/mat/impls/aij/seq/aij.h"
41c4feecaSSatish Balay #include "src/inline/dot.h"
5ed480e8bSBarry Smith #include "src/inline/spops.h"
63b2fbd54SBarry Smith 
74a2ae208SSatish Balay #undef __FUNCT__
84a2ae208SSatish Balay #define __FUNCT__ "MatOrdering_Flow_SeqAIJ"
9234db7c0SBarry Smith int MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
103b2fbd54SBarry Smith {
113a40ed3dSBarry Smith   PetscFunctionBegin;
123a40ed3dSBarry Smith 
1329bbc08cSBarry Smith   SETERRQ(PETSC_ERR_SUP,"Code not written");
14aa482453SBarry Smith #if !defined(PETSC_USE_DEBUG)
15d64ed03dSBarry Smith   PetscFunctionReturn(0);
16d64ed03dSBarry Smith #endif
173b2fbd54SBarry Smith }
183b2fbd54SBarry Smith 
1986bacbd2SBarry Smith 
20ca44d042SBarry Smith EXTERN int MatMarkDiagonal_SeqAIJ(Mat);
213a7fca6bSBarry Smith EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
2286bacbd2SBarry Smith 
2387828ca2SBarry Smith EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*);
2487828ca2SBarry Smith EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*);
2587828ca2SBarry Smith EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*);
2686bacbd2SBarry Smith 
274a2ae208SSatish Balay #undef __FUNCT__
284a2ae208SSatish Balay #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
2986bacbd2SBarry Smith   /* ------------------------------------------------------------
3086bacbd2SBarry Smith 
3186bacbd2SBarry Smith           This interface was contribed by Tony Caola
3286bacbd2SBarry Smith 
3386bacbd2SBarry Smith      This routine is an interface to the pivoting drop-tolerance
345bd2ddc7SBarry Smith      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
355bd2ddc7SBarry Smith      SPARSEKIT2.
365bd2ddc7SBarry Smith 
375bd2ddc7SBarry Smith      The SPARSEKIT2 routines used here are covered by the GNU
385bd2ddc7SBarry Smith      copyright; see the file gnu in this directory.
3986bacbd2SBarry Smith 
4086bacbd2SBarry Smith      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
4186bacbd2SBarry Smith      help in getting this routine ironed out.
4286bacbd2SBarry Smith 
435bd2ddc7SBarry Smith      The major drawback to this routine is that if info->fill is
445bd2ddc7SBarry Smith      not large enough it fails rather than allocating more space;
455bd2ddc7SBarry Smith      this can be fixed by hacking/improving the f2c version of
465bd2ddc7SBarry Smith      Yousef Saad's code.
4786bacbd2SBarry Smith 
4886bacbd2SBarry Smith      ------------------------------------------------------------
4986bacbd2SBarry Smith */
50b380c88cSHong Zhang int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
5186bacbd2SBarry Smith {
5260ec86bdSBarry Smith #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
5360ec86bdSBarry Smith   PetscFunctionBegin;
5460ec86bdSBarry Smith   SETERRQ(1,"This distribution does not include GNU Copyright code\n\
5560ec86bdSBarry Smith   You can obtain the drop tolerance routines by installing PETSc from\n\
5660ec86bdSBarry Smith   www.mcs.anl.gov/petsc\n");
5760ec86bdSBarry Smith #else
5886bacbd2SBarry Smith   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b;
5907d2056aSBarry Smith   IS           iscolf,isicol,isirow;
6086bacbd2SBarry Smith   PetscTruth   reorder;
61273d9f13SBarry Smith   int          *c,*r,*ic,ierr,i,n = A->m;
62329f5518SBarry Smith   int          *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63313ae024SBarry Smith   int          *ordcol,*iwk,*iperm,*jw;
64b19713cbSBarry Smith   int          jmax,lfill,job,*o_i,*o_j;
6587828ca2SBarry Smith   PetscScalar  *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66329f5518SBarry Smith   PetscReal    permtol,af;
6786bacbd2SBarry Smith 
6886bacbd2SBarry Smith   PetscFunctionBegin;
6986bacbd2SBarry Smith 
7086bacbd2SBarry Smith   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
7186bacbd2SBarry Smith   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
7286bacbd2SBarry Smith   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
7333259db3SBarry Smith   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
746faa4630SBarry Smith   lfill   = (int)(info->dtcount/2.0);
756faa4630SBarry Smith   jmax    = (int)(info->fill*a->nz);
7686bacbd2SBarry Smith   permtol = info->dtcol;
7786bacbd2SBarry Smith 
7886bacbd2SBarry Smith 
7986bacbd2SBarry Smith   /* ------------------------------------------------------------
8086bacbd2SBarry Smith      If reorder=.TRUE., then the original matrix has to be
8186bacbd2SBarry Smith      reordered to reflect the user selected ordering scheme, and
8286bacbd2SBarry Smith      then de-reordered so it is in it's original format.
8386bacbd2SBarry Smith      Because Saad's dperm() is NOT in place, we have to copy
8486bacbd2SBarry Smith      the original matrix and allocate more storage. . .
8586bacbd2SBarry Smith      ------------------------------------------------------------
8686bacbd2SBarry Smith   */
8786bacbd2SBarry Smith 
8886bacbd2SBarry Smith   /* set reorder to true if either isrow or iscol is not identity */
8986bacbd2SBarry Smith   ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr);
9086bacbd2SBarry Smith   if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);}
9186bacbd2SBarry Smith   reorder = PetscNot(reorder);
9286bacbd2SBarry Smith 
9386bacbd2SBarry Smith 
9486bacbd2SBarry Smith   /* storage for ilu factor */
95b0a32e0cSBarry Smith   ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr);
96b0a32e0cSBarry Smith   ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr);
9787828ca2SBarry Smith   ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr);
98b0a32e0cSBarry Smith   ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr);
9986bacbd2SBarry Smith 
10086bacbd2SBarry Smith   /* ------------------------------------------------------------
10186bacbd2SBarry Smith      Make sure that everything is Fortran formatted (1-Based)
10286bacbd2SBarry Smith      ------------------------------------------------------------
10386bacbd2SBarry Smith   */
104b19713cbSBarry Smith   for (i=old_i[0];i<old_i[n];i++) {
105b19713cbSBarry Smith     old_j[i]++;
10686bacbd2SBarry Smith   }
107b19713cbSBarry Smith   for(i=0;i<n+1;i++) {
108b19713cbSBarry Smith     old_i[i]++;
109b19713cbSBarry Smith   };
110010a20caSHong Zhang 
11186bacbd2SBarry Smith 
11286bacbd2SBarry Smith   if (reorder) {
113c0c2c81eSBarry Smith     ierr = ISGetIndices(iscol,&c);           CHKERRQ(ierr);
114c0c2c81eSBarry Smith     ierr = ISGetIndices(isrow,&r);           CHKERRQ(ierr);
115c0c2c81eSBarry Smith     for(i=0;i<n;i++) {
116c0c2c81eSBarry Smith       r[i]  = r[i]+1;
117c0c2c81eSBarry Smith       c[i]  = c[i]+1;
118c0c2c81eSBarry Smith     }
119b0a32e0cSBarry Smith     ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr);
120b0a32e0cSBarry Smith     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr);
12187828ca2SBarry Smith     ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr);
1225bd2ddc7SBarry Smith     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
123c0c2c81eSBarry Smith     for (i=0;i<n;i++) {
124c0c2c81eSBarry Smith       r[i]  = r[i]-1;
125c0c2c81eSBarry Smith       c[i]  = c[i]-1;
126c0c2c81eSBarry Smith     }
127c0c2c81eSBarry Smith     ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
128c0c2c81eSBarry Smith     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
129b19713cbSBarry Smith     o_a = old_a2;
130b19713cbSBarry Smith     o_j = old_j2;
131b19713cbSBarry Smith     o_i = old_i2;
132b19713cbSBarry Smith   } else {
133b19713cbSBarry Smith     o_a = old_a;
134b19713cbSBarry Smith     o_j = old_j;
135b19713cbSBarry Smith     o_i = old_i;
13686bacbd2SBarry Smith   }
13786bacbd2SBarry Smith 
13886bacbd2SBarry Smith   /* ------------------------------------------------------------
13986bacbd2SBarry Smith      Call Saad's ilutp() routine to generate the factorization
14086bacbd2SBarry Smith      ------------------------------------------------------------
14186bacbd2SBarry Smith   */
14286bacbd2SBarry Smith 
143b0a32e0cSBarry Smith   ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr);
144b0a32e0cSBarry Smith   ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr);
14587828ca2SBarry Smith   ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr);
14686bacbd2SBarry Smith 
14787828ca2SBarry Smith   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
14886bacbd2SBarry Smith   if (ierr) {
14986bacbd2SBarry Smith     switch (ierr) {
15029bbc08cSBarry Smith       case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
15129bbc08cSBarry Smith       case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
15229bbc08cSBarry Smith       case -5: SETERRQ(1,"ilutp(), zero row encountered");
15329bbc08cSBarry Smith       case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
15429bbc08cSBarry Smith       case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
15529bbc08cSBarry Smith       default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr);
15686bacbd2SBarry Smith     }
15786bacbd2SBarry Smith   }
15886bacbd2SBarry Smith 
15986bacbd2SBarry Smith   ierr = PetscFree(w);CHKERRQ(ierr);
16086bacbd2SBarry Smith   ierr = PetscFree(jw);CHKERRQ(ierr);
16186bacbd2SBarry Smith 
16286bacbd2SBarry Smith   /* ------------------------------------------------------------
16386bacbd2SBarry Smith      Saad's routine gives the result in Modified Sparse Row (msr)
16486bacbd2SBarry Smith      Convert to Compressed Sparse Row format (csr)
16586bacbd2SBarry Smith      ------------------------------------------------------------
16686bacbd2SBarry Smith   */
16786bacbd2SBarry Smith 
16887828ca2SBarry Smith   ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr);
169b0a32e0cSBarry Smith   ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr);
17086bacbd2SBarry Smith 
1715bd2ddc7SBarry Smith   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
17286bacbd2SBarry Smith 
17386bacbd2SBarry Smith   ierr = PetscFree(iwk);CHKERRQ(ierr);
17486bacbd2SBarry Smith   ierr = PetscFree(wk);CHKERRQ(ierr);
17586bacbd2SBarry Smith 
17686bacbd2SBarry Smith   if (reorder) {
17786bacbd2SBarry Smith     ierr = PetscFree(old_a2);CHKERRQ(ierr);
17886bacbd2SBarry Smith     ierr = PetscFree(old_j2);CHKERRQ(ierr);
17986bacbd2SBarry Smith     ierr = PetscFree(old_i2);CHKERRQ(ierr);
180313ae024SBarry Smith   } else {
181b19713cbSBarry Smith     /* fix permutation of old_j that the factorization introduced */
182f170005cSBarry Smith     for (i=old_i[0]; i<old_i[n]; i++) {
183b19713cbSBarry Smith       old_j[i-1] = iperm[old_j[i-1]-1];
184b19713cbSBarry Smith     }
185b19713cbSBarry Smith   }
186b19713cbSBarry Smith 
187b801d0f9SBarry Smith   /* get rid of the shift to indices starting at 1 */
18886bacbd2SBarry Smith   for (i=0; i<n+1; i++) {
189b19713cbSBarry Smith     old_i[i]--;
190b19713cbSBarry Smith   }
191b19713cbSBarry Smith   for (i=old_i[0];i<old_i[n];i++) {
192b19713cbSBarry Smith     old_j[i]--;
193b19713cbSBarry Smith   }
19486bacbd2SBarry Smith 
195b19713cbSBarry Smith   /* Make the factored matrix 0-based */
19686bacbd2SBarry Smith   for (i=0; i<n+1; i++) {
197b19713cbSBarry Smith     new_i[i]--;
198b19713cbSBarry Smith   }
199b19713cbSBarry Smith   for (i=new_i[0];i<new_i[n];i++) {
200b19713cbSBarry Smith     new_j[i]--;
201b19713cbSBarry Smith   }
20286bacbd2SBarry Smith 
20386bacbd2SBarry Smith   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
20486bacbd2SBarry Smith   /*-- permute the right-hand-side and solution vectors           --*/
2054c49b128SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
2064c49b128SBarry Smith   ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr);
207c0c2c81eSBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
20886bacbd2SBarry Smith   for(i=0; i<n; i++) {
20986bacbd2SBarry Smith     ordcol[i] = ic[iperm[i]-1];
21086bacbd2SBarry Smith   };
21186bacbd2SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
21207d2056aSBarry Smith   ierr = ISDestroy(isicol);CHKERRQ(ierr);
21386bacbd2SBarry Smith 
214c0c2c81eSBarry Smith   ierr = PetscFree(iperm);CHKERRQ(ierr);
215c0c2c81eSBarry Smith 
216329f5518SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr);
21707d2056aSBarry Smith   ierr = PetscFree(ordcol);CHKERRQ(ierr);
21886bacbd2SBarry Smith 
21986bacbd2SBarry Smith   /*----- put together the new matrix -----*/
22086bacbd2SBarry Smith 
22186bacbd2SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr);
22286bacbd2SBarry Smith   (*fact)->factor    = FACTOR_LU;
22386bacbd2SBarry Smith   (*fact)->assembled = PETSC_TRUE;
22486bacbd2SBarry Smith 
22586bacbd2SBarry Smith   b = (Mat_SeqAIJ*)(*fact)->data;
22686bacbd2SBarry Smith   ierr = PetscFree(b->imax);CHKERRQ(ierr);
22786bacbd2SBarry Smith   b->sorted        = PETSC_FALSE;
22807d2056aSBarry Smith   b->singlemalloc  = PETSC_FALSE;
229b19713cbSBarry Smith   /* the next line frees the default space generated by the MatCreate() */
23086bacbd2SBarry Smith   ierr             = PetscFree(b->a);CHKERRQ(ierr);
23186bacbd2SBarry Smith   ierr             = PetscFree(b->ilen);CHKERRQ(ierr);
23286bacbd2SBarry Smith   b->a             = new_a;
23386bacbd2SBarry Smith   b->j             = new_j;
23486bacbd2SBarry Smith   b->i             = new_i;
23586bacbd2SBarry Smith   b->ilen          = 0;
23686bacbd2SBarry Smith   b->imax          = 0;
2371f9e874aSBarry Smith   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
238313ae024SBarry Smith   b->row           = isirow;
23986bacbd2SBarry Smith   b->col           = iscolf;
24087828ca2SBarry Smith   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
24186bacbd2SBarry Smith   b->maxnz = b->nz = new_i[n];
24286bacbd2SBarry Smith   ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
24386bacbd2SBarry Smith   (*fact)->info.factor_mallocs = 0;
24486bacbd2SBarry Smith 
24586bacbd2SBarry Smith   ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr);
24686bacbd2SBarry Smith 
24786bacbd2SBarry Smith   /* check out for identical nodes. If found, use inode functions */
2483a7fca6bSBarry Smith   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
24986bacbd2SBarry Smith 
250431e710aSBarry Smith   af = ((double)b->nz)/((double)a->nz) + .001;
251b0a32e0cSBarry Smith   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
252b0a32e0cSBarry Smith   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
253b0a32e0cSBarry Smith   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
254b0a32e0cSBarry Smith   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
255431e710aSBarry Smith 
25686bacbd2SBarry Smith   PetscFunctionReturn(0);
25760ec86bdSBarry Smith #endif
25886bacbd2SBarry Smith }
25986bacbd2SBarry Smith 
260289bc588SBarry Smith /*
261289bc588SBarry Smith     Factorization code for AIJ format.
262289bc588SBarry Smith */
2634a2ae208SSatish Balay #undef __FUNCT__
264b9617806SBarry Smith #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
265b380c88cSHong Zhang int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
266289bc588SBarry Smith {
267416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
268289bc588SBarry Smith   IS         isicol;
269273d9f13SBarry Smith   int        *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j;
270010a20caSHong Zhang   int        *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
27191bf9adeSBarry Smith   int        *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
2729e046878SBarry Smith   PetscReal  f;
273289bc588SBarry Smith 
2743a40ed3dSBarry Smith   PetscFunctionBegin;
27529bbc08cSBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
2764c49b128SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
277ac121b3dSBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
278ac121b3dSBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
279289bc588SBarry Smith 
280289bc588SBarry Smith   /* get new row pointers */
281b0a32e0cSBarry Smith   ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr);
282010a20caSHong Zhang   ainew[0] = 0;
283289bc588SBarry Smith   /* don't know how many column pointers are needed so estimate */
284d3d32019SBarry Smith   f = info->fill;
285010a20caSHong Zhang   jmax  = (int)(f*ai[n]+1);
286b0a32e0cSBarry Smith   ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr);
287289bc588SBarry Smith   /* fill is a linked list of nonzeros in active row */
288b0a32e0cSBarry Smith   ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr);
2892fb3ed76SBarry Smith   im   = fill + n + 1;
290289bc588SBarry Smith   /* idnew is location of diagonal in factor */
291b0a32e0cSBarry Smith   ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr);
292010a20caSHong Zhang   idnew[0] = 0;
293289bc588SBarry Smith 
294289bc588SBarry Smith   for (i=0; i<n; i++) {
295289bc588SBarry Smith     /* first copy previous fill into linked list */
296289bc588SBarry Smith     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
29729bbc08cSBarry Smith     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
298010a20caSHong Zhang     ajtmp   = aj + ai[r[i]];
299289bc588SBarry Smith     fill[n] = n;
300289bc588SBarry Smith     while (nz--) {
301289bc588SBarry Smith       fm  = n;
302010a20caSHong Zhang       idx = ic[*ajtmp++];
303289bc588SBarry Smith       do {
304289bc588SBarry Smith         m  = fm;
305289bc588SBarry Smith         fm = fill[m];
306289bc588SBarry Smith       } while (fm < idx);
307289bc588SBarry Smith       fill[m]   = idx;
308289bc588SBarry Smith       fill[idx] = fm;
309289bc588SBarry Smith     }
310289bc588SBarry Smith     row = fill[n];
311289bc588SBarry Smith     while (row < i) {
312010a20caSHong Zhang       ajtmp = ajnew + idnew[row] + 1;
3132fb3ed76SBarry Smith       nzbd  = 1 + idnew[row] - ainew[row];
3142fb3ed76SBarry Smith       nz    = im[row] - nzbd;
315289bc588SBarry Smith       fm    = row;
3162fb3ed76SBarry Smith       while (nz-- > 0) {
317010a20caSHong Zhang         idx = *ajtmp++ ;
3182fb3ed76SBarry Smith         nzbd++;
3192fb3ed76SBarry Smith         if (idx == i) im[row] = nzbd;
320289bc588SBarry Smith         do {
321289bc588SBarry Smith           m  = fm;
322289bc588SBarry Smith           fm = fill[m];
323289bc588SBarry Smith         } while (fm < idx);
324289bc588SBarry Smith         if (fm != idx) {
325289bc588SBarry Smith           fill[m]   = idx;
326289bc588SBarry Smith           fill[idx] = fm;
327289bc588SBarry Smith           fm        = idx;
328289bc588SBarry Smith           nnz++;
329289bc588SBarry Smith         }
330289bc588SBarry Smith       }
331289bc588SBarry Smith       row = fill[row];
332289bc588SBarry Smith     }
333289bc588SBarry Smith     /* copy new filled row into permanent storage */
334289bc588SBarry Smith     ainew[i+1] = ainew[i] + nnz;
335496e697eSBarry Smith     if (ainew[i+1] > jmax) {
336ecf371e4SBarry Smith 
337ecf371e4SBarry Smith       /* estimate how much additional space we will need */
338ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
339ecf371e4SBarry Smith       /* just double the memory each time */
340ecf371e4SBarry Smith       int maxadd = jmax;
341ecf371e4SBarry Smith       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
342bbb6d6a8SBarry Smith       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
3432fb3ed76SBarry Smith       jmax += maxadd;
344ecf371e4SBarry Smith 
345ecf371e4SBarry Smith       /* allocate a longer ajnew */
346b0a32e0cSBarry Smith       ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr);
347010a20caSHong Zhang       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(int));CHKERRQ(ierr);
348606d414cSSatish Balay       ierr  = PetscFree(ajnew);CHKERRQ(ierr);
349289bc588SBarry Smith       ajnew = ajtmp;
3502fb3ed76SBarry Smith       realloc++; /* count how many times we realloc */
351289bc588SBarry Smith     }
352010a20caSHong Zhang     ajtmp = ajnew + ainew[i];
353289bc588SBarry Smith     fm    = fill[n];
354289bc588SBarry Smith     nzi   = 0;
3552fb3ed76SBarry Smith     im[i] = nnz;
356289bc588SBarry Smith     while (nnz--) {
357289bc588SBarry Smith       if (fm < i) nzi++;
358010a20caSHong Zhang       *ajtmp++ = fm ;
359289bc588SBarry Smith       fm       = fill[fm];
360289bc588SBarry Smith     }
361289bc588SBarry Smith     idnew[i] = ainew[i] + nzi;
362289bc588SBarry Smith   }
363464e8d44SSatish Balay   if (ai[n] != 0) {
364329f5518SBarry Smith     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
365b0a32e0cSBarry Smith     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
366b0a32e0cSBarry Smith     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
367b0a32e0cSBarry Smith     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
368b0a32e0cSBarry Smith     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
36905bf559cSSatish Balay   } else {
370b0a32e0cSBarry Smith     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
37105bf559cSSatish Balay   }
3722fb3ed76SBarry Smith 
373898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
374898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3751987afe7SBarry Smith 
376606d414cSSatish Balay   ierr = PetscFree(fill);CHKERRQ(ierr);
377289bc588SBarry Smith 
378289bc588SBarry Smith   /* put together the new matrix */
379b4fd4287SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr);
380b0a32e0cSBarry Smith   PetscLogObjectParent(*B,isicol);
381416022c9SBarry Smith   b = (Mat_SeqAIJ*)(*B)->data;
382606d414cSSatish Balay   ierr = PetscFree(b->imax);CHKERRQ(ierr);
3837c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
384e8d4e0b9SBarry Smith   /* the next line frees the default space generated by the Create() */
385606d414cSSatish Balay   ierr = PetscFree(b->a);CHKERRQ(ierr);
386606d414cSSatish Balay   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
387010a20caSHong Zhang   ierr          = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
388416022c9SBarry Smith   b->j          = ajnew;
389416022c9SBarry Smith   b->i          = ainew;
390416022c9SBarry Smith   b->diag       = idnew;
391416022c9SBarry Smith   b->ilen       = 0;
392416022c9SBarry Smith   b->imax       = 0;
393416022c9SBarry Smith   b->row        = isrow;
394416022c9SBarry Smith   b->col        = iscol;
395085256b3SBarry Smith   b->lu_damping        = info->damping;
39687828ca2SBarry Smith   b->lu_zeropivot      = info->zeropivot;
3972cea7109SBarry Smith   b->lu_shift          = info->shift;
3982cea7109SBarry Smith   b->lu_shift_fraction = info->shift_fraction;
399c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
400c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
40182bf6240SBarry Smith   b->icol       = isicol;
40287828ca2SBarry Smith   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
403416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
4047fd98bd6SLois Curfman McInnes      Allocate idnew, solve_work, new a, new j */
405010a20caSHong Zhang   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(PetscScalar)));
406010a20caSHong Zhang   b->maxnz = b->nz = ainew[n] ;
4077fd98bd6SLois Curfman McInnes 
408329f5518SBarry Smith   (*B)->factor                 =  FACTOR_LU;
409ae068f58SLois Curfman McInnes   (*B)->info.factor_mallocs    = realloc;
410ae068f58SLois Curfman McInnes   (*B)->info.fill_ratio_given  = f;
4113a7fca6bSBarry Smith   ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr);
4127dda7e10SBarry Smith   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
413703deb49SSatish Balay 
414e93ccc4dSBarry Smith   if (ai[n] != 0) {
415329f5518SBarry Smith     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
416eea2913aSSatish Balay   } else {
417eea2913aSSatish Balay     (*B)->info.fill_ratio_needed = 0.0;
418eea2913aSSatish Balay   }
4193a40ed3dSBarry Smith   PetscFunctionReturn(0);
420289bc588SBarry Smith }
4210a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
4223a7fca6bSBarry Smith EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
42341c01911SSatish Balay 
4244a2ae208SSatish Balay #undef __FUNCT__
4254a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
426416022c9SBarry Smith int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
427289bc588SBarry Smith {
428416022c9SBarry Smith   Mat          C = *B;
429416022c9SBarry Smith   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
43082bf6240SBarry Smith   IS           isrow = b->row,isicol = b->icol;
431273d9f13SBarry Smith   int          *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j;
432010a20caSHong Zhang   int          *ajtmpold,*ajtmp,nz,row,*ics;
4330cf777b8SBarry Smith   int          *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0;
43487828ca2SBarry Smith   PetscScalar  *rtmp,*v,*pc,multiplier,*pv,*rtmps;
4350cf777b8SBarry Smith   PetscReal    damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d;
4360cf777b8SBarry Smith   PetscReal    row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.;
4370cf777b8SBarry Smith   PetscTruth   damp,lushift;
438289bc588SBarry Smith 
4393a40ed3dSBarry Smith   PetscFunctionBegin;
44078b31e54SBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
44178b31e54SBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
44287828ca2SBarry Smith   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
44387828ca2SBarry Smith   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
444010a20caSHong Zhang   rtmps = rtmp; ics = ic;
445289bc588SBarry Smith 
4466cc28720Svictorle   if (!a->diag) {
4470cf777b8SBarry Smith     ierr = MatMarkDiagonal_SeqAIJ(A); CHKERRQ(ierr);
4480cf777b8SBarry Smith   }
4490cf777b8SBarry Smith 
4500cf777b8SBarry Smith   if (b->lu_shift) { /* set max shift */
4510cf777b8SBarry Smith     int *aai = a->i,*ddiag = a->diag;
4520cf777b8SBarry Smith     shift_top = 0;
4536cc28720Svictorle     for (i=0; i<n; i++) {
454010a20caSHong Zhang       d = PetscAbsScalar((a->a)[ddiag[i]]);
4556cc28720Svictorle       /* calculate amt of shift needed for this row */
4566c037d1bSvictorle       if (d<=0) {
4573aeef889SHong Zhang         row_shift = 0;
4583aeef889SHong Zhang       } else {
4593aeef889SHong Zhang         row_shift = -2*d;
4603aeef889SHong Zhang       }
461010a20caSHong Zhang       v = a->a+aai[i];
4628a2e2d9cSvictorle       for (j=0; j<aai[i+1]-aai[i]; j++)
4636cc28720Svictorle 	row_shift += PetscAbsScalar(v[j]);
4646cc28720Svictorle       if (row_shift>shift_top) shift_top = row_shift;
4656cc28720Svictorle     }
4666cc28720Svictorle   }
4676cc28720Svictorle 
4686cc28720Svictorle   shift_fraction = 0; shift_amount = 0;
469085256b3SBarry Smith   do {
4708a5eb818SBarry Smith     damp    = PETSC_FALSE;
4716cc28720Svictorle     lushift = PETSC_FALSE;
472289bc588SBarry Smith     for (i=0; i<n; i++) {
473289bc588SBarry Smith       nz    = ai[i+1] - ai[i];
474010a20caSHong Zhang       ajtmp = aj + ai[i];
47548e94559SBarry Smith       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
476289bc588SBarry Smith 
477289bc588SBarry Smith       /* load in initial (unfactored row) */
478416022c9SBarry Smith       nz       = a->i[r[i]+1] - a->i[r[i]];
479010a20caSHong Zhang       ajtmpold = a->j + a->i[r[i]];
480010a20caSHong Zhang       v        = a->a + a->i[r[i]];
481085256b3SBarry Smith       for (j=0; j<nz; j++) {
482085256b3SBarry Smith         rtmp[ics[ajtmpold[j]]] = v[j];
483085256b3SBarry Smith       }
484e2dd4fc4Svictorle       rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */
485289bc588SBarry Smith 
486010a20caSHong Zhang       row = *ajtmp++ ;
487f2582111SSatish Balay       while  (row < i) {
4888c37ef55SBarry Smith         pc = rtmp + row;
4898c37ef55SBarry Smith         if (*pc != 0.0) {
490010a20caSHong Zhang           pv         = b->a + diag_offset[row] ;
491010a20caSHong Zhang           pj         = b->j + diag_offset[row] + 1;
49235aab85fSBarry Smith           multiplier = *pc / *pv++;
4938c37ef55SBarry Smith           *pc        = multiplier;
494f2582111SSatish Balay           nz         = ai[row+1] - diag_offset[row] - 1;
495f2582111SSatish Balay           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
496b0a32e0cSBarry Smith           PetscLogFlops(2*nz);
497289bc588SBarry Smith         }
498010a20caSHong Zhang         row = *ajtmp++;
499289bc588SBarry Smith       }
500416022c9SBarry Smith       /* finished row so stick it into b->a */
501010a20caSHong Zhang       pv   = b->a + ai[i] ;
502010a20caSHong Zhang       pj   = b->j + ai[i] ;
5038c37ef55SBarry Smith       nz   = ai[i+1] - ai[i];
50435aab85fSBarry Smith       diag = diag_offset[i] - ai[i];
505d3d32019SBarry Smith       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
506d3d32019SBarry Smith       rs   = 0.0;
507d3d32019SBarry Smith       for (j=0; j<nz; j++) {
508d3d32019SBarry Smith         pv[j] = rtmps[pj[j]];
509d3d32019SBarry Smith         if (j != diag) rs += PetscAbsScalar(pv[j]);
510d3d32019SBarry Smith       }
5116cc28720Svictorle #define MAX_NSHIFT 5
512*b2da817dSvictorle       if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) {
5136cc28720Svictorle 	if (nshift>MAX_NSHIFT) {
5140cf777b8SBarry Smith 	  SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner");
5156cc28720Svictorle 	} else if (nshift==MAX_NSHIFT) {
5166cc28720Svictorle 	  shift_fraction = shift_hi;
5176c037d1bSvictorle 	  lushift      = PETSC_FALSE;
5186cc28720Svictorle 	} else {
5196cc28720Svictorle 	  shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
5206c037d1bSvictorle 	  lushift      = PETSC_TRUE;
5216cc28720Svictorle 	}
5226cc28720Svictorle 	shift_amount = shift_fraction * shift_top;
5230cf777b8SBarry Smith         nshift++;
5240cf777b8SBarry Smith         break;
5256cc28720Svictorle       }
526d3d32019SBarry Smith       if (PetscAbsScalar(pv[diag]) < zeropivot*rs) {
527d3d32019SBarry Smith         if (damping) {
5288a5eb818SBarry Smith           if (ndamp) damping *= 2.0;
529085256b3SBarry Smith           damp = PETSC_TRUE;
530085256b3SBarry Smith           ndamp++;
531085256b3SBarry Smith           break;
532085256b3SBarry Smith         } else {
53363bb2aa6SBarry Smith           SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
534d708749eSBarry Smith         }
53535aab85fSBarry Smith       }
53635aab85fSBarry Smith     }
5376cc28720Svictorle     if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) {
5386cc28720Svictorle       /*
5396c037d1bSvictorle        * if no shift in this attempt & shifting & started shifting & can refine,
5406cc28720Svictorle        * then try lower shift
5416cc28720Svictorle        */
5420cf777b8SBarry Smith       shift_hi       = shift_fraction;
5430cf777b8SBarry Smith       shift_fraction = (shift_hi+shift_lo)/2.;
5446cc28720Svictorle       shift_amount   = shift_fraction * shift_top;
5450cf777b8SBarry Smith       lushift        = PETSC_TRUE;
5460cf777b8SBarry Smith       nshift++;
5476cc28720Svictorle     }
5486cc28720Svictorle   } while (damp || lushift);
5490f11f9aeSSatish Balay 
55035aab85fSBarry Smith   /* invert diagonal entries for simplier triangular solves */
55135aab85fSBarry Smith   for (i=0; i<n; i++) {
552010a20caSHong Zhang     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
55335aab85fSBarry Smith   }
55435aab85fSBarry Smith 
555606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
55678b31e54SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
55778b31e54SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
558416022c9SBarry Smith   C->factor = FACTOR_LU;
5597dda7e10SBarry Smith   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
560c456f294SBarry Smith   C->assembled = PETSC_TRUE;
561b0a32e0cSBarry Smith   PetscLogFlops(C->n);
562d3d32019SBarry Smith   if (ndamp) {
563b0a32e0cSBarry Smith     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping);
564085256b3SBarry Smith   }
5656cc28720Svictorle   if (nshift) {
5666cc28720Svictorle     b->lu_shift_fraction = shift_fraction;
567fb10cecfSBarry Smith     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %d\n",shift_fraction,shift_top,nshift);
5686cc28720Svictorle   }
5693a40ed3dSBarry Smith   PetscFunctionReturn(0);
570289bc588SBarry Smith }
5710889b5dcSvictorle 
5720889b5dcSvictorle #undef __FUNCT__
5730889b5dcSvictorle #define __FUNCT__ "MatUsePETSc_SeqAIJ"
5740889b5dcSvictorle int MatUsePETSc_SeqAIJ(Mat A)
5750889b5dcSvictorle {
5760889b5dcSvictorle   PetscFunctionBegin;
5770889b5dcSvictorle   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
5780889b5dcSvictorle   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
5790889b5dcSvictorle   PetscFunctionReturn(0);
5800889b5dcSvictorle }
5810889b5dcSvictorle 
5820889b5dcSvictorle 
5830a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
5844a2ae208SSatish Balay #undef __FUNCT__
5854a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqAIJ"
586b380c88cSHong Zhang int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
587da3a660dSBarry Smith {
588273d9f13SBarry Smith   int ierr;
589416022c9SBarry Smith   Mat C;
590416022c9SBarry Smith 
5913a40ed3dSBarry Smith   PetscFunctionBegin;
5929e046878SBarry Smith   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
593f2582111SSatish Balay   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
594273d9f13SBarry Smith   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
595440f4c53SSatish Balay   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
5963a40ed3dSBarry Smith   PetscFunctionReturn(0);
597da3a660dSBarry Smith }
5980a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
5994a2ae208SSatish Balay #undef __FUNCT__
6004a2ae208SSatish Balay #define __FUNCT__ "MatSolve_SeqAIJ"
601416022c9SBarry Smith int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
6028c37ef55SBarry Smith {
603416022c9SBarry Smith   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
604416022c9SBarry Smith   IS           iscol = a->col,isrow = a->row;
605273d9f13SBarry Smith   int          *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
606010a20caSHong Zhang   int          nz,*rout,*cout;
60787828ca2SBarry Smith   PetscScalar  *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
6088c37ef55SBarry Smith 
6093a40ed3dSBarry Smith   PetscFunctionBegin;
6103a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
61191bf9adeSBarry Smith 
612b1d4fb26SBarry Smith   ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr);
613b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
614416022c9SBarry Smith   tmp  = a->solve_work;
6158c37ef55SBarry Smith 
6163b2fbd54SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
6173b2fbd54SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
6188c37ef55SBarry Smith 
6198c37ef55SBarry Smith   /* forward solve the lower triangular */
6208c37ef55SBarry Smith   tmp[0] = b[*r++];
621010a20caSHong Zhang   tmps   = tmp;
6228c37ef55SBarry Smith   for (i=1; i<n; i++) {
623010a20caSHong Zhang     v   = aa + ai[i] ;
624010a20caSHong Zhang     vi  = aj + ai[i] ;
62553e7425aSBarry Smith     nz  = a->diag[i] - ai[i];
62653e7425aSBarry Smith     sum = b[*r++];
627ed480e8bSBarry Smith     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
6288c37ef55SBarry Smith     tmp[i] = sum;
6298c37ef55SBarry Smith   }
6308c37ef55SBarry Smith 
6318c37ef55SBarry Smith   /* backward solve the upper triangular */
6328c37ef55SBarry Smith   for (i=n-1; i>=0; i--){
633010a20caSHong Zhang     v   = aa + a->diag[i] + 1;
634010a20caSHong Zhang     vi  = aj + a->diag[i] + 1;
635416022c9SBarry Smith     nz  = ai[i+1] - a->diag[i] - 1;
6368c37ef55SBarry Smith     sum = tmp[i];
637ed480e8bSBarry Smith     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
638010a20caSHong Zhang     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
6398c37ef55SBarry Smith   }
6408c37ef55SBarry Smith 
6413b2fbd54SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
6423b2fbd54SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
643b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr);
644b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
645b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz - A->n);
6463a40ed3dSBarry Smith   PetscFunctionReturn(0);
6478c37ef55SBarry Smith }
648026e39d0SSatish Balay 
649930ae53cSBarry Smith /* ----------------------------------------------------------- */
6504a2ae208SSatish Balay #undef __FUNCT__
6514a2ae208SSatish Balay #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
652930ae53cSBarry Smith int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
653930ae53cSBarry Smith {
654930ae53cSBarry Smith   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
655273d9f13SBarry Smith   int          n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr;
656362ced78SSatish Balay   PetscScalar  *x,*b,*aa = a->a;
657aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
658d85afc42SSatish Balay   int          adiag_i,i,*vi,nz,ai_i;
659362ced78SSatish Balay   PetscScalar  *v,sum;
660d85afc42SSatish Balay #endif
661930ae53cSBarry Smith 
6623a40ed3dSBarry Smith   PetscFunctionBegin;
6633a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
664930ae53cSBarry Smith 
665b1d4fb26SBarry Smith   ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr);
666b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
667930ae53cSBarry Smith 
668aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
6691c4feecaSSatish Balay   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
6701c4feecaSSatish Balay #else
671930ae53cSBarry Smith   /* forward solve the lower triangular */
672930ae53cSBarry Smith   x[0] = b[0];
673930ae53cSBarry Smith   for (i=1; i<n; i++) {
674930ae53cSBarry Smith     ai_i = ai[i];
675930ae53cSBarry Smith     v    = aa + ai_i;
676930ae53cSBarry Smith     vi   = aj + ai_i;
677930ae53cSBarry Smith     nz   = adiag[i] - ai_i;
678930ae53cSBarry Smith     sum  = b[i];
679930ae53cSBarry Smith     while (nz--) sum -= *v++ * x[*vi++];
680930ae53cSBarry Smith     x[i] = sum;
681930ae53cSBarry Smith   }
682930ae53cSBarry Smith 
683930ae53cSBarry Smith   /* backward solve the upper triangular */
684930ae53cSBarry Smith   for (i=n-1; i>=0; i--){
685930ae53cSBarry Smith     adiag_i = adiag[i];
686d708749eSBarry Smith     v       = aa + adiag_i + 1;
687d708749eSBarry Smith     vi      = aj + adiag_i + 1;
688930ae53cSBarry Smith     nz      = ai[i+1] - adiag_i - 1;
689930ae53cSBarry Smith     sum     = x[i];
690930ae53cSBarry Smith     while (nz--) sum -= *v++ * x[*vi++];
691930ae53cSBarry Smith     x[i]    = sum*aa[adiag_i];
692930ae53cSBarry Smith   }
6931c4feecaSSatish Balay #endif
694b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz - A->n);
695b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr);
696b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
6973a40ed3dSBarry Smith   PetscFunctionReturn(0);
698930ae53cSBarry Smith }
699930ae53cSBarry Smith 
7004a2ae208SSatish Balay #undef __FUNCT__
7014a2ae208SSatish Balay #define __FUNCT__ "MatSolveAdd_SeqAIJ"
702416022c9SBarry Smith int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
703da3a660dSBarry Smith {
704416022c9SBarry Smith   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
705416022c9SBarry Smith   IS           iscol = a->col,isrow = a->row;
706273d9f13SBarry Smith   int          *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
707010a20caSHong Zhang   int          nz,*rout,*cout;
70887828ca2SBarry Smith   PetscScalar  *x,*b,*tmp,*aa = a->a,sum,*v;
709da3a660dSBarry Smith 
7103a40ed3dSBarry Smith   PetscFunctionBegin;
71178b31e54SBarry Smith   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
712da3a660dSBarry Smith 
713b1d4fb26SBarry Smith   ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr);
714b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
715416022c9SBarry Smith   tmp  = a->solve_work;
716da3a660dSBarry Smith 
7173b2fbd54SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
7183b2fbd54SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
719da3a660dSBarry Smith 
720da3a660dSBarry Smith   /* forward solve the lower triangular */
721da3a660dSBarry Smith   tmp[0] = b[*r++];
722da3a660dSBarry Smith   for (i=1; i<n; i++) {
723010a20caSHong Zhang     v   = aa + ai[i] ;
724010a20caSHong Zhang     vi  = aj + ai[i] ;
725416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
726da3a660dSBarry Smith     sum = b[*r++];
727010a20caSHong Zhang     while (nz--) sum -= *v++ * tmp[*vi++ ];
728da3a660dSBarry Smith     tmp[i] = sum;
729da3a660dSBarry Smith   }
730da3a660dSBarry Smith 
731da3a660dSBarry Smith   /* backward solve the upper triangular */
732da3a660dSBarry Smith   for (i=n-1; i>=0; i--){
733010a20caSHong Zhang     v   = aa + a->diag[i] + 1;
734010a20caSHong Zhang     vi  = aj + a->diag[i] + 1;
735416022c9SBarry Smith     nz  = ai[i+1] - a->diag[i] - 1;
736da3a660dSBarry Smith     sum = tmp[i];
737010a20caSHong Zhang     while (nz--) sum -= *v++ * tmp[*vi++ ];
738010a20caSHong Zhang     tmp[i] = sum*aa[a->diag[i]];
739da3a660dSBarry Smith     x[*c--] += tmp[i];
740da3a660dSBarry Smith   }
741da3a660dSBarry Smith 
7423b2fbd54SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
7433b2fbd54SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
744b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr);
745b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
746b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz);
747898183ecSLois Curfman McInnes 
7483a40ed3dSBarry Smith   PetscFunctionReturn(0);
749da3a660dSBarry Smith }
750da3a660dSBarry Smith /* -------------------------------------------------------------------*/
7514a2ae208SSatish Balay #undef __FUNCT__
7524a2ae208SSatish Balay #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
7537c922b88SBarry Smith int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
754da3a660dSBarry Smith {
755416022c9SBarry Smith   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
7562235e667SBarry Smith   IS           iscol = a->col,isrow = a->row;
757273d9f13SBarry Smith   int          *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
758010a20caSHong Zhang   int          nz,*rout,*cout,*diag = a->diag;
75987828ca2SBarry Smith   PetscScalar  *x,*b,*tmp,*aa = a->a,*v,s1;
760da3a660dSBarry Smith 
7613a40ed3dSBarry Smith   PetscFunctionBegin;
762b1d4fb26SBarry Smith   ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr);
763b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
764416022c9SBarry Smith   tmp  = a->solve_work;
765da3a660dSBarry Smith 
7662235e667SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
7672235e667SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
768da3a660dSBarry Smith 
769da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
7702235e667SBarry Smith   for (i=0; i<n; i++) tmp[i] = b[c[i]];
771da3a660dSBarry Smith 
772da3a660dSBarry Smith   /* forward solve the U^T */
773da3a660dSBarry Smith   for (i=0; i<n; i++) {
774010a20caSHong Zhang     v   = aa + diag[i] ;
775010a20caSHong Zhang     vi  = aj + diag[i] + 1;
776f1af5d2fSBarry Smith     nz  = ai[i+1] - diag[i] - 1;
777c38d4ed2SBarry Smith     s1  = tmp[i];
778ef66eb69SBarry Smith     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
779da3a660dSBarry Smith     while (nz--) {
780010a20caSHong Zhang       tmp[*vi++ ] -= (*v++)*s1;
781da3a660dSBarry Smith     }
782c38d4ed2SBarry Smith     tmp[i] = s1;
783da3a660dSBarry Smith   }
784da3a660dSBarry Smith 
785da3a660dSBarry Smith   /* backward solve the L^T */
786da3a660dSBarry Smith   for (i=n-1; i>=0; i--){
787010a20caSHong Zhang     v   = aa + diag[i] - 1 ;
788010a20caSHong Zhang     vi  = aj + diag[i] - 1 ;
789f1af5d2fSBarry Smith     nz  = diag[i] - ai[i];
790f1af5d2fSBarry Smith     s1  = tmp[i];
791da3a660dSBarry Smith     while (nz--) {
792010a20caSHong Zhang       tmp[*vi-- ] -= (*v--)*s1;
793da3a660dSBarry Smith     }
794da3a660dSBarry Smith   }
795da3a660dSBarry Smith 
796da3a660dSBarry Smith   /* copy tmp into x according to permutation */
797da3a660dSBarry Smith   for (i=0; i<n; i++) x[r[i]] = tmp[i];
798da3a660dSBarry Smith 
7992235e667SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
8002235e667SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
801b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr);
802b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
803da3a660dSBarry Smith 
804b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz-A->n);
8053a40ed3dSBarry Smith   PetscFunctionReturn(0);
806da3a660dSBarry Smith }
807da3a660dSBarry Smith 
8084a2ae208SSatish Balay #undef __FUNCT__
8094a2ae208SSatish Balay #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
8107c922b88SBarry Smith int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
811da3a660dSBarry Smith {
812416022c9SBarry Smith   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
8132235e667SBarry Smith   IS           iscol = a->col,isrow = a->row;
814273d9f13SBarry Smith   int          *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
815010a20caSHong Zhang   int          nz,*rout,*cout,*diag = a->diag;
81687828ca2SBarry Smith   PetscScalar  *x,*b,*tmp,*aa = a->a,*v;
8176abc6512SBarry Smith 
8183a40ed3dSBarry Smith   PetscFunctionBegin;
8196abc6512SBarry Smith   if (zz != xx) VecCopy(zz,xx);
8206abc6512SBarry Smith 
821b1d4fb26SBarry Smith   ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr);
822b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
823416022c9SBarry Smith   tmp = a->solve_work;
8246abc6512SBarry Smith 
8252235e667SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
8262235e667SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
8276abc6512SBarry Smith 
8286abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
8292235e667SBarry Smith   for (i=0; i<n; i++) tmp[i] = b[c[i]];
8306abc6512SBarry Smith 
8316abc6512SBarry Smith   /* forward solve the U^T */
8326abc6512SBarry Smith   for (i=0; i<n; i++) {
833010a20caSHong Zhang     v   = aa + diag[i] ;
834010a20caSHong Zhang     vi  = aj + diag[i] + 1;
835f1af5d2fSBarry Smith     nz  = ai[i+1] - diag[i] - 1;
8366abc6512SBarry Smith     tmp[i] *= *v++;
8376abc6512SBarry Smith     while (nz--) {
838010a20caSHong Zhang       tmp[*vi++ ] -= (*v++)*tmp[i];
8396abc6512SBarry Smith     }
8406abc6512SBarry Smith   }
8416abc6512SBarry Smith 
8426abc6512SBarry Smith   /* backward solve the L^T */
8436abc6512SBarry Smith   for (i=n-1; i>=0; i--){
844010a20caSHong Zhang     v   = aa + diag[i] - 1 ;
845010a20caSHong Zhang     vi  = aj + diag[i] - 1 ;
846f1af5d2fSBarry Smith     nz  = diag[i] - ai[i];
8476abc6512SBarry Smith     while (nz--) {
848010a20caSHong Zhang       tmp[*vi-- ] -= (*v--)*tmp[i];
8496abc6512SBarry Smith     }
8506abc6512SBarry Smith   }
8516abc6512SBarry Smith 
8526abc6512SBarry Smith   /* copy tmp into x according to permutation */
8536abc6512SBarry Smith   for (i=0; i<n; i++) x[r[i]] += tmp[i];
8546abc6512SBarry Smith 
8552235e667SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
8562235e667SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
857b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr);
858b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
8596abc6512SBarry Smith 
860b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz);
8613a40ed3dSBarry Smith   PetscFunctionReturn(0);
862da3a660dSBarry Smith }
8639e25ed09SBarry Smith /* ----------------------------------------------------------------*/
864ca44d042SBarry Smith EXTERN int MatMissingDiagonal_SeqAIJ(Mat);
86508480c60SBarry Smith 
8664a2ae208SSatish Balay #undef __FUNCT__
8674a2ae208SSatish Balay #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
868b380c88cSHong Zhang int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
8699e25ed09SBarry Smith {
870416022c9SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
8719e25ed09SBarry Smith   IS         isicol;
872273d9f13SBarry Smith   int        *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j;
87356beaf04SBarry Smith   int        *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
874335d9088SBarry Smith   int        *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
875010a20caSHong Zhang   int        incrlev,nnz,i,levels,diagonal_fill;
87677c4ece6SBarry Smith   PetscTruth col_identity,row_identity;
877329f5518SBarry Smith   PetscReal  f;
8789e25ed09SBarry Smith 
8793a40ed3dSBarry Smith   PetscFunctionBegin;
8806cf997b0SBarry Smith   f             = info->fill;
8810cd17293SBarry Smith   levels        = (int)info->levels;
8820cd17293SBarry Smith   diagonal_fill = (int)info->diagonal_fill;
8834c49b128SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
88482bf6240SBarry Smith 
88508480c60SBarry Smith   /* special case that simply copies fill pattern */
886be0abb6dSBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
887be0abb6dSBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
88886bacbd2SBarry Smith   if (!levels && row_identity && col_identity) {
8892e8a6d31SBarry Smith     ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
89008480c60SBarry Smith     (*fact)->factor = FACTOR_LU;
89108480c60SBarry Smith     b               = (Mat_SeqAIJ*)(*fact)->data;
89208480c60SBarry Smith     if (!b->diag) {
8937c922b88SBarry Smith       ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
89408480c60SBarry Smith     }
8957c922b88SBarry Smith     ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr);
89608480c60SBarry Smith     b->row              = isrow;
89708480c60SBarry Smith     b->col              = iscol;
89882bf6240SBarry Smith     b->icol             = isicol;
899a2e34c3dSBarry Smith     b->lu_damping       = info->damping;
90087828ca2SBarry Smith     b->lu_zeropivot     = info->zeropivot;
9012cea7109SBarry Smith     b->lu_shift         = info->shift;
9022cea7109SBarry Smith     b->lu_shift_fraction= info->shift_fraction;
90387828ca2SBarry Smith     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
904f830108cSBarry Smith     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
905c38d4ed2SBarry Smith     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
906c38d4ed2SBarry Smith     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
9073a40ed3dSBarry Smith     PetscFunctionReturn(0);
90808480c60SBarry Smith   }
90908480c60SBarry Smith 
910898183ecSLois Curfman McInnes   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
911898183ecSLois Curfman McInnes   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
9129e25ed09SBarry Smith 
9139e25ed09SBarry Smith   /* get new row pointers */
914b0a32e0cSBarry Smith   ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr);
915010a20caSHong Zhang   ainew[0] = 0;
9169e25ed09SBarry Smith   /* don't know how many column pointers are needed so estimate */
917010a20caSHong Zhang   jmax = (int)(f*(ai[n]+1));
918b0a32e0cSBarry Smith   ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr);
9199e25ed09SBarry Smith   /* ajfill is level of fill for each fill entry */
920b0a32e0cSBarry Smith   ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr);
9219e25ed09SBarry Smith   /* fill is a linked list of nonzeros in active row */
922b0a32e0cSBarry Smith   ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr);
92356beaf04SBarry Smith   /* im is level for each filled value */
924b0a32e0cSBarry Smith   ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr);
92556beaf04SBarry Smith   /* dloc is location of diagonal in factor */
926b0a32e0cSBarry Smith   ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr);
92756beaf04SBarry Smith   dloc[0]  = 0;
92856beaf04SBarry Smith   for (prow=0; prow<n; prow++) {
9296cf997b0SBarry Smith 
9306cf997b0SBarry Smith     /* copy current row into linked list */
93156beaf04SBarry Smith     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
93229bbc08cSBarry Smith     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
933010a20caSHong Zhang     xi      = aj + ai[r[prow]] ;
9349e25ed09SBarry Smith     fill[n]    = n;
935435faa5fSBarry Smith     fill[prow] = -1; /* marker to indicate if diagonal exists */
9369e25ed09SBarry Smith     while (nz--) {
9379e25ed09SBarry Smith       fm  = n;
938010a20caSHong Zhang       idx = ic[*xi++ ];
9399e25ed09SBarry Smith       do {
9409e25ed09SBarry Smith         m  = fm;
9419e25ed09SBarry Smith         fm = fill[m];
9429e25ed09SBarry Smith       } while (fm < idx);
9439e25ed09SBarry Smith       fill[m]   = idx;
9449e25ed09SBarry Smith       fill[idx] = fm;
94556beaf04SBarry Smith       im[idx]   = 0;
9469e25ed09SBarry Smith     }
9476cf997b0SBarry Smith 
9486cf997b0SBarry Smith     /* make sure diagonal entry is included */
949435faa5fSBarry Smith     if (diagonal_fill && fill[prow] == -1) {
9506cf997b0SBarry Smith       fm = n;
951435faa5fSBarry Smith       while (fill[fm] < prow) fm = fill[fm];
952435faa5fSBarry Smith       fill[prow] = fill[fm]; /* insert diagonal into linked list */
953435faa5fSBarry Smith       fill[fm]   = prow;
9546cf997b0SBarry Smith       im[prow]   = 0;
9556cf997b0SBarry Smith       nzf++;
956335d9088SBarry Smith       dcount++;
9576cf997b0SBarry Smith     }
9586cf997b0SBarry Smith 
95956beaf04SBarry Smith     nzi = 0;
9609e25ed09SBarry Smith     row = fill[n];
96156beaf04SBarry Smith     while (row < prow) {
96256beaf04SBarry Smith       incrlev = im[row] + 1;
96356beaf04SBarry Smith       nz      = dloc[row];
964010a20caSHong Zhang       xi      = ajnew  + ainew[row]  + nz + 1;
965010a20caSHong Zhang       flev    = ajfill + ainew[row]  + nz + 1;
966416022c9SBarry Smith       nnz     = ainew[row+1] - ainew[row] - nz - 1;
96756beaf04SBarry Smith       fm      = row;
96856beaf04SBarry Smith       while (nnz-- > 0) {
969010a20caSHong Zhang         idx = *xi++ ;
97056beaf04SBarry Smith         if (*flev + incrlev > levels) {
97156beaf04SBarry Smith           flev++;
97256beaf04SBarry Smith           continue;
97356beaf04SBarry Smith         }
97456beaf04SBarry Smith         do {
9759e25ed09SBarry Smith           m  = fm;
9769e25ed09SBarry Smith           fm = fill[m];
97756beaf04SBarry Smith         } while (fm < idx);
9789e25ed09SBarry Smith         if (fm != idx) {
97956beaf04SBarry Smith           im[idx]   = *flev + incrlev;
9809e25ed09SBarry Smith           fill[m]   = idx;
9819e25ed09SBarry Smith           fill[idx] = fm;
9829e25ed09SBarry Smith           fm        = idx;
98356beaf04SBarry Smith           nzf++;
984ecf371e4SBarry Smith         } else {
98556beaf04SBarry Smith           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
9869e25ed09SBarry Smith         }
98756beaf04SBarry Smith         flev++;
9889e25ed09SBarry Smith       }
9899e25ed09SBarry Smith       row = fill[row];
99056beaf04SBarry Smith       nzi++;
9919e25ed09SBarry Smith     }
9929e25ed09SBarry Smith     /* copy new filled row into permanent storage */
99356beaf04SBarry Smith     ainew[prow+1] = ainew[prow] + nzf;
994010a20caSHong Zhang     if (ainew[prow+1] > jmax) {
995ecf371e4SBarry Smith 
996ecf371e4SBarry Smith       /* estimate how much additional space we will need */
997ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
998ecf371e4SBarry Smith       /* just double the memory each time */
999ecf371e4SBarry Smith       /*  maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1000ecf371e4SBarry Smith       int maxadd = jmax;
100156beaf04SBarry Smith       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1002bbb6d6a8SBarry Smith       jmax += maxadd;
1003ecf371e4SBarry Smith 
1004ecf371e4SBarry Smith       /* allocate a longer ajnew and ajfill */
1005b0a32e0cSBarry Smith       ierr   = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr);
1006010a20caSHong Zhang       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(int));CHKERRQ(ierr);
1007606d414cSSatish Balay       ierr   = PetscFree(ajnew);CHKERRQ(ierr);
100856beaf04SBarry Smith       ajnew  = xi;
1009b0a32e0cSBarry Smith       ierr   = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr);
1010010a20caSHong Zhang       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(int));CHKERRQ(ierr);
1011606d414cSSatish Balay       ierr   = PetscFree(ajfill);CHKERRQ(ierr);
101256beaf04SBarry Smith       ajfill = xi;
1013ecf371e4SBarry Smith       realloc++; /* count how many times we realloc */
10149e25ed09SBarry Smith     }
1015010a20caSHong Zhang     xi          = ajnew + ainew[prow] ;
1016010a20caSHong Zhang     flev        = ajfill + ainew[prow] ;
101756beaf04SBarry Smith     dloc[prow]  = nzi;
10189e25ed09SBarry Smith     fm          = fill[n];
101956beaf04SBarry Smith     while (nzf--) {
1020010a20caSHong Zhang       *xi++   = fm ;
102156beaf04SBarry Smith       *flev++ = im[fm];
10229e25ed09SBarry Smith       fm      = fill[fm];
10239e25ed09SBarry Smith     }
10246cf997b0SBarry Smith     /* make sure row has diagonal entry */
1025010a20caSHong Zhang     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
102629bbc08cSBarry Smith       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\
10276cf997b0SBarry Smith     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
10286cf997b0SBarry Smith     }
10299e25ed09SBarry Smith   }
1030606d414cSSatish Balay   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1031898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1032898183ecSLois Curfman McInnes   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1033606d414cSSatish Balay   ierr = PetscFree(fill);CHKERRQ(ierr);
1034606d414cSSatish Balay   ierr = PetscFree(im);CHKERRQ(ierr);
10359e25ed09SBarry Smith 
1036f64065bbSBarry Smith   {
1037329f5518SBarry Smith     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1038b0a32e0cSBarry Smith     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1039c0d6ac57SBarry Smith     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1040c0d6ac57SBarry Smith     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1041b0a32e0cSBarry Smith     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1042335d9088SBarry Smith     if (diagonal_fill) {
1043b1bcba4aSBarry Smith       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount);
1044335d9088SBarry Smith     }
1045f64065bbSBarry Smith   }
1046bbb6d6a8SBarry Smith 
10479e25ed09SBarry Smith   /* put together the new matrix */
1048b4fd4287SBarry Smith   ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr);
1049b0a32e0cSBarry Smith   PetscLogObjectParent(*fact,isicol);
1050416022c9SBarry Smith   b = (Mat_SeqAIJ*)(*fact)->data;
1051606d414cSSatish Balay   ierr = PetscFree(b->imax);CHKERRQ(ierr);
10527c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
1053010a20caSHong Zhang   len = (ainew[n] )*sizeof(PetscScalar);
10549e25ed09SBarry Smith   /* the next line frees the default space generated by the Create() */
1055606d414cSSatish Balay   ierr = PetscFree(b->a);CHKERRQ(ierr);
1056606d414cSSatish Balay   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1057b0a32e0cSBarry Smith   ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr);
1058416022c9SBarry Smith   b->j          = ajnew;
1059416022c9SBarry Smith   b->i          = ainew;
106056beaf04SBarry Smith   for (i=0; i<n; i++) dloc[i] += ainew[i];
1061416022c9SBarry Smith   b->diag       = dloc;
1062416022c9SBarry Smith   b->ilen       = 0;
1063416022c9SBarry Smith   b->imax       = 0;
1064416022c9SBarry Smith   b->row        = isrow;
1065416022c9SBarry Smith   b->col        = iscol;
1066c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1067c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
106882bf6240SBarry Smith   b->icol       = isicol;
106987828ca2SBarry Smith   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1070416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
107156beaf04SBarry Smith      Allocate dloc, solve_work, new a, new j */
1072010a20caSHong Zhang   PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(int)+sizeof(PetscScalar)));
1073010a20caSHong Zhang   b->maxnz             = b->nz = ainew[n] ;
1074a2e34c3dSBarry Smith   b->lu_damping        = info->damping;
10752cea7109SBarry Smith   b->lu_shift          = info->shift;
10762cea7109SBarry Smith   b->lu_shift_fraction = info->shift_fraction;
107787828ca2SBarry Smith   b->lu_zeropivot = info->zeropivot;
10789e25ed09SBarry Smith   (*fact)->factor = FACTOR_LU;
10793a7fca6bSBarry Smith   ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr);
10803a7fca6bSBarry Smith   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1081ae068f58SLois Curfman McInnes 
1082ae068f58SLois Curfman McInnes   (*fact)->info.factor_mallocs    = realloc;
1083ae068f58SLois Curfman McInnes   (*fact)->info.fill_ratio_given  = f;
1084329f5518SBarry Smith   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
10853a40ed3dSBarry Smith   PetscFunctionReturn(0);
10869e25ed09SBarry Smith }
1087d5d45c9bSBarry Smith 
1088a6175056SHong Zhang #include "src/mat/impls/sbaij/seq/sbaij.h"
1089a6175056SHong Zhang #undef __FUNCT__
1090a6175056SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
1091a6175056SHong Zhang int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact)
1092a6175056SHong Zhang {
10930968510aSHong Zhang   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1094a6175056SHong Zhang   int                 ierr;
1095d5d45c9bSBarry Smith 
1096a6175056SHong Zhang   PetscFunctionBegin;
10970968510aSHong Zhang   if (!a->sbaijMat){
10980968510aSHong Zhang     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1099a6175056SHong Zhang   }
110003aac1b8SHong Zhang 
1101b45a75daSHong Zhang   ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr);
11020968510aSHong Zhang   ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);
1103064503c5SHong Zhang   a->sbaijMat = PETSC_NULL;
1104653a6975SHong Zhang 
1105a6175056SHong Zhang   PetscFunctionReturn(0);
1106a6175056SHong Zhang }
1107a6175056SHong Zhang 
1108a6175056SHong Zhang #undef __FUNCT__
1109a6175056SHong Zhang #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
111015e8a5b3SHong Zhang int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1111a6175056SHong Zhang {
11120968510aSHong Zhang   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
111303aac1b8SHong Zhang   int                 ierr;
1114653a6975SHong Zhang   PetscTruth          perm_identity;
1115a6175056SHong Zhang 
1116a6175056SHong Zhang   PetscFunctionBegin;
1117653a6975SHong Zhang   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1118653a6975SHong Zhang   if (!perm_identity){
1119653a6975SHong Zhang     SETERRQ(1,"Non-identity permutation is not supported yet");
1120653a6975SHong Zhang   }
11210968510aSHong Zhang   if (!a->sbaijMat){
11220968510aSHong Zhang     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
11230968510aSHong Zhang   }
1124a6175056SHong Zhang 
1125b00f7748SHong Zhang   ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
1126653a6975SHong Zhang   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1127653a6975SHong Zhang 
1128a6175056SHong Zhang   PetscFunctionReturn(0);
1129a6175056SHong Zhang }
1130d5d45c9bSBarry Smith 
1131f76d2b81SHong Zhang #undef __FUNCT__
1132f76d2b81SHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
1133f76d2b81SHong Zhang int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1134f76d2b81SHong Zhang {
1135f76d2b81SHong Zhang   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
113603aac1b8SHong Zhang   int                 ierr;
1137f76d2b81SHong Zhang   PetscTruth          perm_identity;
1138f76d2b81SHong Zhang 
1139f76d2b81SHong Zhang   PetscFunctionBegin;
1140f76d2b81SHong Zhang   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1141f76d2b81SHong Zhang   if (!perm_identity){
1142f76d2b81SHong Zhang     SETERRQ(1,"Non-identity permutation is not supported yet");
1143f76d2b81SHong Zhang   }
1144f76d2b81SHong Zhang   if (!a->sbaijMat){
1145f76d2b81SHong Zhang     ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
1146f76d2b81SHong Zhang   }
1147f76d2b81SHong Zhang 
1148f76d2b81SHong Zhang   ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
1149f76d2b81SHong Zhang   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1150f76d2b81SHong Zhang 
1151f76d2b81SHong Zhang   PetscFunctionReturn(0);
1152f76d2b81SHong Zhang }
1153