1*b19713cbSBarry Smith /*$Id: aijfact.c,v 1.130 1999/12/16 15:57:45 bsmith Exp bsmith $*/ 2289bc588SBarry Smith 370f55243SBarry Smith #include "src/mat/impls/aij/seq/aij.h" 490f02eecSBarry Smith #include "src/vec/vecimpl.h" 51c4feecaSSatish Balay #include "src/inline/dot.h" 63b2fbd54SBarry Smith 75615d1e5SSatish Balay #undef __FUNC__ 891e9ee9fSBarry Smith #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 991e9ee9fSBarry Smith int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 103b2fbd54SBarry Smith { 113a40ed3dSBarry Smith PetscFunctionBegin; 123a40ed3dSBarry Smith 13e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14aa482453SBarry Smith #if !defined(PETSC_USE_DEBUG) 15d64ed03dSBarry Smith PetscFunctionReturn(0); 16d64ed03dSBarry Smith #endif 173b2fbd54SBarry Smith } 183b2fbd54SBarry Smith 1986bacbd2SBarry Smith 2086bacbd2SBarry Smith extern int MatMarkDiagonal_SeqAIJ(Mat); 2186bacbd2SBarry Smith extern int Mat_AIJ_CheckInode(Mat); 2286bacbd2SBarry Smith 2386bacbd2SBarry Smith #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT) 2486bacbd2SBarry Smith 2586bacbd2SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 2686bacbd2SBarry Smith #define dperm_ DPERM 2786bacbd2SBarry Smith #define ilutp_ ILUTP 2886bacbd2SBarry Smith #define ilu0_ ILU0 2986bacbd2SBarry Smith #define msrcsr_ MSRCSR 3086bacbd2SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3186bacbd2SBarry Smith #define dperm_ dperm 3286bacbd2SBarry Smith #define ilutp_ ilutp 3386bacbd2SBarry Smith #define ilu0_ ilu0 3486bacbd2SBarry Smith #define msrcsr_ msrcsr 3586bacbd2SBarry Smith #endif 3686bacbd2SBarry Smith 3786bacbd2SBarry Smith EXTERN_C_BEGIN 3886bacbd2SBarry Smith extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*); 3986bacbd2SBarry Smith extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*); 4086bacbd2SBarry Smith extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*); 4186bacbd2SBarry Smith extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *); 4286bacbd2SBarry Smith EXTERN_C_END 4386bacbd2SBarry Smith 4486bacbd2SBarry Smith #undef __FUNC__ 4586bacbd2SBarry Smith #define __FUNC__ "MatILUDTFactor_SeqAIJ" 4686bacbd2SBarry Smith /* ------------------------------------------------------------ 4786bacbd2SBarry Smith 4886bacbd2SBarry Smith This interface was contribed by Tony Caola 4986bacbd2SBarry Smith 5086bacbd2SBarry Smith This routine is an interface to the pivoting drop-tolerance 5186bacbd2SBarry Smith ILU routine written by Yousef Saad (saad@cs.umn.edu). It 5286bacbd2SBarry Smith was inspired by the non-pivoting iludt written by David 5386bacbd2SBarry Smith Hysom (hysom@cs.odu.edu). 5486bacbd2SBarry Smith 5586bacbd2SBarry Smith Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 5686bacbd2SBarry Smith help in getting this routine ironed out. 5786bacbd2SBarry Smith 5886bacbd2SBarry Smith As of right now, there are a couple of things that could 5986bacbd2SBarry Smith be, uh, better. 6086bacbd2SBarry Smith 6186bacbd2SBarry Smith 1 - Since Saad's routine is Fortran based, memory cannot be 6286bacbd2SBarry Smith malloc'd. I was trying to get the expected fill from the 6386bacbd2SBarry Smith preconditioner and use this number as the multiplier in 6486bacbd2SBarry Smith the equation for jmax, below, but couldn't figure it out. 6586bacbd2SBarry Smith Anyway, perhaps a better solution is to run SPARSKIT through 6686bacbd2SBarry Smith f2c and incorporate mallocs(), but I want to graduate so I'll 6786bacbd2SBarry Smith just rebuild Petsc. . . 6886bacbd2SBarry Smith 6986bacbd2SBarry Smith shift = 1, ishift = 0, for indices start at 1 7086bacbd2SBarry Smith shift = 0, ishift = 1, for indices starting at 0 7186bacbd2SBarry Smith ------------------------------------------------------------ 7286bacbd2SBarry Smith */ 7386bacbd2SBarry Smith 7486bacbd2SBarry Smith int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 7586bacbd2SBarry Smith { 7686bacbd2SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 7786bacbd2SBarry Smith IS iscolf, isrowf, isicol, isirow; 7886bacbd2SBarry Smith PetscTruth reorder; 7986bacbd2SBarry Smith int *c,*r,*ic,*ir, ierr, i, n = a->m; 8086bacbd2SBarry Smith int *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j; 8186bacbd2SBarry Smith int *ordcol, *ordrow,*iwk,*iperm, *jw; 8286bacbd2SBarry Smith int ishift = !a->indexshift,shift = -a->indexshift; 83*b19713cbSBarry Smith int jmax,lfill,job,*o_i,*o_j; 84*b19713cbSBarry Smith Scalar *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0,*o_a; 8586bacbd2SBarry Smith 8686bacbd2SBarry Smith PetscFunctionBegin; 8786bacbd2SBarry Smith 8886bacbd2SBarry Smith if (info->dt == PETSC_DEFAULT) info->dt = .005; 8986bacbd2SBarry Smith if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax); 9086bacbd2SBarry Smith if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 9186bacbd2SBarry Smith if (info->fill == PETSC_DEFAULT) info->fill = (n*info->dtcount)/a->nz; 9286bacbd2SBarry Smith lfill = info->dtcount/2; 9386bacbd2SBarry Smith jmax = info->fill*a->nz; 9486bacbd2SBarry Smith permtol = info->dtcol; 9586bacbd2SBarry Smith 9686bacbd2SBarry Smith ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 9786bacbd2SBarry Smith ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr); 9886bacbd2SBarry Smith 9986bacbd2SBarry Smith 10086bacbd2SBarry Smith /* ------------------------------------------------------------ 10186bacbd2SBarry Smith If reorder=.TRUE., then the original matrix has to be 10286bacbd2SBarry Smith reordered to reflect the user selected ordering scheme, and 10386bacbd2SBarry Smith then de-reordered so it is in it's original format. 10486bacbd2SBarry Smith Because Saad's dperm() is NOT in place, we have to copy 10586bacbd2SBarry Smith the original matrix and allocate more storage. . . 10686bacbd2SBarry Smith ------------------------------------------------------------ 10786bacbd2SBarry Smith */ 10886bacbd2SBarry Smith 10986bacbd2SBarry Smith /* set reorder to true if either isrow or iscol is not identity */ 11086bacbd2SBarry Smith ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 11186bacbd2SBarry Smith if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 11286bacbd2SBarry Smith reorder = PetscNot(reorder); 11386bacbd2SBarry Smith 11486bacbd2SBarry Smith ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 11586bacbd2SBarry Smith ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 11686bacbd2SBarry Smith ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 11786bacbd2SBarry Smith ierr = ISGetIndices(isirow,&ir); CHKERRQ(ierr); 11886bacbd2SBarry Smith 11986bacbd2SBarry Smith /* storage for ilu factor */ 12086bacbd2SBarry Smith new_i = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(new_i); 12186bacbd2SBarry Smith new_j = (int *) PetscMalloc(jmax*sizeof(int)); CHKPTRQ(new_j); 12286bacbd2SBarry Smith new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a); 12386bacbd2SBarry Smith 12486bacbd2SBarry Smith if (reorder) { 12586bacbd2SBarry Smith old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2); 12686bacbd2SBarry Smith old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2); 12786bacbd2SBarry Smith old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 12886bacbd2SBarry Smith } 12986bacbd2SBarry Smith 13086bacbd2SBarry Smith ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol); 13186bacbd2SBarry Smith ordrow = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordrow); 13286bacbd2SBarry Smith 13386bacbd2SBarry Smith /* ------------------------------------------------------------ 13486bacbd2SBarry Smith Make sure that everything is Fortran formatted (1-Based) 13586bacbd2SBarry Smith ------------------------------------------------------------ 13686bacbd2SBarry Smith */ 137*b19713cbSBarry Smith if (ishift) { 138*b19713cbSBarry Smith for (i=old_i[0];i<old_i[n];i++) { 139*b19713cbSBarry Smith old_j[i]++; 14086bacbd2SBarry Smith } 141*b19713cbSBarry Smith for(i=0;i<n+1;i++) { 142*b19713cbSBarry Smith old_i[i]++; 143*b19713cbSBarry Smith }; 14486bacbd2SBarry Smith }; 14586bacbd2SBarry Smith 14686bacbd2SBarry Smith 14786bacbd2SBarry Smith for(i=0;i<n;i++) { 14886bacbd2SBarry Smith r[i] = r[i]+1; 14986bacbd2SBarry Smith c[i] = c[i]+1; 15086bacbd2SBarry Smith ir[i] = ir[i]+1; 15186bacbd2SBarry Smith ic[i] = ic[i]+1; 15286bacbd2SBarry Smith } 15386bacbd2SBarry Smith 15486bacbd2SBarry Smith if (reorder) { 15586bacbd2SBarry Smith job = 3; 156*b19713cbSBarry Smith dperm_(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 157*b19713cbSBarry Smith o_a = old_a2; 158*b19713cbSBarry Smith o_j = old_j2; 159*b19713cbSBarry Smith o_i = old_i2; 160*b19713cbSBarry Smith } else { 161*b19713cbSBarry Smith o_a = old_a; 162*b19713cbSBarry Smith o_j = old_j; 163*b19713cbSBarry Smith o_i = old_i; 16486bacbd2SBarry Smith } 16586bacbd2SBarry Smith 16686bacbd2SBarry Smith /* ------------------------------------------------------------ 16786bacbd2SBarry Smith Call Saad's ilutp() routine to generate the factorization 16886bacbd2SBarry Smith ------------------------------------------------------------ 16986bacbd2SBarry Smith */ 17086bacbd2SBarry Smith 17186bacbd2SBarry Smith iperm = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm); 17286bacbd2SBarry Smith jw = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw); 17386bacbd2SBarry Smith w = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w); 17486bacbd2SBarry Smith 175*b19713cbSBarry Smith ilutp_(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 17686bacbd2SBarry Smith if (ierr) { 17786bacbd2SBarry Smith switch (ierr) { 17886bacbd2SBarry Smith case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax); 17986bacbd2SBarry Smith break; 18086bacbd2SBarry Smith case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax); 18186bacbd2SBarry Smith break; 18286bacbd2SBarry Smith case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 18386bacbd2SBarry Smith break; 18486bacbd2SBarry Smith case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 18586bacbd2SBarry Smith break; 18686bacbd2SBarry Smith case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 18786bacbd2SBarry Smith break; 18886bacbd2SBarry Smith default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 18986bacbd2SBarry Smith } 19086bacbd2SBarry Smith } 19186bacbd2SBarry Smith 19286bacbd2SBarry Smith ierr = PetscFree(w);CHKERRQ(ierr); 19386bacbd2SBarry Smith ierr = PetscFree(jw);CHKERRQ(ierr); 19486bacbd2SBarry Smith 19586bacbd2SBarry Smith 19686bacbd2SBarry Smith /* ------------------------------------------------------------ 19786bacbd2SBarry Smith Saad's routine gives the result in Modified Sparse Row (msr) 19886bacbd2SBarry Smith Convert to Compressed Sparse Row format (csr) 19986bacbd2SBarry Smith ------------------------------------------------------------ 20086bacbd2SBarry Smith */ 20186bacbd2SBarry Smith 20286bacbd2SBarry Smith wk = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk); 20386bacbd2SBarry Smith iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk); 20486bacbd2SBarry Smith 20586bacbd2SBarry Smith msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 20686bacbd2SBarry Smith 20786bacbd2SBarry Smith ierr = PetscFree(iwk);CHKERRQ(ierr); 20886bacbd2SBarry Smith ierr = PetscFree(wk);CHKERRQ(ierr); 20986bacbd2SBarry Smith 21086bacbd2SBarry Smith if (reorder) { 21186bacbd2SBarry Smith ierr = PetscFree(old_a2);CHKERRQ(ierr); 21286bacbd2SBarry Smith ierr = PetscFree(old_j2);CHKERRQ(ierr); 21386bacbd2SBarry Smith ierr = PetscFree(old_i2);CHKERRQ(ierr); 21486bacbd2SBarry Smith } 21586bacbd2SBarry Smith 216*b19713cbSBarry Smith /* fix permutation of old_j that the factorization introduced */ 217*b19713cbSBarry Smith if (!reorder) { 218*b19713cbSBarry Smith for (i=old_i[0]; i<=old_i[n]; i++) { 219*b19713cbSBarry Smith old_j[i-1] = iperm[old_j[i-1]-1]; 220*b19713cbSBarry Smith } 221*b19713cbSBarry Smith } 222*b19713cbSBarry Smith 223b801d0f9SBarry Smith /* get rid of the shift to indices starting at 1 */ 224*b19713cbSBarry Smith if (ishift) { 22586bacbd2SBarry Smith for (i=0; i<n+1; i++) { 226*b19713cbSBarry Smith old_i[i]--; 227*b19713cbSBarry Smith } 228*b19713cbSBarry Smith for (i=old_i[0];i<old_i[n];i++) { 229*b19713cbSBarry Smith old_j[i]--; 230*b19713cbSBarry Smith } 23186bacbd2SBarry Smith } 23286bacbd2SBarry Smith 233*b19713cbSBarry Smith /* Make the factored matrix 0-based */ 234*b19713cbSBarry Smith if (ishift) { 23586bacbd2SBarry Smith for (i=0; i<n+1; i++) { 236*b19713cbSBarry Smith new_i[i]--; 237*b19713cbSBarry Smith } 238*b19713cbSBarry Smith for (i=new_i[0];i<new_i[n];i++) { 239*b19713cbSBarry Smith new_j[i]--; 240*b19713cbSBarry Smith } 24186bacbd2SBarry Smith } 24286bacbd2SBarry Smith 24386bacbd2SBarry Smith for (i=0;i<n;i++) { 24486bacbd2SBarry Smith r[i] = r[i]-1; 24586bacbd2SBarry Smith c[i] = c[i]-1; 24686bacbd2SBarry Smith ir[i] = ir[i]-1; 24786bacbd2SBarry Smith ic[i] = ic[i]-1; 24886bacbd2SBarry Smith } 24986bacbd2SBarry Smith 25086bacbd2SBarry Smith /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 25186bacbd2SBarry Smith /*-- permute the right-hand-side and solution vectors --*/ 25286bacbd2SBarry Smith for(i=0; i<n; i++) { 25386bacbd2SBarry Smith ordcol[i] = ic[iperm[i]-1]; 25486bacbd2SBarry Smith ordrow[i] = ir[i]; 25586bacbd2SBarry Smith }; 25686bacbd2SBarry Smith 25786bacbd2SBarry Smith ierr = PetscFree(iperm);CHKERRQ(ierr); 25886bacbd2SBarry Smith 25986bacbd2SBarry Smith ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 26086bacbd2SBarry Smith ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 26186bacbd2SBarry Smith 26286bacbd2SBarry Smith ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 26386bacbd2SBarry Smith ierr = ISRestoreIndices(isirow,&ir); CHKERRQ(ierr); 26486bacbd2SBarry Smith 26586bacbd2SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf); 26686bacbd2SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordrow,&isrowf); 26786bacbd2SBarry Smith 26886bacbd2SBarry Smith /*----- put together the new matrix -----*/ 26986bacbd2SBarry Smith 27086bacbd2SBarry Smith ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr); 27186bacbd2SBarry Smith (*fact)->factor = FACTOR_LU; 27286bacbd2SBarry Smith (*fact)->assembled = PETSC_TRUE; 27386bacbd2SBarry Smith 27486bacbd2SBarry Smith b = (Mat_SeqAIJ *) (*fact)->data; 27586bacbd2SBarry Smith ierr = PetscFree(b->imax);CHKERRQ(ierr); 27686bacbd2SBarry Smith b->sorted = PETSC_FALSE; 27786bacbd2SBarry Smith b->singlemalloc = PETSC_TRUE; 278*b19713cbSBarry Smith /* the next line frees the default space generated by the MatCreate() */ 27986bacbd2SBarry Smith ierr = PetscFree(b->a);CHKERRQ(ierr); 28086bacbd2SBarry Smith ierr = PetscFree(b->ilen);CHKERRQ(ierr); 28186bacbd2SBarry Smith b->a = new_a; 28286bacbd2SBarry Smith b->j = new_j; 28386bacbd2SBarry Smith b->i = new_i; 28486bacbd2SBarry Smith b->ilen = 0; 28586bacbd2SBarry Smith b->imax = 0; 28686bacbd2SBarry Smith b->row = isrowf; 28786bacbd2SBarry Smith b->col = iscolf; 28886bacbd2SBarry Smith b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 28986bacbd2SBarry Smith b->maxnz = b->nz = new_i[n]; 29086bacbd2SBarry Smith b->indexshift = a->indexshift; 29186bacbd2SBarry Smith ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 29286bacbd2SBarry Smith (*fact)->info.factor_mallocs = 0; 29386bacbd2SBarry Smith 29486bacbd2SBarry Smith ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 29586bacbd2SBarry Smith 29686bacbd2SBarry Smith /* check out for identical nodes. If found, use inode functions */ 29786bacbd2SBarry Smith ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 29886bacbd2SBarry Smith 29986bacbd2SBarry Smith PetscFunctionReturn(0); 30086bacbd2SBarry Smith } 30186bacbd2SBarry Smith #else 30286bacbd2SBarry Smith #undef __FUNC__ 30386bacbd2SBarry Smith #define __FUNC__ "MatILUDTFactor_SeqAIJ" 304c3b2863dSBarry Smith int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 30586bacbd2SBarry Smith { 30686bacbd2SBarry Smith PetscFunctionBegin; 30786bacbd2SBarry Smith SETERRQ(1,1,"You must install Saad's ILUDT to use this"); 30886bacbd2SBarry Smith } 30986bacbd2SBarry Smith #endif 31086bacbd2SBarry Smith 311289bc588SBarry Smith /* 312289bc588SBarry Smith Factorization code for AIJ format. 313289bc588SBarry Smith */ 3145615d1e5SSatish Balay #undef __FUNC__ 3155615d1e5SSatish Balay #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 316416022c9SBarry Smith int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 317289bc588SBarry Smith { 318416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 319289bc588SBarry Smith IS isicol; 320416022c9SBarry Smith int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 321416022c9SBarry Smith int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 32291bf9adeSBarry Smith int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 323289bc588SBarry Smith 3243a40ed3dSBarry Smith PetscFunctionBegin; 325d3cbd50cSLois Curfman McInnes PetscValidHeaderSpecific(isrow,IS_COOKIE); 326d3cbd50cSLois Curfman McInnes PetscValidHeaderSpecific(iscol,IS_COOKIE); 327f1af5d2fSBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 3283b2fbd54SBarry Smith 32978b31e54SBarry Smith ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 330ac121b3dSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 331ac121b3dSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 332289bc588SBarry Smith 333289bc588SBarry Smith /* get new row pointers */ 3340452661fSBarry Smith ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 335dbb450caSBarry Smith ainew[0] = -shift; 336289bc588SBarry Smith /* don't know how many column pointers are needed so estimate */ 337dbb450caSBarry Smith jmax = (int) (f*ai[n]+(!shift)); 3380452661fSBarry Smith ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 339289bc588SBarry Smith /* fill is a linked list of nonzeros in active row */ 3400452661fSBarry Smith fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill); 3412fb3ed76SBarry Smith im = fill + n + 1; 342289bc588SBarry Smith /* idnew is location of diagonal in factor */ 3430452661fSBarry Smith idnew = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew); 344dbb450caSBarry Smith idnew[0] = -shift; 345289bc588SBarry Smith 346289bc588SBarry Smith for ( i=0; i<n; i++ ) { 347289bc588SBarry Smith /* first copy previous fill into linked list */ 348289bc588SBarry Smith nnz = nz = ai[r[i]+1] - ai[r[i]]; 3496b96ef82SSatish Balay if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 350dbb450caSBarry Smith ajtmp = aj + ai[r[i]] + shift; 351289bc588SBarry Smith fill[n] = n; 352289bc588SBarry Smith while (nz--) { 353289bc588SBarry Smith fm = n; 354dbb450caSBarry Smith idx = ic[*ajtmp++ + shift]; 355289bc588SBarry Smith do { 356289bc588SBarry Smith m = fm; 357289bc588SBarry Smith fm = fill[m]; 358289bc588SBarry Smith } while (fm < idx); 359289bc588SBarry Smith fill[m] = idx; 360289bc588SBarry Smith fill[idx] = fm; 361289bc588SBarry Smith } 362289bc588SBarry Smith row = fill[n]; 363289bc588SBarry Smith while ( row < i ) { 364dbb450caSBarry Smith ajtmp = ajnew + idnew[row] + (!shift); 3652fb3ed76SBarry Smith nzbd = 1 + idnew[row] - ainew[row]; 3662fb3ed76SBarry Smith nz = im[row] - nzbd; 367289bc588SBarry Smith fm = row; 3682fb3ed76SBarry Smith while (nz-- > 0) { 369dbb450caSBarry Smith idx = *ajtmp++ + shift; 3702fb3ed76SBarry Smith nzbd++; 3712fb3ed76SBarry Smith if (idx == i) im[row] = nzbd; 372289bc588SBarry Smith do { 373289bc588SBarry Smith m = fm; 374289bc588SBarry Smith fm = fill[m]; 375289bc588SBarry Smith } while (fm < idx); 376289bc588SBarry Smith if (fm != idx) { 377289bc588SBarry Smith fill[m] = idx; 378289bc588SBarry Smith fill[idx] = fm; 379289bc588SBarry Smith fm = idx; 380289bc588SBarry Smith nnz++; 381289bc588SBarry Smith } 382289bc588SBarry Smith } 383289bc588SBarry Smith row = fill[row]; 384289bc588SBarry Smith } 385289bc588SBarry Smith /* copy new filled row into permanent storage */ 386289bc588SBarry Smith ainew[i+1] = ainew[i] + nnz; 387496e697eSBarry Smith if (ainew[i+1] > jmax) { 388ecf371e4SBarry Smith 389ecf371e4SBarry Smith /* estimate how much additional space we will need */ 390ecf371e4SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 391ecf371e4SBarry Smith /* just double the memory each time */ 392ecf371e4SBarry Smith int maxadd = jmax; 393ecf371e4SBarry Smith /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */ 394bbb6d6a8SBarry Smith if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 3952fb3ed76SBarry Smith jmax += maxadd; 396ecf371e4SBarry Smith 397ecf371e4SBarry Smith /* allocate a longer ajnew */ 3980452661fSBarry Smith ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 399549d3d68SSatish Balay ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 400606d414cSSatish Balay ierr = PetscFree(ajnew);CHKERRQ(ierr); 401289bc588SBarry Smith ajnew = ajtmp; 4022fb3ed76SBarry Smith realloc++; /* count how many times we realloc */ 403289bc588SBarry Smith } 404dbb450caSBarry Smith ajtmp = ajnew + ainew[i] + shift; 405289bc588SBarry Smith fm = fill[n]; 406289bc588SBarry Smith nzi = 0; 4072fb3ed76SBarry Smith im[i] = nnz; 408289bc588SBarry Smith while (nnz--) { 409289bc588SBarry Smith if (fm < i) nzi++; 410dbb450caSBarry Smith *ajtmp++ = fm - shift; 411289bc588SBarry Smith fm = fill[fm]; 412289bc588SBarry Smith } 413289bc588SBarry Smith idnew[i] = ainew[i] + nzi; 414289bc588SBarry Smith } 415464e8d44SSatish Balay if (ai[n] != 0) { 416464e8d44SSatish Balay double af = ((double)ainew[n])/((double)ai[n]); 417c38d4ed2SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 418981c4779SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 419981c4779SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 420981c4779SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 42105bf559cSSatish Balay } else { 422981c4779SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 42305bf559cSSatish Balay } 4242fb3ed76SBarry Smith 425898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 426898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 4271987afe7SBarry Smith 428606d414cSSatish Balay ierr = PetscFree(fill);CHKERRQ(ierr); 429289bc588SBarry Smith 430289bc588SBarry Smith /* put together the new matrix */ 431b4fd4287SBarry Smith ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 4321987afe7SBarry Smith PLogObjectParent(*B,isicol); 433416022c9SBarry Smith b = (Mat_SeqAIJ *) (*B)->data; 434606d414cSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 4357c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 436e8d4e0b9SBarry Smith /* the next line frees the default space generated by the Create() */ 437606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 438606d414cSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 43991bf9adeSBarry Smith b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 440416022c9SBarry Smith b->j = ajnew; 441416022c9SBarry Smith b->i = ainew; 442416022c9SBarry Smith b->diag = idnew; 443416022c9SBarry Smith b->ilen = 0; 444416022c9SBarry Smith b->imax = 0; 445416022c9SBarry Smith b->row = isrow; 446416022c9SBarry Smith b->col = iscol; 447c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 448c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 44982bf6240SBarry Smith b->icol = isicol; 45091bf9adeSBarry Smith b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 451416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 4527fd98bd6SLois Curfman McInnes Allocate idnew, solve_work, new a, new j */ 453416022c9SBarry Smith PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 454416022c9SBarry Smith b->maxnz = b->nz = ainew[n] + shift; 4557fd98bd6SLois Curfman McInnes 456e93ccc4dSBarry Smith (*B)->factor = FACTOR_LU;; 457ae068f58SLois Curfman McInnes (*B)->info.factor_mallocs = realloc; 458ae068f58SLois Curfman McInnes (*B)->info.fill_ratio_given = f; 459703deb49SSatish Balay (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 460703deb49SSatish Balay 461e93ccc4dSBarry Smith if (ai[n] != 0) { 462e93ccc4dSBarry Smith (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]); 463eea2913aSSatish Balay } else { 464eea2913aSSatish Balay (*B)->info.fill_ratio_needed = 0.0; 465eea2913aSSatish Balay } 4663a40ed3dSBarry Smith PetscFunctionReturn(0); 467289bc588SBarry Smith } 4680a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 469184914b5SBarry Smith extern int Mat_AIJ_CheckInode(Mat); 47041c01911SSatish Balay 4715615d1e5SSatish Balay #undef __FUNC__ 4725615d1e5SSatish Balay #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 473416022c9SBarry Smith int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 474289bc588SBarry Smith { 475416022c9SBarry Smith Mat C = *B; 476416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 47782bf6240SBarry Smith IS isrow = b->row, isicol = b->icol; 478416022c9SBarry Smith int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 4791987afe7SBarry Smith int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 480f2582111SSatish Balay int *diag_offset = b->diag,diag,k; 48135aab85fSBarry Smith int preserve_row_sums = (int) a->ilu_preserve_row_sums; 4823a40ed3dSBarry Smith register int *pj; 4838ecf7165SBarry Smith Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 48435aab85fSBarry Smith double ssum; 48535aab85fSBarry Smith register Scalar *pv, *rtmps,*u_values; 486289bc588SBarry Smith 4873a40ed3dSBarry Smith PetscFunctionBegin; 48882bf6240SBarry Smith 48978b31e54SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 49078b31e54SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 4910452661fSBarry Smith rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp); 492549d3d68SSatish Balay ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 4930cf60383SBarry Smith rtmps = rtmp + shift; ics = ic + shift; 494289bc588SBarry Smith 4956cf997b0SBarry Smith /* precalculate row sums */ 49635aab85fSBarry Smith if (preserve_row_sums) { 49735aab85fSBarry Smith rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums); 49835aab85fSBarry Smith for ( i=0; i<n; i++ ) { 49935aab85fSBarry Smith nz = a->i[r[i]+1] - a->i[r[i]]; 50035aab85fSBarry Smith v = a->a + a->i[r[i]] + shift; 50135aab85fSBarry Smith sum = 0.0; 50235aab85fSBarry Smith for ( j=0; j<nz; j++ ) sum += v[j]; 50335aab85fSBarry Smith rowsums[i] = sum; 50435aab85fSBarry Smith } 50535aab85fSBarry Smith } 50635aab85fSBarry Smith 507289bc588SBarry Smith for ( i=0; i<n; i++ ) { 508289bc588SBarry Smith nz = ai[i+1] - ai[i]; 509dbb450caSBarry Smith ajtmp = aj + ai[i] + shift; 51048e94559SBarry Smith for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 511289bc588SBarry Smith 512289bc588SBarry Smith /* load in initial (unfactored row) */ 513416022c9SBarry Smith nz = a->i[r[i]+1] - a->i[r[i]]; 514416022c9SBarry Smith ajtmpold = a->j + a->i[r[i]] + shift; 515416022c9SBarry Smith v = a->a + a->i[r[i]] + shift; 5160cf60383SBarry Smith for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 517289bc588SBarry Smith 518dbb450caSBarry Smith row = *ajtmp++ + shift; 519f2582111SSatish Balay while (row < i ) { 5208c37ef55SBarry Smith pc = rtmp + row; 5218c37ef55SBarry Smith if (*pc != 0.0) { 5221987afe7SBarry Smith pv = b->a + diag_offset[row] + shift; 5231987afe7SBarry Smith pj = b->j + diag_offset[row] + (!shift); 52435aab85fSBarry Smith multiplier = *pc / *pv++; 5258c37ef55SBarry Smith *pc = multiplier; 526f2582111SSatish Balay nz = ai[row+1] - diag_offset[row] - 1; 527f2582111SSatish Balay for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 528f2582111SSatish Balay PLogFlops(2*nz); 529289bc588SBarry Smith } 530dbb450caSBarry Smith row = *ajtmp++ + shift; 531289bc588SBarry Smith } 532416022c9SBarry Smith /* finished row so stick it into b->a */ 533416022c9SBarry Smith pv = b->a + ai[i] + shift; 534416022c9SBarry Smith pj = b->j + ai[i] + shift; 5358c37ef55SBarry Smith nz = ai[i+1] - ai[i]; 536416022c9SBarry Smith for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 53735aab85fSBarry Smith diag = diag_offset[i] - ai[i]; 53835aab85fSBarry Smith /* 53935aab85fSBarry Smith Possibly adjust diagonal entry on current row to force 54035aab85fSBarry Smith LU matrix to have same row sum as initial matrix. 54135aab85fSBarry Smith */ 542d708749eSBarry Smith if (pv[diag] == 0.0) { 5436cf997b0SBarry Smith SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 544d708749eSBarry Smith } 54535aab85fSBarry Smith if (preserve_row_sums) { 54635aab85fSBarry Smith pj = b->j + ai[i] + shift; 54735aab85fSBarry Smith sum = rowsums[i]; 54835aab85fSBarry Smith for ( j=0; j<diag; j++ ) { 54935aab85fSBarry Smith u_values = b->a + diag_offset[pj[j]] + shift; 55035aab85fSBarry Smith nz = ai[pj[j]+1] - diag_offset[pj[j]]; 55135aab85fSBarry Smith inner_sum = 0.0; 55235aab85fSBarry Smith for ( k=0; k<nz; k++ ) { 55335aab85fSBarry Smith inner_sum += u_values[k]; 55435aab85fSBarry Smith } 55535aab85fSBarry Smith sum -= pv[j]*inner_sum; 55635aab85fSBarry Smith 55735aab85fSBarry Smith } 55835aab85fSBarry Smith nz = ai[i+1] - diag_offset[i] - 1; 55935aab85fSBarry Smith u_values = b->a + diag_offset[i] + 1 + shift; 56035aab85fSBarry Smith for ( k=0; k<nz; k++ ) { 56135aab85fSBarry Smith sum -= u_values[k]; 56235aab85fSBarry Smith } 56335aab85fSBarry Smith ssum = PetscAbsScalar(sum/pv[diag]); 56435aab85fSBarry Smith if (ssum < 1000. && ssum > .001) pv[diag] = sum; 56535aab85fSBarry Smith } 56635aab85fSBarry Smith /* check pivot entry for current row */ 5678c37ef55SBarry Smith } 5680f11f9aeSSatish Balay 56935aab85fSBarry Smith /* invert diagonal entries for simplier triangular solves */ 57035aab85fSBarry Smith for ( i=0; i<n; i++ ) { 57135aab85fSBarry Smith b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 57235aab85fSBarry Smith } 57335aab85fSBarry Smith 574606d414cSSatish Balay if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 575606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 57678b31e54SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 57778b31e54SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 578416022c9SBarry Smith C->factor = FACTOR_LU; 57941c01911SSatish Balay ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 580c456f294SBarry Smith C->assembled = PETSC_TRUE; 581416022c9SBarry Smith PLogFlops(b->n); 5823a40ed3dSBarry Smith PetscFunctionReturn(0); 583289bc588SBarry Smith } 5840a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 5855615d1e5SSatish Balay #undef __FUNC__ 5865615d1e5SSatish Balay #define __FUNC__ "MatLUFactor_SeqAIJ" 587416022c9SBarry Smith int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 588da3a660dSBarry Smith { 589416022c9SBarry Smith Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 59086bacbd2SBarry Smith int ierr,refct; 591416022c9SBarry Smith Mat C; 592f830108cSBarry Smith PetscOps *Abops; 5930a6ffc59SBarry Smith MatOps Aops; 594416022c9SBarry Smith 5953a40ed3dSBarry Smith PetscFunctionBegin; 596f2582111SSatish Balay ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 597f2582111SSatish Balay ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 598da3a660dSBarry Smith 599da3a660dSBarry Smith /* free all the data structures from mat */ 600606d414cSSatish Balay ierr = PetscFree(mat->a);CHKERRQ(ierr); 601606d414cSSatish Balay if (!mat->singlemalloc) { 602606d414cSSatish Balay ierr = PetscFree(mat->i);CHKERRQ(ierr); 603606d414cSSatish Balay ierr = PetscFree(mat->j);CHKERRQ(ierr); 604606d414cSSatish Balay } 605606d414cSSatish Balay if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 606606d414cSSatish Balay if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 607606d414cSSatish Balay if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 608606d414cSSatish Balay if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 609606d414cSSatish Balay if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 6100f0aacb9SBarry Smith if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 611606d414cSSatish Balay ierr = PetscFree(mat);CHKERRQ(ierr); 612da3a660dSBarry Smith 61317642b18SBarry Smith ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 61417642b18SBarry Smith ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 61517642b18SBarry Smith 616f830108cSBarry Smith /* 617f830108cSBarry Smith This is horrible, horrible code. We need to keep the 618f830108cSBarry Smith A pointers for the bops and ops but copy everything 619f830108cSBarry Smith else from C. 620f830108cSBarry Smith */ 621f830108cSBarry Smith Abops = A->bops; 622f830108cSBarry Smith Aops = A->ops; 62386bacbd2SBarry Smith refct = A->refct; 624549d3d68SSatish Balay ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 625451c4af8SSatish Balay mat = (Mat_SeqAIJ *) A->data; 626451c4af8SSatish Balay PLogObjectParent(A,mat->icol); 627451c4af8SSatish Balay 628f830108cSBarry Smith A->bops = Abops; 629f830108cSBarry Smith A->ops = Aops; 630bef8e0ddSBarry Smith A->qlist = 0; 63186bacbd2SBarry Smith A->refct = refct; 632c38d4ed2SBarry Smith /* copy over the type_name and name */ 633c38d4ed2SBarry Smith ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 634c38d4ed2SBarry Smith ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 635f830108cSBarry Smith 6360452661fSBarry Smith PetscHeaderDestroy(C); 637c38d4ed2SBarry Smith 6383a40ed3dSBarry Smith PetscFunctionReturn(0); 639da3a660dSBarry Smith } 6400a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 6415615d1e5SSatish Balay #undef __FUNC__ 6425615d1e5SSatish Balay #define __FUNC__ "MatSolve_SeqAIJ" 643416022c9SBarry Smith int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 6448c37ef55SBarry Smith { 645416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 646416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 647416022c9SBarry Smith int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 6483b2fbd54SBarry Smith int nz,shift = a->indexshift,*rout,*cout; 649416022c9SBarry Smith Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 6508c37ef55SBarry Smith 6513a40ed3dSBarry Smith PetscFunctionBegin; 6523a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 65391bf9adeSBarry Smith 654e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 655e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 656416022c9SBarry Smith tmp = a->solve_work; 6578c37ef55SBarry Smith 6583b2fbd54SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 6593b2fbd54SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 6608c37ef55SBarry Smith 6618c37ef55SBarry Smith /* forward solve the lower triangular */ 6628c37ef55SBarry Smith tmp[0] = b[*r++]; 6630cf60383SBarry Smith tmps = tmp + shift; 6648c37ef55SBarry Smith for ( i=1; i<n; i++ ) { 665dbb450caSBarry Smith v = aa + ai[i] + shift; 666dbb450caSBarry Smith vi = aj + ai[i] + shift; 667416022c9SBarry Smith nz = a->diag[i] - ai[i]; 6688c37ef55SBarry Smith sum = b[*r++]; 6690cf60383SBarry Smith while (nz--) sum -= *v++ * tmps[*vi++]; 6708c37ef55SBarry Smith tmp[i] = sum; 6718c37ef55SBarry Smith } 6728c37ef55SBarry Smith 6738c37ef55SBarry Smith /* backward solve the upper triangular */ 6748c37ef55SBarry Smith for ( i=n-1; i>=0; i-- ){ 675416022c9SBarry Smith v = aa + a->diag[i] + (!shift); 676416022c9SBarry Smith vi = aj + a->diag[i] + (!shift); 677416022c9SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 6788c37ef55SBarry Smith sum = tmp[i]; 6790cf60383SBarry Smith while (nz--) sum -= *v++ * tmps[*vi++]; 680416022c9SBarry Smith x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 6818c37ef55SBarry Smith } 6828c37ef55SBarry Smith 6833b2fbd54SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 6843b2fbd54SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 685cb5b572fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 686cb5b572fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 687416022c9SBarry Smith PLogFlops(2*a->nz - a->n); 6883a40ed3dSBarry Smith PetscFunctionReturn(0); 6898c37ef55SBarry Smith } 690026e39d0SSatish Balay 691930ae53cSBarry Smith /* ----------------------------------------------------------- */ 692930ae53cSBarry Smith #undef __FUNC__ 693930ae53cSBarry Smith #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 694930ae53cSBarry Smith int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 695930ae53cSBarry Smith { 696930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 697d85afc42SSatish Balay int n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr; 698d85afc42SSatish Balay Scalar *x,*b, *aa = a->a, sum; 699aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 700d85afc42SSatish Balay int adiag_i,i,*vi,nz,ai_i; 701d85afc42SSatish Balay Scalar *v; 702d85afc42SSatish Balay #endif 703930ae53cSBarry Smith 7043a40ed3dSBarry Smith PetscFunctionBegin; 7053a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 706930ae53cSBarry Smith if (a->indexshift) { 7073a40ed3dSBarry Smith ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 7083a40ed3dSBarry Smith PetscFunctionReturn(0); 709930ae53cSBarry Smith } 710930ae53cSBarry Smith 711e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 712e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 713930ae53cSBarry Smith 714aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 7151c4feecaSSatish Balay fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 7161c4feecaSSatish Balay #else 717930ae53cSBarry Smith /* forward solve the lower triangular */ 718930ae53cSBarry Smith x[0] = b[0]; 719930ae53cSBarry Smith for ( i=1; i<n; i++ ) { 720930ae53cSBarry Smith ai_i = ai[i]; 721930ae53cSBarry Smith v = aa + ai_i; 722930ae53cSBarry Smith vi = aj + ai_i; 723930ae53cSBarry Smith nz = adiag[i] - ai_i; 724930ae53cSBarry Smith sum = b[i]; 725930ae53cSBarry Smith while (nz--) sum -= *v++ * x[*vi++]; 726930ae53cSBarry Smith x[i] = sum; 727930ae53cSBarry Smith } 728930ae53cSBarry Smith 729930ae53cSBarry Smith /* backward solve the upper triangular */ 730930ae53cSBarry Smith for ( i=n-1; i>=0; i-- ){ 731930ae53cSBarry Smith adiag_i = adiag[i]; 732d708749eSBarry Smith v = aa + adiag_i + 1; 733d708749eSBarry Smith vi = aj + adiag_i + 1; 734930ae53cSBarry Smith nz = ai[i+1] - adiag_i - 1; 735930ae53cSBarry Smith sum = x[i]; 736930ae53cSBarry Smith while (nz--) sum -= *v++ * x[*vi++]; 737930ae53cSBarry Smith x[i] = sum*aa[adiag_i]; 738930ae53cSBarry Smith } 7391c4feecaSSatish Balay #endif 740930ae53cSBarry Smith PLogFlops(2*a->nz - a->n); 741cb5b572fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 742cb5b572fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7433a40ed3dSBarry Smith PetscFunctionReturn(0); 744930ae53cSBarry Smith } 745930ae53cSBarry Smith 7465615d1e5SSatish Balay #undef __FUNC__ 7475615d1e5SSatish Balay #define __FUNC__ "MatSolveAdd_SeqAIJ" 748416022c9SBarry Smith int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 749da3a660dSBarry Smith { 750416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 751416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 752416022c9SBarry Smith int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 7533b2fbd54SBarry Smith int nz, shift = a->indexshift,*rout,*cout; 754416022c9SBarry Smith Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 755da3a660dSBarry Smith 7563a40ed3dSBarry Smith PetscFunctionBegin; 75778b31e54SBarry Smith if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 758da3a660dSBarry Smith 759e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 760e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 761416022c9SBarry Smith tmp = a->solve_work; 762da3a660dSBarry Smith 7633b2fbd54SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 7643b2fbd54SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 765da3a660dSBarry Smith 766da3a660dSBarry Smith /* forward solve the lower triangular */ 767da3a660dSBarry Smith tmp[0] = b[*r++]; 768da3a660dSBarry Smith for ( i=1; i<n; i++ ) { 769dbb450caSBarry Smith v = aa + ai[i] + shift; 770dbb450caSBarry Smith vi = aj + ai[i] + shift; 771416022c9SBarry Smith nz = a->diag[i] - ai[i]; 772da3a660dSBarry Smith sum = b[*r++]; 773dbb450caSBarry Smith while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 774da3a660dSBarry Smith tmp[i] = sum; 775da3a660dSBarry Smith } 776da3a660dSBarry Smith 777da3a660dSBarry Smith /* backward solve the upper triangular */ 778da3a660dSBarry Smith for ( i=n-1; i>=0; i-- ){ 779416022c9SBarry Smith v = aa + a->diag[i] + (!shift); 780416022c9SBarry Smith vi = aj + a->diag[i] + (!shift); 781416022c9SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 782da3a660dSBarry Smith sum = tmp[i]; 783dbb450caSBarry Smith while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 784416022c9SBarry Smith tmp[i] = sum*aa[a->diag[i]+shift]; 785da3a660dSBarry Smith x[*c--] += tmp[i]; 786da3a660dSBarry Smith } 787da3a660dSBarry Smith 7883b2fbd54SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 7893b2fbd54SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 790cb5b572fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 791cb5b572fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 792416022c9SBarry Smith PLogFlops(2*a->nz); 793898183ecSLois Curfman McInnes 7943a40ed3dSBarry Smith PetscFunctionReturn(0); 795da3a660dSBarry Smith } 796da3a660dSBarry Smith /* -------------------------------------------------------------------*/ 7975615d1e5SSatish Balay #undef __FUNC__ 7987c922b88SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqAIJ" 7997c922b88SBarry Smith int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx) 800da3a660dSBarry Smith { 801416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 8022235e667SBarry Smith IS iscol = a->col, isrow = a->row; 803416022c9SBarry Smith int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 804f1af5d2fSBarry Smith int nz,shift = a->indexshift,*rout,*cout, *diag = a->diag; 805f1af5d2fSBarry Smith Scalar *x,*b,*tmp, *aa = a->a, *v, s1; 806da3a660dSBarry Smith 8073a40ed3dSBarry Smith PetscFunctionBegin; 808e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 809e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 810416022c9SBarry Smith tmp = a->solve_work; 811da3a660dSBarry Smith 8122235e667SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 8132235e667SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 814da3a660dSBarry Smith 815da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 8162235e667SBarry Smith for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 817da3a660dSBarry Smith 818da3a660dSBarry Smith /* forward solve the U^T */ 819da3a660dSBarry Smith for ( i=0; i<n; i++ ) { 820f1af5d2fSBarry Smith v = aa + diag[i] + shift; 821f1af5d2fSBarry Smith vi = aj + diag[i] + (!shift); 822f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 823c38d4ed2SBarry Smith s1 = tmp[i]; 824c38d4ed2SBarry Smith s1 *= *(v++); /* multiply by inverse of diagonal entry */ 825da3a660dSBarry Smith while (nz--) { 826f1af5d2fSBarry Smith tmp[*vi++ + shift] -= (*v++)*s1; 827da3a660dSBarry Smith } 828c38d4ed2SBarry Smith tmp[i] = s1; 829da3a660dSBarry Smith } 830da3a660dSBarry Smith 831da3a660dSBarry Smith /* backward solve the L^T */ 832da3a660dSBarry Smith for ( i=n-1; i>=0; i-- ){ 833f1af5d2fSBarry Smith v = aa + diag[i] - 1 + shift; 834f1af5d2fSBarry Smith vi = aj + diag[i] - 1 + shift; 835f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 836f1af5d2fSBarry Smith s1 = tmp[i]; 837da3a660dSBarry Smith while (nz--) { 838f1af5d2fSBarry Smith tmp[*vi-- + shift] -= (*v--)*s1; 839da3a660dSBarry Smith } 840da3a660dSBarry Smith } 841da3a660dSBarry Smith 842da3a660dSBarry Smith /* copy tmp into x according to permutation */ 843da3a660dSBarry Smith for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 844da3a660dSBarry Smith 8452235e667SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 8462235e667SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 847cb5b572fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 848cb5b572fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 849da3a660dSBarry Smith 850416022c9SBarry Smith PLogFlops(2*a->nz-a->n); 8513a40ed3dSBarry Smith PetscFunctionReturn(0); 852da3a660dSBarry Smith } 853da3a660dSBarry Smith 8545615d1e5SSatish Balay #undef __FUNC__ 8557c922b88SBarry Smith #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 8567c922b88SBarry Smith int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 857da3a660dSBarry Smith { 858416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 8592235e667SBarry Smith IS iscol = a->col, isrow = a->row; 860416022c9SBarry Smith int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 861f1af5d2fSBarry Smith int nz,shift = a->indexshift, *rout, *cout, *diag = a->diag; 862416022c9SBarry Smith Scalar *x,*b,*tmp, *aa = a->a, *v; 8636abc6512SBarry Smith 8643a40ed3dSBarry Smith PetscFunctionBegin; 8656abc6512SBarry Smith if (zz != xx) VecCopy(zz,xx); 8666abc6512SBarry Smith 867e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 868e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 869416022c9SBarry Smith tmp = a->solve_work; 8706abc6512SBarry Smith 8712235e667SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 8722235e667SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 8736abc6512SBarry Smith 8746abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 8752235e667SBarry Smith for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 8766abc6512SBarry Smith 8776abc6512SBarry Smith /* forward solve the U^T */ 8786abc6512SBarry Smith for ( i=0; i<n; i++ ) { 879f1af5d2fSBarry Smith v = aa + diag[i] + shift; 880f1af5d2fSBarry Smith vi = aj + diag[i] + (!shift); 881f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 8826abc6512SBarry Smith tmp[i] *= *v++; 8836abc6512SBarry Smith while (nz--) { 884dbb450caSBarry Smith tmp[*vi++ + shift] -= (*v++)*tmp[i]; 8856abc6512SBarry Smith } 8866abc6512SBarry Smith } 8876abc6512SBarry Smith 8886abc6512SBarry Smith /* backward solve the L^T */ 8896abc6512SBarry Smith for ( i=n-1; i>=0; i-- ){ 890f1af5d2fSBarry Smith v = aa + diag[i] - 1 + shift; 891f1af5d2fSBarry Smith vi = aj + diag[i] - 1 + shift; 892f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 8936abc6512SBarry Smith while (nz--) { 894dbb450caSBarry Smith tmp[*vi-- + shift] -= (*v--)*tmp[i]; 8956abc6512SBarry Smith } 8966abc6512SBarry Smith } 8976abc6512SBarry Smith 8986abc6512SBarry Smith /* copy tmp into x according to permutation */ 8996abc6512SBarry Smith for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 9006abc6512SBarry Smith 9012235e667SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 9022235e667SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 903cb5b572fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 904cb5b572fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9056abc6512SBarry Smith 906416022c9SBarry Smith PLogFlops(2*a->nz); 9073a40ed3dSBarry Smith PetscFunctionReturn(0); 908da3a660dSBarry Smith } 9099e25ed09SBarry Smith /* ----------------------------------------------------------------*/ 9107c922b88SBarry Smith extern int MatMissingDiagonal_SeqAIJ(Mat); 91108480c60SBarry Smith 9125615d1e5SSatish Balay #undef __FUNC__ 9135615d1e5SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 9146cf997b0SBarry Smith int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 9159e25ed09SBarry Smith { 916416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 9179e25ed09SBarry Smith IS isicol; 918416022c9SBarry Smith int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 91956beaf04SBarry Smith int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 920335d9088SBarry Smith int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 9216cf997b0SBarry Smith int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 92277c4ece6SBarry Smith PetscTruth col_identity, row_identity; 9236cf997b0SBarry Smith double f; 9249e25ed09SBarry Smith 9253a40ed3dSBarry Smith PetscFunctionBegin; 9266cf997b0SBarry Smith if (info) { 9276cf997b0SBarry Smith f = info->fill; 9280cd17293SBarry Smith levels = (int) info->levels; 9290cd17293SBarry Smith diagonal_fill = (int) info->diagonal_fill; 9306cf997b0SBarry Smith } else { 9316cf997b0SBarry Smith f = 1.0; 9326cf997b0SBarry Smith levels = 0; 9336cf997b0SBarry Smith diagonal_fill = 0; 9346cf997b0SBarry Smith } 93582bf6240SBarry Smith ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 93682bf6240SBarry Smith 93708480c60SBarry Smith /* special case that simply copies fill pattern */ 938be0abb6dSBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 939be0abb6dSBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 94086bacbd2SBarry Smith if (!levels && row_identity && col_identity) { 9412e8a6d31SBarry Smith ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 94208480c60SBarry Smith (*fact)->factor = FACTOR_LU; 94308480c60SBarry Smith b = (Mat_SeqAIJ *) (*fact)->data; 94408480c60SBarry Smith if (!b->diag) { 9457c922b88SBarry Smith ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 94608480c60SBarry Smith } 9477c922b88SBarry Smith ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 94808480c60SBarry Smith b->row = isrow; 94908480c60SBarry Smith b->col = iscol; 95082bf6240SBarry Smith b->icol = isicol; 9510452661fSBarry Smith b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 952f830108cSBarry Smith (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 953c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 954c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 9553a40ed3dSBarry Smith PetscFunctionReturn(0); 95608480c60SBarry Smith } 95708480c60SBarry Smith 958898183ecSLois Curfman McInnes ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 959898183ecSLois Curfman McInnes ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 9609e25ed09SBarry Smith 9619e25ed09SBarry Smith /* get new row pointers */ 9620452661fSBarry Smith ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 963dbb450caSBarry Smith ainew[0] = -shift; 9649e25ed09SBarry Smith /* don't know how many column pointers are needed so estimate */ 965dbb450caSBarry Smith jmax = (int) (f*(ai[n]+!shift)); 9660452661fSBarry Smith ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 9679e25ed09SBarry Smith /* ajfill is level of fill for each fill entry */ 9680452661fSBarry Smith ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 9699e25ed09SBarry Smith /* fill is a linked list of nonzeros in active row */ 9700452661fSBarry Smith fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 97156beaf04SBarry Smith /* im is level for each filled value */ 9720452661fSBarry Smith im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 97356beaf04SBarry Smith /* dloc is location of diagonal in factor */ 9740452661fSBarry Smith dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 97556beaf04SBarry Smith dloc[0] = 0; 97656beaf04SBarry Smith for ( prow=0; prow<n; prow++ ) { 9776cf997b0SBarry Smith 9786cf997b0SBarry Smith /* copy current row into linked list */ 97956beaf04SBarry Smith nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 980385f7a74SSatish Balay if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 981dbb450caSBarry Smith xi = aj + ai[r[prow]] + shift; 9829e25ed09SBarry Smith fill[n] = n; 983435faa5fSBarry Smith fill[prow] = -1; /* marker to indicate if diagonal exists */ 9849e25ed09SBarry Smith while (nz--) { 9859e25ed09SBarry Smith fm = n; 986dbb450caSBarry Smith idx = ic[*xi++ + shift]; 9879e25ed09SBarry Smith do { 9889e25ed09SBarry Smith m = fm; 9899e25ed09SBarry Smith fm = fill[m]; 9909e25ed09SBarry Smith } while (fm < idx); 9919e25ed09SBarry Smith fill[m] = idx; 9929e25ed09SBarry Smith fill[idx] = fm; 99356beaf04SBarry Smith im[idx] = 0; 9949e25ed09SBarry Smith } 9956cf997b0SBarry Smith 9966cf997b0SBarry Smith /* make sure diagonal entry is included */ 997435faa5fSBarry Smith if (diagonal_fill && fill[prow] == -1) { 9986cf997b0SBarry Smith fm = n; 999435faa5fSBarry Smith while (fill[fm] < prow) fm = fill[fm]; 1000435faa5fSBarry Smith fill[prow] = fill[fm]; /* insert diagonal into linked list */ 1001435faa5fSBarry Smith fill[fm] = prow; 10026cf997b0SBarry Smith im[prow] = 0; 10036cf997b0SBarry Smith nzf++; 1004335d9088SBarry Smith dcount++; 10056cf997b0SBarry Smith } 10066cf997b0SBarry Smith 100756beaf04SBarry Smith nzi = 0; 10089e25ed09SBarry Smith row = fill[n]; 100956beaf04SBarry Smith while ( row < prow ) { 101056beaf04SBarry Smith incrlev = im[row] + 1; 101156beaf04SBarry Smith nz = dloc[row]; 10126cf997b0SBarry Smith xi = ajnew + ainew[row] + shift + nz + 1; 1013dbb450caSBarry Smith flev = ajfill + ainew[row] + shift + nz + 1; 1014416022c9SBarry Smith nnz = ainew[row+1] - ainew[row] - nz - 1; 101556beaf04SBarry Smith fm = row; 101656beaf04SBarry Smith while (nnz-- > 0) { 1017dbb450caSBarry Smith idx = *xi++ + shift; 101856beaf04SBarry Smith if (*flev + incrlev > levels) { 101956beaf04SBarry Smith flev++; 102056beaf04SBarry Smith continue; 102156beaf04SBarry Smith } 102256beaf04SBarry Smith do { 10239e25ed09SBarry Smith m = fm; 10249e25ed09SBarry Smith fm = fill[m]; 102556beaf04SBarry Smith } while (fm < idx); 10269e25ed09SBarry Smith if (fm != idx) { 102756beaf04SBarry Smith im[idx] = *flev + incrlev; 10289e25ed09SBarry Smith fill[m] = idx; 10299e25ed09SBarry Smith fill[idx] = fm; 10309e25ed09SBarry Smith fm = idx; 103156beaf04SBarry Smith nzf++; 1032ecf371e4SBarry Smith } else { 103356beaf04SBarry Smith if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 10349e25ed09SBarry Smith } 103556beaf04SBarry Smith flev++; 10369e25ed09SBarry Smith } 10379e25ed09SBarry Smith row = fill[row]; 103856beaf04SBarry Smith nzi++; 10399e25ed09SBarry Smith } 10409e25ed09SBarry Smith /* copy new filled row into permanent storage */ 104156beaf04SBarry Smith ainew[prow+1] = ainew[prow] + nzf; 1042d7e8b826SBarry Smith if (ainew[prow+1] > jmax-shift) { 1043ecf371e4SBarry Smith 1044ecf371e4SBarry Smith /* estimate how much additional space we will need */ 1045ecf371e4SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1046ecf371e4SBarry Smith /* just double the memory each time */ 1047ecf371e4SBarry Smith /* maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1048ecf371e4SBarry Smith int maxadd = jmax; 104956beaf04SBarry Smith if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1050bbb6d6a8SBarry Smith jmax += maxadd; 1051ecf371e4SBarry Smith 1052ecf371e4SBarry Smith /* allocate a longer ajnew and ajfill */ 10530452661fSBarry Smith xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1054549d3d68SSatish Balay ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1055606d414cSSatish Balay ierr = PetscFree(ajnew);CHKERRQ(ierr); 105656beaf04SBarry Smith ajnew = xi; 10570452661fSBarry Smith xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1058549d3d68SSatish Balay ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1059606d414cSSatish Balay ierr = PetscFree(ajfill);CHKERRQ(ierr); 106056beaf04SBarry Smith ajfill = xi; 1061ecf371e4SBarry Smith realloc++; /* count how many times we realloc */ 10629e25ed09SBarry Smith } 1063dbb450caSBarry Smith xi = ajnew + ainew[prow] + shift; 1064dbb450caSBarry Smith flev = ajfill + ainew[prow] + shift; 106556beaf04SBarry Smith dloc[prow] = nzi; 10669e25ed09SBarry Smith fm = fill[n]; 106756beaf04SBarry Smith while (nzf--) { 1068dbb450caSBarry Smith *xi++ = fm - shift; 106956beaf04SBarry Smith *flev++ = im[fm]; 10709e25ed09SBarry Smith fm = fill[fm]; 10719e25ed09SBarry Smith } 10726cf997b0SBarry Smith /* make sure row has diagonal entry */ 10736cf997b0SBarry Smith if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 10746cf997b0SBarry Smith SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 10756cf997b0SBarry Smith try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 10766cf997b0SBarry Smith } 10779e25ed09SBarry Smith } 1078606d414cSSatish Balay ierr = PetscFree(ajfill); CHKERRQ(ierr); 1079898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1080898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1081606d414cSSatish Balay ierr = PetscFree(fill);CHKERRQ(ierr); 1082606d414cSSatish Balay ierr = PetscFree(im);CHKERRQ(ierr); 10839e25ed09SBarry Smith 1084f64065bbSBarry Smith { 1085464e8d44SSatish Balay double af = ((double)ainew[n])/((double)ai[n]); 1086c38d4ed2SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1087981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1088981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1089981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1090335d9088SBarry Smith if (diagonal_fill) { 1091335d9088SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1092335d9088SBarry Smith } 1093f64065bbSBarry Smith } 1094bbb6d6a8SBarry Smith 10959e25ed09SBarry Smith /* put together the new matrix */ 1096b4fd4287SBarry Smith ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1097fa6007c9SSatish Balay PLogObjectParent(*fact,isicol); 1098416022c9SBarry Smith b = (Mat_SeqAIJ *) (*fact)->data; 1099606d414cSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 11007c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 1101dbb450caSBarry Smith len = (ainew[n] + shift)*sizeof(Scalar); 11029e25ed09SBarry Smith /* the next line frees the default space generated by the Create() */ 1103606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 1104606d414cSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 11056b96ef82SSatish Balay b->a = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a); 1106416022c9SBarry Smith b->j = ajnew; 1107416022c9SBarry Smith b->i = ainew; 110856beaf04SBarry Smith for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1109416022c9SBarry Smith b->diag = dloc; 1110416022c9SBarry Smith b->ilen = 0; 1111416022c9SBarry Smith b->imax = 0; 1112416022c9SBarry Smith b->row = isrow; 1113416022c9SBarry Smith b->col = iscol; 1114c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1115c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 111682bf6240SBarry Smith b->icol = isicol; 111782bf6240SBarry Smith b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1118416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 111956beaf04SBarry Smith Allocate dloc, solve_work, new a, new j */ 1120dbb450caSBarry Smith PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1121416022c9SBarry Smith b->maxnz = b->nz = ainew[n] + shift; 11229e25ed09SBarry Smith (*fact)->factor = FACTOR_LU; 1123ae068f58SLois Curfman McInnes 1124ae068f58SLois Curfman McInnes (*fact)->info.factor_mallocs = realloc; 1125ae068f58SLois Curfman McInnes (*fact)->info.fill_ratio_given = f; 1126ae068f58SLois Curfman McInnes (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1127e93ccc4dSBarry Smith (*fact)->factor = FACTOR_LU;; 1128ae068f58SLois Curfman McInnes 11293a40ed3dSBarry Smith PetscFunctionReturn(0); 11309e25ed09SBarry Smith } 1131d5d45c9bSBarry Smith 1132d5d45c9bSBarry Smith 1133d5d45c9bSBarry Smith 1134d5d45c9bSBarry Smith 1135