1*86bacbd2SBarry Smith /*$Id: aijfact.c,v 1.127 1999/11/24 21:53:47 bsmith Exp bsmith $*/ 2289bc588SBarry Smith 370f55243SBarry Smith #include "src/mat/impls/aij/seq/aij.h" 490f02eecSBarry Smith #include "src/vec/vecimpl.h" 51c4feecaSSatish Balay #include "src/inline/dot.h" 63b2fbd54SBarry Smith 75615d1e5SSatish Balay #undef __FUNC__ 891e9ee9fSBarry Smith #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 991e9ee9fSBarry Smith int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 103b2fbd54SBarry Smith { 113a40ed3dSBarry Smith PetscFunctionBegin; 123a40ed3dSBarry Smith 13e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14aa482453SBarry Smith #if !defined(PETSC_USE_DEBUG) 15d64ed03dSBarry Smith PetscFunctionReturn(0); 16d64ed03dSBarry Smith #endif 173b2fbd54SBarry Smith } 183b2fbd54SBarry Smith 19*86bacbd2SBarry Smith 20*86bacbd2SBarry Smith extern int MatMarkDiagonal_SeqAIJ(Mat); 21*86bacbd2SBarry Smith extern int Mat_AIJ_CheckInode(Mat); 22*86bacbd2SBarry Smith 23*86bacbd2SBarry Smith #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT) 24*86bacbd2SBarry Smith 25*86bacbd2SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 26*86bacbd2SBarry Smith #define dperm_ DPERM 27*86bacbd2SBarry Smith #define ilutp_ ILUTP 28*86bacbd2SBarry Smith #define ilu0_ ILU0 29*86bacbd2SBarry Smith #define msrcsr_ MSRCSR 30*86bacbd2SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 31*86bacbd2SBarry Smith #define dperm_ dperm 32*86bacbd2SBarry Smith #define ilutp_ ilutp 33*86bacbd2SBarry Smith #define ilu0_ ilu0 34*86bacbd2SBarry Smith #define msrcsr_ msrcsr 35*86bacbd2SBarry Smith #endif 36*86bacbd2SBarry Smith 37*86bacbd2SBarry Smith EXTERN_C_BEGIN 38*86bacbd2SBarry Smith extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*); 39*86bacbd2SBarry Smith extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*); 40*86bacbd2SBarry Smith extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*); 41*86bacbd2SBarry Smith extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *); 42*86bacbd2SBarry Smith EXTERN_C_END 43*86bacbd2SBarry Smith 44*86bacbd2SBarry Smith #undef __FUNC__ 45*86bacbd2SBarry Smith #define __FUNC__ "MatILUDTFactor_SeqAIJ" 46*86bacbd2SBarry Smith /* ------------------------------------------------------------ 47*86bacbd2SBarry Smith 48*86bacbd2SBarry Smith This interface was contribed by Tony Caola 49*86bacbd2SBarry Smith 50*86bacbd2SBarry Smith This routine is an interface to the pivoting drop-tolerance 51*86bacbd2SBarry Smith ILU routine written by Yousef Saad (saad@cs.umn.edu). It 52*86bacbd2SBarry Smith was inspired by the non-pivoting iludt written by David 53*86bacbd2SBarry Smith Hysom (hysom@cs.odu.edu). 54*86bacbd2SBarry Smith 55*86bacbd2SBarry Smith Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 56*86bacbd2SBarry Smith help in getting this routine ironed out. 57*86bacbd2SBarry Smith 58*86bacbd2SBarry Smith As of right now, there are a couple of things that could 59*86bacbd2SBarry Smith be, uh, better. 60*86bacbd2SBarry Smith 61*86bacbd2SBarry Smith 1 - Since Saad's routine is Fortran based, memory cannot be 62*86bacbd2SBarry Smith malloc'd. I was trying to get the expected fill from the 63*86bacbd2SBarry Smith preconditioner and use this number as the multiplier in 64*86bacbd2SBarry Smith the equation for jmax, below, but couldn't figure it out. 65*86bacbd2SBarry Smith Anyway, perhaps a better solution is to run SPARSKIT through 66*86bacbd2SBarry Smith f2c and incorporate mallocs(), but I want to graduate so I'll 67*86bacbd2SBarry Smith just rebuild Petsc. . . 68*86bacbd2SBarry Smith 69*86bacbd2SBarry Smith shift = 1, ishift = 0, for indices start at 1 70*86bacbd2SBarry Smith shift = 0, ishift = 1, for indices starting at 0 71*86bacbd2SBarry Smith ------------------------------------------------------------ 72*86bacbd2SBarry Smith */ 73*86bacbd2SBarry Smith 74*86bacbd2SBarry Smith int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 75*86bacbd2SBarry Smith { 76*86bacbd2SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 77*86bacbd2SBarry Smith IS iscolf, isrowf, isicol, isirow; 78*86bacbd2SBarry Smith PetscTruth reorder; 79*86bacbd2SBarry Smith int *c,*r,*ic,*ir, ierr, i, n = a->m; 80*86bacbd2SBarry Smith int *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j; 81*86bacbd2SBarry Smith int *ordcol, *ordrow,*iwk,*iperm, *jw; 82*86bacbd2SBarry Smith int ishift = !a->indexshift,shift = -a->indexshift; 83*86bacbd2SBarry Smith int jmax,lfill,job; 84*86bacbd2SBarry Smith Scalar *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0; 85*86bacbd2SBarry Smith 86*86bacbd2SBarry Smith PetscFunctionBegin; 87*86bacbd2SBarry Smith 88*86bacbd2SBarry Smith if (info->dt == PETSC_DEFAULT) info->dt = .005; 89*86bacbd2SBarry Smith if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax); 90*86bacbd2SBarry Smith if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 91*86bacbd2SBarry Smith if (info->fill == PETSC_DEFAULT) info->fill = (n*info->dtcount)/a->nz; 92*86bacbd2SBarry Smith lfill = info->dtcount/2; 93*86bacbd2SBarry Smith jmax = info->fill*a->nz; 94*86bacbd2SBarry Smith permtol = info->dtcol; 95*86bacbd2SBarry Smith 96*86bacbd2SBarry Smith ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 97*86bacbd2SBarry Smith ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr); 98*86bacbd2SBarry Smith 99*86bacbd2SBarry Smith 100*86bacbd2SBarry Smith /* ------------------------------------------------------------ 101*86bacbd2SBarry Smith If reorder=.TRUE., then the original matrix has to be 102*86bacbd2SBarry Smith reordered to reflect the user selected ordering scheme, and 103*86bacbd2SBarry Smith then de-reordered so it is in it's original format. 104*86bacbd2SBarry Smith Because Saad's dperm() is NOT in place, we have to copy 105*86bacbd2SBarry Smith the original matrix and allocate more storage. . . 106*86bacbd2SBarry Smith ------------------------------------------------------------ 107*86bacbd2SBarry Smith */ 108*86bacbd2SBarry Smith 109*86bacbd2SBarry Smith /* set reorder to true if either isrow or iscol is not identity */ 110*86bacbd2SBarry Smith ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 111*86bacbd2SBarry Smith if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 112*86bacbd2SBarry Smith reorder = PetscNot(reorder); 113*86bacbd2SBarry Smith 114*86bacbd2SBarry Smith ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 115*86bacbd2SBarry Smith ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 116*86bacbd2SBarry Smith ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 117*86bacbd2SBarry Smith ierr = ISGetIndices(isirow,&ir); CHKERRQ(ierr); 118*86bacbd2SBarry Smith 119*86bacbd2SBarry Smith /* storage for ilu factor */ 120*86bacbd2SBarry Smith new_i = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(new_i); 121*86bacbd2SBarry Smith new_j = (int *) PetscMalloc(jmax*sizeof(int)); CHKPTRQ(new_j); 122*86bacbd2SBarry Smith new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a); 123*86bacbd2SBarry Smith 124*86bacbd2SBarry Smith if (reorder) { 125*86bacbd2SBarry Smith old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2); 126*86bacbd2SBarry Smith old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2); 127*86bacbd2SBarry Smith old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 128*86bacbd2SBarry Smith } 129*86bacbd2SBarry Smith 130*86bacbd2SBarry Smith ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol); 131*86bacbd2SBarry Smith ordrow = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordrow); 132*86bacbd2SBarry Smith 133*86bacbd2SBarry Smith /* ------------------------------------------------------------ 134*86bacbd2SBarry Smith Make sure that everything is Fortran formatted (1-Based) 135*86bacbd2SBarry Smith ------------------------------------------------------------ 136*86bacbd2SBarry Smith */ 137*86bacbd2SBarry Smith for (i=old_i[0]-shift;i<old_i[n]-shift;i++) { 138*86bacbd2SBarry Smith old_j[i] = old_j[i]+ishift; 139*86bacbd2SBarry Smith if (reorder) { 140*86bacbd2SBarry Smith old_j2[i] = old_j[i]; 141*86bacbd2SBarry Smith old_a2[i] = old_a[i]; 142*86bacbd2SBarry Smith } 143*86bacbd2SBarry Smith }; 144*86bacbd2SBarry Smith 145*86bacbd2SBarry Smith for(i=0;i<n+1;i++) { 146*86bacbd2SBarry Smith old_i[i] = old_i[i]+ishift; 147*86bacbd2SBarry Smith if (reorder) old_i2[i]=old_i[i]; 148*86bacbd2SBarry Smith }; 149*86bacbd2SBarry Smith 150*86bacbd2SBarry Smith for(i=0;i<n;i++) { 151*86bacbd2SBarry Smith r[i] = r[i]+1; 152*86bacbd2SBarry Smith c[i] = c[i]+1; 153*86bacbd2SBarry Smith ir[i] = ir[i]+1; 154*86bacbd2SBarry Smith ic[i] = ic[i]+1; 155*86bacbd2SBarry Smith } 156*86bacbd2SBarry Smith 157*86bacbd2SBarry Smith if (reorder) { 158*86bacbd2SBarry Smith job = 3; 159*86bacbd2SBarry Smith dperm_(&n,old_a2,old_j2,old_i2,old_a,old_j,old_i,r,c,&job); 160*86bacbd2SBarry Smith } 161*86bacbd2SBarry Smith 162*86bacbd2SBarry Smith /* ------------------------------------------------------------ 163*86bacbd2SBarry Smith Call Saad's ilutp() routine to generate the factorization 164*86bacbd2SBarry Smith ------------------------------------------------------------ 165*86bacbd2SBarry Smith */ 166*86bacbd2SBarry Smith 167*86bacbd2SBarry Smith iperm = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm); 168*86bacbd2SBarry Smith jw = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw); 169*86bacbd2SBarry Smith w = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w); 170*86bacbd2SBarry Smith 171*86bacbd2SBarry Smith 172*86bacbd2SBarry Smith ilutp_(&n,old_a,old_j,old_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 173*86bacbd2SBarry Smith if (ierr) { 174*86bacbd2SBarry Smith switch (ierr) { 175*86bacbd2SBarry Smith case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax); 176*86bacbd2SBarry Smith break; 177*86bacbd2SBarry Smith case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax); 178*86bacbd2SBarry Smith break; 179*86bacbd2SBarry Smith case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 180*86bacbd2SBarry Smith break; 181*86bacbd2SBarry Smith case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 182*86bacbd2SBarry Smith break; 183*86bacbd2SBarry Smith case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 184*86bacbd2SBarry Smith break; 185*86bacbd2SBarry Smith default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 186*86bacbd2SBarry Smith } 187*86bacbd2SBarry Smith } 188*86bacbd2SBarry Smith 189*86bacbd2SBarry Smith ierr = PetscFree(w);CHKERRQ(ierr); 190*86bacbd2SBarry Smith ierr = PetscFree(jw);CHKERRQ(ierr); 191*86bacbd2SBarry Smith 192*86bacbd2SBarry Smith 193*86bacbd2SBarry Smith /* ------------------------------------------------------------ 194*86bacbd2SBarry Smith Saad's routine gives the result in Modified Sparse Row (msr) 195*86bacbd2SBarry Smith Convert to Compressed Sparse Row format (csr) 196*86bacbd2SBarry Smith ------------------------------------------------------------ 197*86bacbd2SBarry Smith */ 198*86bacbd2SBarry Smith 199*86bacbd2SBarry Smith wk = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk); 200*86bacbd2SBarry Smith iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk); 201*86bacbd2SBarry Smith 202*86bacbd2SBarry Smith msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 203*86bacbd2SBarry Smith 204*86bacbd2SBarry Smith ierr = PetscFree(iwk);CHKERRQ(ierr); 205*86bacbd2SBarry Smith ierr = PetscFree(wk);CHKERRQ(ierr); 206*86bacbd2SBarry Smith 207*86bacbd2SBarry Smith for(i=old_i[0]; i<=old_i[n]; i++) { 208*86bacbd2SBarry Smith old_j[i-1] = iperm[old_j[i-1]-1]; 209*86bacbd2SBarry Smith if (reorder) { 210*86bacbd2SBarry Smith old_j2[i-1] = old_j[i-1]; 211*86bacbd2SBarry Smith old_a2[i-1] = old_a[i-1]; 212*86bacbd2SBarry Smith } 213*86bacbd2SBarry Smith }; 214*86bacbd2SBarry Smith 215*86bacbd2SBarry Smith if (reorder) { 216*86bacbd2SBarry Smith for(i=0; i<n+1; i++) { 217*86bacbd2SBarry Smith old_i2[i]=old_i[i]; 218*86bacbd2SBarry Smith }; 219*86bacbd2SBarry Smith 220*86bacbd2SBarry Smith job = 3; 221*86bacbd2SBarry Smith dperm_(&n,old_a2,old_j2,old_i2,old_a,old_j,old_i,ir,ic,&job); 222*86bacbd2SBarry Smith ierr = PetscFree(old_a2);CHKERRQ(ierr); 223*86bacbd2SBarry Smith ierr = PetscFree(old_j2);CHKERRQ(ierr); 224*86bacbd2SBarry Smith ierr = PetscFree(old_i2);CHKERRQ(ierr); 225*86bacbd2SBarry Smith } 226*86bacbd2SBarry Smith 227*86bacbd2SBarry Smith /* get rid of the shift to incies statrting at 1 */ 228*86bacbd2SBarry Smith for (i=0; i<n+1; i++) { 229*86bacbd2SBarry Smith old_i[i] = old_i[i]-ishift; 230*86bacbd2SBarry Smith } 231*86bacbd2SBarry Smith 232*86bacbd2SBarry Smith for (i=old_i[0]-shift;i<=old_i[n]-shift;i++) { 233*86bacbd2SBarry Smith old_j[i-1] = old_j[i-1]-ishift; 234*86bacbd2SBarry Smith } 235*86bacbd2SBarry Smith 236*86bacbd2SBarry Smith /* Make the return matrix 0-based */ 237*86bacbd2SBarry Smith 238*86bacbd2SBarry Smith for (i=new_i[0]-shift;i<=new_i[n]-shift;i++) { 239*86bacbd2SBarry Smith new_j[i-1] = new_j[i-1]-ishift; 240*86bacbd2SBarry Smith } 241*86bacbd2SBarry Smith 242*86bacbd2SBarry Smith for (i=0; i<n+1; i++) { 243*86bacbd2SBarry Smith new_i[i] = new_i[i]-ishift; 244*86bacbd2SBarry Smith } 245*86bacbd2SBarry Smith 246*86bacbd2SBarry Smith for (i=0;i<n;i++) { 247*86bacbd2SBarry Smith r[i] = r[i]-1; 248*86bacbd2SBarry Smith c[i] = c[i]-1; 249*86bacbd2SBarry Smith ir[i] = ir[i]-1; 250*86bacbd2SBarry Smith ic[i] = ic[i]-1; 251*86bacbd2SBarry Smith } 252*86bacbd2SBarry Smith 253*86bacbd2SBarry Smith /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 254*86bacbd2SBarry Smith /*-- permute the right-hand-side and solution vectors --*/ 255*86bacbd2SBarry Smith for(i=0; i<n; i++) { 256*86bacbd2SBarry Smith ordcol[i] = ic[iperm[i]-1]; 257*86bacbd2SBarry Smith ordrow[i] = ir[i]; 258*86bacbd2SBarry Smith }; 259*86bacbd2SBarry Smith 260*86bacbd2SBarry Smith ierr = PetscFree(iperm);CHKERRQ(ierr); 261*86bacbd2SBarry Smith 262*86bacbd2SBarry Smith ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 263*86bacbd2SBarry Smith ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 264*86bacbd2SBarry Smith 265*86bacbd2SBarry Smith ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 266*86bacbd2SBarry Smith ierr = ISRestoreIndices(isirow,&ir); CHKERRQ(ierr); 267*86bacbd2SBarry Smith 268*86bacbd2SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf); 269*86bacbd2SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordrow,&isrowf); 270*86bacbd2SBarry Smith 271*86bacbd2SBarry Smith /*----- put together the new matrix -----*/ 272*86bacbd2SBarry Smith 273*86bacbd2SBarry Smith ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr); 274*86bacbd2SBarry Smith (*fact)->factor = FACTOR_LU; 275*86bacbd2SBarry Smith (*fact)->assembled = PETSC_TRUE; 276*86bacbd2SBarry Smith 277*86bacbd2SBarry Smith b = (Mat_SeqAIJ *) (*fact)->data; 278*86bacbd2SBarry Smith ierr = PetscFree(b->imax);CHKERRQ(ierr); 279*86bacbd2SBarry Smith b->sorted = PETSC_FALSE; 280*86bacbd2SBarry Smith b->singlemalloc = PETSC_TRUE; 281*86bacbd2SBarry Smith /* the next line frees the default space generated by the Create() */ 282*86bacbd2SBarry Smith ierr = PetscFree(b->a);CHKERRQ(ierr); 283*86bacbd2SBarry Smith ierr = PetscFree(b->ilen);CHKERRQ(ierr); 284*86bacbd2SBarry Smith b->a = new_a; 285*86bacbd2SBarry Smith b->j = new_j; 286*86bacbd2SBarry Smith b->i = new_i; 287*86bacbd2SBarry Smith b->ilen = 0; 288*86bacbd2SBarry Smith b->imax = 0; 289*86bacbd2SBarry Smith b->row = isrowf; 290*86bacbd2SBarry Smith b->col = iscolf; 291*86bacbd2SBarry Smith b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 292*86bacbd2SBarry Smith b->maxnz = b->nz = new_i[n]; 293*86bacbd2SBarry Smith b->indexshift = a->indexshift; 294*86bacbd2SBarry Smith ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 295*86bacbd2SBarry Smith (*fact)->info.factor_mallocs = 0; 296*86bacbd2SBarry Smith 297*86bacbd2SBarry Smith PLogFlops(b->n); 298*86bacbd2SBarry Smith 299*86bacbd2SBarry Smith a->j = old_j; 300*86bacbd2SBarry Smith a->i = old_i; 301*86bacbd2SBarry Smith a->a = old_a; 302*86bacbd2SBarry Smith a->sorted = PETSC_FALSE; 303*86bacbd2SBarry Smith ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 304*86bacbd2SBarry Smith 305*86bacbd2SBarry Smith /* check out for identical nodes. If found, use inode functions */ 306*86bacbd2SBarry Smith ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 307*86bacbd2SBarry Smith 308*86bacbd2SBarry Smith PetscFunctionReturn(0); 309*86bacbd2SBarry Smith } 310*86bacbd2SBarry Smith #else 311*86bacbd2SBarry Smith #undef __FUNC__ 312*86bacbd2SBarry Smith #define __FUNC__ "MatILUDTFactor_SeqAIJ" 313*86bacbd2SBarry Smith int MatILUDTFactor_SeqAIJ(Mat A,double dt,int maxnz,IS isrow,IS iscol,Mat *fact) 314*86bacbd2SBarry Smith { 315*86bacbd2SBarry Smith PetscFunctionBegin; 316*86bacbd2SBarry Smith SETERRQ(1,1,"You must install Saad's ILUDT to use this"); 317*86bacbd2SBarry Smith } 318*86bacbd2SBarry Smith #endif 319*86bacbd2SBarry Smith 320289bc588SBarry Smith /* 321289bc588SBarry Smith Factorization code for AIJ format. 322289bc588SBarry Smith */ 3235615d1e5SSatish Balay #undef __FUNC__ 3245615d1e5SSatish Balay #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 325416022c9SBarry Smith int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 326289bc588SBarry Smith { 327416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 328289bc588SBarry Smith IS isicol; 329416022c9SBarry Smith int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 330416022c9SBarry Smith int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 33191bf9adeSBarry Smith int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 332289bc588SBarry Smith 3333a40ed3dSBarry Smith PetscFunctionBegin; 334d3cbd50cSLois Curfman McInnes PetscValidHeaderSpecific(isrow,IS_COOKIE); 335d3cbd50cSLois Curfman McInnes PetscValidHeaderSpecific(iscol,IS_COOKIE); 336f1af5d2fSBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 3373b2fbd54SBarry Smith 33878b31e54SBarry Smith ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 339ac121b3dSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 340ac121b3dSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 341289bc588SBarry Smith 342289bc588SBarry Smith /* get new row pointers */ 3430452661fSBarry Smith ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 344dbb450caSBarry Smith ainew[0] = -shift; 345289bc588SBarry Smith /* don't know how many column pointers are needed so estimate */ 346dbb450caSBarry Smith jmax = (int) (f*ai[n]+(!shift)); 3470452661fSBarry Smith ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 348289bc588SBarry Smith /* fill is a linked list of nonzeros in active row */ 3490452661fSBarry Smith fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill); 3502fb3ed76SBarry Smith im = fill + n + 1; 351289bc588SBarry Smith /* idnew is location of diagonal in factor */ 3520452661fSBarry Smith idnew = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew); 353dbb450caSBarry Smith idnew[0] = -shift; 354289bc588SBarry Smith 355289bc588SBarry Smith for ( i=0; i<n; i++ ) { 356289bc588SBarry Smith /* first copy previous fill into linked list */ 357289bc588SBarry Smith nnz = nz = ai[r[i]+1] - ai[r[i]]; 3586b96ef82SSatish Balay if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 359dbb450caSBarry Smith ajtmp = aj + ai[r[i]] + shift; 360289bc588SBarry Smith fill[n] = n; 361289bc588SBarry Smith while (nz--) { 362289bc588SBarry Smith fm = n; 363dbb450caSBarry Smith idx = ic[*ajtmp++ + shift]; 364289bc588SBarry Smith do { 365289bc588SBarry Smith m = fm; 366289bc588SBarry Smith fm = fill[m]; 367289bc588SBarry Smith } while (fm < idx); 368289bc588SBarry Smith fill[m] = idx; 369289bc588SBarry Smith fill[idx] = fm; 370289bc588SBarry Smith } 371289bc588SBarry Smith row = fill[n]; 372289bc588SBarry Smith while ( row < i ) { 373dbb450caSBarry Smith ajtmp = ajnew + idnew[row] + (!shift); 3742fb3ed76SBarry Smith nzbd = 1 + idnew[row] - ainew[row]; 3752fb3ed76SBarry Smith nz = im[row] - nzbd; 376289bc588SBarry Smith fm = row; 3772fb3ed76SBarry Smith while (nz-- > 0) { 378dbb450caSBarry Smith idx = *ajtmp++ + shift; 3792fb3ed76SBarry Smith nzbd++; 3802fb3ed76SBarry Smith if (idx == i) im[row] = nzbd; 381289bc588SBarry Smith do { 382289bc588SBarry Smith m = fm; 383289bc588SBarry Smith fm = fill[m]; 384289bc588SBarry Smith } while (fm < idx); 385289bc588SBarry Smith if (fm != idx) { 386289bc588SBarry Smith fill[m] = idx; 387289bc588SBarry Smith fill[idx] = fm; 388289bc588SBarry Smith fm = idx; 389289bc588SBarry Smith nnz++; 390289bc588SBarry Smith } 391289bc588SBarry Smith } 392289bc588SBarry Smith row = fill[row]; 393289bc588SBarry Smith } 394289bc588SBarry Smith /* copy new filled row into permanent storage */ 395289bc588SBarry Smith ainew[i+1] = ainew[i] + nnz; 396496e697eSBarry Smith if (ainew[i+1] > jmax) { 397ecf371e4SBarry Smith 398ecf371e4SBarry Smith /* estimate how much additional space we will need */ 399ecf371e4SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 400ecf371e4SBarry Smith /* just double the memory each time */ 401ecf371e4SBarry Smith int maxadd = jmax; 402ecf371e4SBarry Smith /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */ 403bbb6d6a8SBarry Smith if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 4042fb3ed76SBarry Smith jmax += maxadd; 405ecf371e4SBarry Smith 406ecf371e4SBarry Smith /* allocate a longer ajnew */ 4070452661fSBarry Smith ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 408549d3d68SSatish Balay ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 409606d414cSSatish Balay ierr = PetscFree(ajnew);CHKERRQ(ierr); 410289bc588SBarry Smith ajnew = ajtmp; 4112fb3ed76SBarry Smith realloc++; /* count how many times we realloc */ 412289bc588SBarry Smith } 413dbb450caSBarry Smith ajtmp = ajnew + ainew[i] + shift; 414289bc588SBarry Smith fm = fill[n]; 415289bc588SBarry Smith nzi = 0; 4162fb3ed76SBarry Smith im[i] = nnz; 417289bc588SBarry Smith while (nnz--) { 418289bc588SBarry Smith if (fm < i) nzi++; 419dbb450caSBarry Smith *ajtmp++ = fm - shift; 420289bc588SBarry Smith fm = fill[fm]; 421289bc588SBarry Smith } 422289bc588SBarry Smith idnew[i] = ainew[i] + nzi; 423289bc588SBarry Smith } 424464e8d44SSatish Balay if (ai[n] != 0) { 425464e8d44SSatish Balay double af = ((double)ainew[n])/((double)ai[n]); 426c38d4ed2SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 427981c4779SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 428981c4779SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 429981c4779SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 43005bf559cSSatish Balay } else { 431981c4779SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 43205bf559cSSatish Balay } 4332fb3ed76SBarry Smith 434898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 435898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 4361987afe7SBarry Smith 437606d414cSSatish Balay ierr = PetscFree(fill);CHKERRQ(ierr); 438289bc588SBarry Smith 439289bc588SBarry Smith /* put together the new matrix */ 440b4fd4287SBarry Smith ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 4411987afe7SBarry Smith PLogObjectParent(*B,isicol); 442416022c9SBarry Smith b = (Mat_SeqAIJ *) (*B)->data; 443606d414cSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 4447c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 445e8d4e0b9SBarry Smith /* the next line frees the default space generated by the Create() */ 446606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 447606d414cSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 44891bf9adeSBarry Smith b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 449416022c9SBarry Smith b->j = ajnew; 450416022c9SBarry Smith b->i = ainew; 451416022c9SBarry Smith b->diag = idnew; 452416022c9SBarry Smith b->ilen = 0; 453416022c9SBarry Smith b->imax = 0; 454416022c9SBarry Smith b->row = isrow; 455416022c9SBarry Smith b->col = iscol; 456c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 457c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 45882bf6240SBarry Smith b->icol = isicol; 45991bf9adeSBarry Smith b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 460416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 4617fd98bd6SLois Curfman McInnes Allocate idnew, solve_work, new a, new j */ 462416022c9SBarry Smith PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 463416022c9SBarry Smith b->maxnz = b->nz = ainew[n] + shift; 4647fd98bd6SLois Curfman McInnes 465e93ccc4dSBarry Smith (*B)->factor = FACTOR_LU;; 466ae068f58SLois Curfman McInnes (*B)->info.factor_mallocs = realloc; 467ae068f58SLois Curfman McInnes (*B)->info.fill_ratio_given = f; 468703deb49SSatish Balay (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 469703deb49SSatish Balay 470e93ccc4dSBarry Smith if (ai[n] != 0) { 471e93ccc4dSBarry Smith (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]); 472eea2913aSSatish Balay } else { 473eea2913aSSatish Balay (*B)->info.fill_ratio_needed = 0.0; 474eea2913aSSatish Balay } 4753a40ed3dSBarry Smith PetscFunctionReturn(0); 476289bc588SBarry Smith } 4770a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 478184914b5SBarry Smith extern int Mat_AIJ_CheckInode(Mat); 47941c01911SSatish Balay 4805615d1e5SSatish Balay #undef __FUNC__ 4815615d1e5SSatish Balay #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 482416022c9SBarry Smith int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 483289bc588SBarry Smith { 484416022c9SBarry Smith Mat C = *B; 485416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 48682bf6240SBarry Smith IS isrow = b->row, isicol = b->icol; 487416022c9SBarry Smith int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 4881987afe7SBarry Smith int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 489f2582111SSatish Balay int *diag_offset = b->diag,diag,k; 49035aab85fSBarry Smith int preserve_row_sums = (int) a->ilu_preserve_row_sums; 4913a40ed3dSBarry Smith register int *pj; 4928ecf7165SBarry Smith Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 49335aab85fSBarry Smith double ssum; 49435aab85fSBarry Smith register Scalar *pv, *rtmps,*u_values; 495289bc588SBarry Smith 4963a40ed3dSBarry Smith PetscFunctionBegin; 49782bf6240SBarry Smith 49878b31e54SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 49978b31e54SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 5000452661fSBarry Smith rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp); 501549d3d68SSatish Balay ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 5020cf60383SBarry Smith rtmps = rtmp + shift; ics = ic + shift; 503289bc588SBarry Smith 5046cf997b0SBarry Smith /* precalculate row sums */ 50535aab85fSBarry Smith if (preserve_row_sums) { 50635aab85fSBarry Smith rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums); 50735aab85fSBarry Smith for ( i=0; i<n; i++ ) { 50835aab85fSBarry Smith nz = a->i[r[i]+1] - a->i[r[i]]; 50935aab85fSBarry Smith v = a->a + a->i[r[i]] + shift; 51035aab85fSBarry Smith sum = 0.0; 51135aab85fSBarry Smith for ( j=0; j<nz; j++ ) sum += v[j]; 51235aab85fSBarry Smith rowsums[i] = sum; 51335aab85fSBarry Smith } 51435aab85fSBarry Smith } 51535aab85fSBarry Smith 516289bc588SBarry Smith for ( i=0; i<n; i++ ) { 517289bc588SBarry Smith nz = ai[i+1] - ai[i]; 518dbb450caSBarry Smith ajtmp = aj + ai[i] + shift; 51948e94559SBarry Smith for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 520289bc588SBarry Smith 521289bc588SBarry Smith /* load in initial (unfactored row) */ 522416022c9SBarry Smith nz = a->i[r[i]+1] - a->i[r[i]]; 523416022c9SBarry Smith ajtmpold = a->j + a->i[r[i]] + shift; 524416022c9SBarry Smith v = a->a + a->i[r[i]] + shift; 5250cf60383SBarry Smith for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 526289bc588SBarry Smith 527dbb450caSBarry Smith row = *ajtmp++ + shift; 528f2582111SSatish Balay while (row < i ) { 5298c37ef55SBarry Smith pc = rtmp + row; 5308c37ef55SBarry Smith if (*pc != 0.0) { 5311987afe7SBarry Smith pv = b->a + diag_offset[row] + shift; 5321987afe7SBarry Smith pj = b->j + diag_offset[row] + (!shift); 53335aab85fSBarry Smith multiplier = *pc / *pv++; 5348c37ef55SBarry Smith *pc = multiplier; 535f2582111SSatish Balay nz = ai[row+1] - diag_offset[row] - 1; 536f2582111SSatish Balay for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 537f2582111SSatish Balay PLogFlops(2*nz); 538289bc588SBarry Smith } 539dbb450caSBarry Smith row = *ajtmp++ + shift; 540289bc588SBarry Smith } 541416022c9SBarry Smith /* finished row so stick it into b->a */ 542416022c9SBarry Smith pv = b->a + ai[i] + shift; 543416022c9SBarry Smith pj = b->j + ai[i] + shift; 5448c37ef55SBarry Smith nz = ai[i+1] - ai[i]; 545416022c9SBarry Smith for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 54635aab85fSBarry Smith diag = diag_offset[i] - ai[i]; 54735aab85fSBarry Smith /* 54835aab85fSBarry Smith Possibly adjust diagonal entry on current row to force 54935aab85fSBarry Smith LU matrix to have same row sum as initial matrix. 55035aab85fSBarry Smith */ 551d708749eSBarry Smith if (pv[diag] == 0.0) { 5526cf997b0SBarry Smith SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 553d708749eSBarry Smith } 55435aab85fSBarry Smith if (preserve_row_sums) { 55535aab85fSBarry Smith pj = b->j + ai[i] + shift; 55635aab85fSBarry Smith sum = rowsums[i]; 55735aab85fSBarry Smith for ( j=0; j<diag; j++ ) { 55835aab85fSBarry Smith u_values = b->a + diag_offset[pj[j]] + shift; 55935aab85fSBarry Smith nz = ai[pj[j]+1] - diag_offset[pj[j]]; 56035aab85fSBarry Smith inner_sum = 0.0; 56135aab85fSBarry Smith for ( k=0; k<nz; k++ ) { 56235aab85fSBarry Smith inner_sum += u_values[k]; 56335aab85fSBarry Smith } 56435aab85fSBarry Smith sum -= pv[j]*inner_sum; 56535aab85fSBarry Smith 56635aab85fSBarry Smith } 56735aab85fSBarry Smith nz = ai[i+1] - diag_offset[i] - 1; 56835aab85fSBarry Smith u_values = b->a + diag_offset[i] + 1 + shift; 56935aab85fSBarry Smith for ( k=0; k<nz; k++ ) { 57035aab85fSBarry Smith sum -= u_values[k]; 57135aab85fSBarry Smith } 57235aab85fSBarry Smith ssum = PetscAbsScalar(sum/pv[diag]); 57335aab85fSBarry Smith if (ssum < 1000. && ssum > .001) pv[diag] = sum; 57435aab85fSBarry Smith } 57535aab85fSBarry Smith /* check pivot entry for current row */ 5768c37ef55SBarry Smith } 5770f11f9aeSSatish Balay 57835aab85fSBarry Smith /* invert diagonal entries for simplier triangular solves */ 57935aab85fSBarry Smith for ( i=0; i<n; i++ ) { 58035aab85fSBarry Smith b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 58135aab85fSBarry Smith } 58235aab85fSBarry Smith 583606d414cSSatish Balay if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 584606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 58578b31e54SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 58678b31e54SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 587416022c9SBarry Smith C->factor = FACTOR_LU; 58841c01911SSatish Balay ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 589c456f294SBarry Smith C->assembled = PETSC_TRUE; 590416022c9SBarry Smith PLogFlops(b->n); 5913a40ed3dSBarry Smith PetscFunctionReturn(0); 592289bc588SBarry Smith } 5930a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 5945615d1e5SSatish Balay #undef __FUNC__ 5955615d1e5SSatish Balay #define __FUNC__ "MatLUFactor_SeqAIJ" 596416022c9SBarry Smith int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 597da3a660dSBarry Smith { 598416022c9SBarry Smith Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 599*86bacbd2SBarry Smith int ierr,refct; 600416022c9SBarry Smith Mat C; 601f830108cSBarry Smith PetscOps *Abops; 6020a6ffc59SBarry Smith MatOps Aops; 603416022c9SBarry Smith 6043a40ed3dSBarry Smith PetscFunctionBegin; 605f2582111SSatish Balay ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 606f2582111SSatish Balay ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 607da3a660dSBarry Smith 608da3a660dSBarry Smith /* free all the data structures from mat */ 609606d414cSSatish Balay ierr = PetscFree(mat->a);CHKERRQ(ierr); 610606d414cSSatish Balay if (!mat->singlemalloc) { 611606d414cSSatish Balay ierr = PetscFree(mat->i);CHKERRQ(ierr); 612606d414cSSatish Balay ierr = PetscFree(mat->j);CHKERRQ(ierr); 613606d414cSSatish Balay } 614606d414cSSatish Balay if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 615606d414cSSatish Balay if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 616606d414cSSatish Balay if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 617606d414cSSatish Balay if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 618606d414cSSatish Balay if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 6190f0aacb9SBarry Smith if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 620606d414cSSatish Balay ierr = PetscFree(mat);CHKERRQ(ierr); 621da3a660dSBarry Smith 62217642b18SBarry Smith ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 62317642b18SBarry Smith ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 62417642b18SBarry Smith 625f830108cSBarry Smith /* 626f830108cSBarry Smith This is horrible, horrible code. We need to keep the 627f830108cSBarry Smith A pointers for the bops and ops but copy everything 628f830108cSBarry Smith else from C. 629f830108cSBarry Smith */ 630f830108cSBarry Smith Abops = A->bops; 631f830108cSBarry Smith Aops = A->ops; 632*86bacbd2SBarry Smith refct = A->refct; 633549d3d68SSatish Balay ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 634451c4af8SSatish Balay mat = (Mat_SeqAIJ *) A->data; 635451c4af8SSatish Balay PLogObjectParent(A,mat->icol); 636451c4af8SSatish Balay 637f830108cSBarry Smith A->bops = Abops; 638f830108cSBarry Smith A->ops = Aops; 639bef8e0ddSBarry Smith A->qlist = 0; 640*86bacbd2SBarry Smith A->refct = refct; 641c38d4ed2SBarry Smith /* copy over the type_name and name */ 642c38d4ed2SBarry Smith ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 643c38d4ed2SBarry Smith ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 644f830108cSBarry Smith 6450452661fSBarry Smith PetscHeaderDestroy(C); 646c38d4ed2SBarry Smith 6473a40ed3dSBarry Smith PetscFunctionReturn(0); 648da3a660dSBarry Smith } 6490a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 6505615d1e5SSatish Balay #undef __FUNC__ 6515615d1e5SSatish Balay #define __FUNC__ "MatSolve_SeqAIJ" 652416022c9SBarry Smith int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 6538c37ef55SBarry Smith { 654416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 655416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 656416022c9SBarry Smith int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 6573b2fbd54SBarry Smith int nz,shift = a->indexshift,*rout,*cout; 658416022c9SBarry Smith Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 6598c37ef55SBarry Smith 6603a40ed3dSBarry Smith PetscFunctionBegin; 6613a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 66291bf9adeSBarry Smith 663e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 664e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 665416022c9SBarry Smith tmp = a->solve_work; 6668c37ef55SBarry Smith 6673b2fbd54SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 6683b2fbd54SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 6698c37ef55SBarry Smith 6708c37ef55SBarry Smith /* forward solve the lower triangular */ 6718c37ef55SBarry Smith tmp[0] = b[*r++]; 6720cf60383SBarry Smith tmps = tmp + shift; 6738c37ef55SBarry Smith for ( i=1; i<n; i++ ) { 674dbb450caSBarry Smith v = aa + ai[i] + shift; 675dbb450caSBarry Smith vi = aj + ai[i] + shift; 676416022c9SBarry Smith nz = a->diag[i] - ai[i]; 6778c37ef55SBarry Smith sum = b[*r++]; 6780cf60383SBarry Smith while (nz--) sum -= *v++ * tmps[*vi++]; 6798c37ef55SBarry Smith tmp[i] = sum; 6808c37ef55SBarry Smith } 6818c37ef55SBarry Smith 6828c37ef55SBarry Smith /* backward solve the upper triangular */ 6838c37ef55SBarry Smith for ( i=n-1; i>=0; i-- ){ 684416022c9SBarry Smith v = aa + a->diag[i] + (!shift); 685416022c9SBarry Smith vi = aj + a->diag[i] + (!shift); 686416022c9SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 6878c37ef55SBarry Smith sum = tmp[i]; 6880cf60383SBarry Smith while (nz--) sum -= *v++ * tmps[*vi++]; 689416022c9SBarry Smith x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 6908c37ef55SBarry Smith } 6918c37ef55SBarry Smith 6923b2fbd54SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 6933b2fbd54SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 694cb5b572fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 695cb5b572fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 696416022c9SBarry Smith PLogFlops(2*a->nz - a->n); 6973a40ed3dSBarry Smith PetscFunctionReturn(0); 6988c37ef55SBarry Smith } 699026e39d0SSatish Balay 700930ae53cSBarry Smith /* ----------------------------------------------------------- */ 701930ae53cSBarry Smith #undef __FUNC__ 702930ae53cSBarry Smith #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 703930ae53cSBarry Smith int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 704930ae53cSBarry Smith { 705930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 706d85afc42SSatish Balay int n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr; 707d85afc42SSatish Balay Scalar *x,*b, *aa = a->a, sum; 708aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 709d85afc42SSatish Balay int adiag_i,i,*vi,nz,ai_i; 710d85afc42SSatish Balay Scalar *v; 711d85afc42SSatish Balay #endif 712930ae53cSBarry Smith 7133a40ed3dSBarry Smith PetscFunctionBegin; 7143a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 715930ae53cSBarry Smith if (a->indexshift) { 7163a40ed3dSBarry Smith ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 7173a40ed3dSBarry Smith PetscFunctionReturn(0); 718930ae53cSBarry Smith } 719930ae53cSBarry Smith 720e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 721e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 722930ae53cSBarry Smith 723aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 7241c4feecaSSatish Balay fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 7251c4feecaSSatish Balay #else 726930ae53cSBarry Smith /* forward solve the lower triangular */ 727930ae53cSBarry Smith x[0] = b[0]; 728930ae53cSBarry Smith for ( i=1; i<n; i++ ) { 729930ae53cSBarry Smith ai_i = ai[i]; 730930ae53cSBarry Smith v = aa + ai_i; 731930ae53cSBarry Smith vi = aj + ai_i; 732930ae53cSBarry Smith nz = adiag[i] - ai_i; 733930ae53cSBarry Smith sum = b[i]; 734930ae53cSBarry Smith while (nz--) sum -= *v++ * x[*vi++]; 735930ae53cSBarry Smith x[i] = sum; 736930ae53cSBarry Smith } 737930ae53cSBarry Smith 738930ae53cSBarry Smith /* backward solve the upper triangular */ 739930ae53cSBarry Smith for ( i=n-1; i>=0; i-- ){ 740930ae53cSBarry Smith adiag_i = adiag[i]; 741d708749eSBarry Smith v = aa + adiag_i + 1; 742d708749eSBarry Smith vi = aj + adiag_i + 1; 743930ae53cSBarry Smith nz = ai[i+1] - adiag_i - 1; 744930ae53cSBarry Smith sum = x[i]; 745930ae53cSBarry Smith while (nz--) sum -= *v++ * x[*vi++]; 746930ae53cSBarry Smith x[i] = sum*aa[adiag_i]; 747930ae53cSBarry Smith } 7481c4feecaSSatish Balay #endif 749930ae53cSBarry Smith PLogFlops(2*a->nz - a->n); 750cb5b572fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 751cb5b572fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 7523a40ed3dSBarry Smith PetscFunctionReturn(0); 753930ae53cSBarry Smith } 754930ae53cSBarry Smith 7555615d1e5SSatish Balay #undef __FUNC__ 7565615d1e5SSatish Balay #define __FUNC__ "MatSolveAdd_SeqAIJ" 757416022c9SBarry Smith int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 758da3a660dSBarry Smith { 759416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 760416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 761416022c9SBarry Smith int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 7623b2fbd54SBarry Smith int nz, shift = a->indexshift,*rout,*cout; 763416022c9SBarry Smith Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 764da3a660dSBarry Smith 7653a40ed3dSBarry Smith PetscFunctionBegin; 76678b31e54SBarry Smith if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 767da3a660dSBarry Smith 768e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 769e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 770416022c9SBarry Smith tmp = a->solve_work; 771da3a660dSBarry Smith 7723b2fbd54SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 7733b2fbd54SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 774da3a660dSBarry Smith 775da3a660dSBarry Smith /* forward solve the lower triangular */ 776da3a660dSBarry Smith tmp[0] = b[*r++]; 777da3a660dSBarry Smith for ( i=1; i<n; i++ ) { 778dbb450caSBarry Smith v = aa + ai[i] + shift; 779dbb450caSBarry Smith vi = aj + ai[i] + shift; 780416022c9SBarry Smith nz = a->diag[i] - ai[i]; 781da3a660dSBarry Smith sum = b[*r++]; 782dbb450caSBarry Smith while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 783da3a660dSBarry Smith tmp[i] = sum; 784da3a660dSBarry Smith } 785da3a660dSBarry Smith 786da3a660dSBarry Smith /* backward solve the upper triangular */ 787da3a660dSBarry Smith for ( i=n-1; i>=0; i-- ){ 788416022c9SBarry Smith v = aa + a->diag[i] + (!shift); 789416022c9SBarry Smith vi = aj + a->diag[i] + (!shift); 790416022c9SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 791da3a660dSBarry Smith sum = tmp[i]; 792dbb450caSBarry Smith while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 793416022c9SBarry Smith tmp[i] = sum*aa[a->diag[i]+shift]; 794da3a660dSBarry Smith x[*c--] += tmp[i]; 795da3a660dSBarry Smith } 796da3a660dSBarry Smith 7973b2fbd54SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 7983b2fbd54SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 799cb5b572fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 800cb5b572fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 801416022c9SBarry Smith PLogFlops(2*a->nz); 802898183ecSLois Curfman McInnes 8033a40ed3dSBarry Smith PetscFunctionReturn(0); 804da3a660dSBarry Smith } 805da3a660dSBarry Smith /* -------------------------------------------------------------------*/ 8065615d1e5SSatish Balay #undef __FUNC__ 8077c922b88SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqAIJ" 8087c922b88SBarry Smith int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx) 809da3a660dSBarry Smith { 810416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 8112235e667SBarry Smith IS iscol = a->col, isrow = a->row; 812416022c9SBarry Smith int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 813f1af5d2fSBarry Smith int nz,shift = a->indexshift,*rout,*cout, *diag = a->diag; 814f1af5d2fSBarry Smith Scalar *x,*b,*tmp, *aa = a->a, *v, s1; 815da3a660dSBarry Smith 8163a40ed3dSBarry Smith PetscFunctionBegin; 817e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 818e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819416022c9SBarry Smith tmp = a->solve_work; 820da3a660dSBarry Smith 8212235e667SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 8222235e667SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 823da3a660dSBarry Smith 824da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 8252235e667SBarry Smith for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 826da3a660dSBarry Smith 827da3a660dSBarry Smith /* forward solve the U^T */ 828da3a660dSBarry Smith for ( i=0; i<n; i++ ) { 829f1af5d2fSBarry Smith v = aa + diag[i] + shift; 830f1af5d2fSBarry Smith vi = aj + diag[i] + (!shift); 831f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 832c38d4ed2SBarry Smith s1 = tmp[i]; 833c38d4ed2SBarry Smith s1 *= *(v++); /* multiply by inverse of diagonal entry */ 834da3a660dSBarry Smith while (nz--) { 835f1af5d2fSBarry Smith tmp[*vi++ + shift] -= (*v++)*s1; 836da3a660dSBarry Smith } 837c38d4ed2SBarry Smith tmp[i] = s1; 838da3a660dSBarry Smith } 839da3a660dSBarry Smith 840da3a660dSBarry Smith /* backward solve the L^T */ 841da3a660dSBarry Smith for ( i=n-1; i>=0; i-- ){ 842f1af5d2fSBarry Smith v = aa + diag[i] - 1 + shift; 843f1af5d2fSBarry Smith vi = aj + diag[i] - 1 + shift; 844f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 845f1af5d2fSBarry Smith s1 = tmp[i]; 846da3a660dSBarry Smith while (nz--) { 847f1af5d2fSBarry Smith tmp[*vi-- + shift] -= (*v--)*s1; 848da3a660dSBarry Smith } 849da3a660dSBarry Smith } 850da3a660dSBarry Smith 851da3a660dSBarry Smith /* copy tmp into x according to permutation */ 852da3a660dSBarry Smith for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 853da3a660dSBarry Smith 8542235e667SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 8552235e667SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 856cb5b572fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 857cb5b572fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 858da3a660dSBarry Smith 859416022c9SBarry Smith PLogFlops(2*a->nz-a->n); 8603a40ed3dSBarry Smith PetscFunctionReturn(0); 861da3a660dSBarry Smith } 862da3a660dSBarry Smith 8635615d1e5SSatish Balay #undef __FUNC__ 8647c922b88SBarry Smith #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 8657c922b88SBarry Smith int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 866da3a660dSBarry Smith { 867416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 8682235e667SBarry Smith IS iscol = a->col, isrow = a->row; 869416022c9SBarry Smith int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 870f1af5d2fSBarry Smith int nz,shift = a->indexshift, *rout, *cout, *diag = a->diag; 871416022c9SBarry Smith Scalar *x,*b,*tmp, *aa = a->a, *v; 8726abc6512SBarry Smith 8733a40ed3dSBarry Smith PetscFunctionBegin; 8746abc6512SBarry Smith if (zz != xx) VecCopy(zz,xx); 8756abc6512SBarry Smith 876e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 877e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 878416022c9SBarry Smith tmp = a->solve_work; 8796abc6512SBarry Smith 8802235e667SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 8812235e667SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 8826abc6512SBarry Smith 8836abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 8842235e667SBarry Smith for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 8856abc6512SBarry Smith 8866abc6512SBarry Smith /* forward solve the U^T */ 8876abc6512SBarry Smith for ( i=0; i<n; i++ ) { 888f1af5d2fSBarry Smith v = aa + diag[i] + shift; 889f1af5d2fSBarry Smith vi = aj + diag[i] + (!shift); 890f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 8916abc6512SBarry Smith tmp[i] *= *v++; 8926abc6512SBarry Smith while (nz--) { 893dbb450caSBarry Smith tmp[*vi++ + shift] -= (*v++)*tmp[i]; 8946abc6512SBarry Smith } 8956abc6512SBarry Smith } 8966abc6512SBarry Smith 8976abc6512SBarry Smith /* backward solve the L^T */ 8986abc6512SBarry Smith for ( i=n-1; i>=0; i-- ){ 899f1af5d2fSBarry Smith v = aa + diag[i] - 1 + shift; 900f1af5d2fSBarry Smith vi = aj + diag[i] - 1 + shift; 901f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 9026abc6512SBarry Smith while (nz--) { 903dbb450caSBarry Smith tmp[*vi-- + shift] -= (*v--)*tmp[i]; 9046abc6512SBarry Smith } 9056abc6512SBarry Smith } 9066abc6512SBarry Smith 9076abc6512SBarry Smith /* copy tmp into x according to permutation */ 9086abc6512SBarry Smith for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 9096abc6512SBarry Smith 9102235e667SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 9112235e667SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 912cb5b572fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 913cb5b572fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9146abc6512SBarry Smith 915416022c9SBarry Smith PLogFlops(2*a->nz); 9163a40ed3dSBarry Smith PetscFunctionReturn(0); 917da3a660dSBarry Smith } 9189e25ed09SBarry Smith /* ----------------------------------------------------------------*/ 9197c922b88SBarry Smith extern int MatMissingDiagonal_SeqAIJ(Mat); 92008480c60SBarry Smith 9215615d1e5SSatish Balay #undef __FUNC__ 9225615d1e5SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 9236cf997b0SBarry Smith int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 9249e25ed09SBarry Smith { 925416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 9269e25ed09SBarry Smith IS isicol; 927416022c9SBarry Smith int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 92856beaf04SBarry Smith int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 929335d9088SBarry Smith int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 9306cf997b0SBarry Smith int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 93177c4ece6SBarry Smith PetscTruth col_identity, row_identity; 9326cf997b0SBarry Smith double f; 9339e25ed09SBarry Smith 9343a40ed3dSBarry Smith PetscFunctionBegin; 9356cf997b0SBarry Smith if (info) { 9366cf997b0SBarry Smith f = info->fill; 9370cd17293SBarry Smith levels = (int) info->levels; 9380cd17293SBarry Smith diagonal_fill = (int) info->diagonal_fill; 9396cf997b0SBarry Smith } else { 9406cf997b0SBarry Smith f = 1.0; 9416cf997b0SBarry Smith levels = 0; 9426cf997b0SBarry Smith diagonal_fill = 0; 9436cf997b0SBarry Smith } 94482bf6240SBarry Smith ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 94582bf6240SBarry Smith 94608480c60SBarry Smith /* special case that simply copies fill pattern */ 947be0abb6dSBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 948be0abb6dSBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 949*86bacbd2SBarry Smith if (!levels && row_identity && col_identity) { 9502e8a6d31SBarry Smith ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 95108480c60SBarry Smith (*fact)->factor = FACTOR_LU; 95208480c60SBarry Smith b = (Mat_SeqAIJ *) (*fact)->data; 95308480c60SBarry Smith if (!b->diag) { 9547c922b88SBarry Smith ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 95508480c60SBarry Smith } 9567c922b88SBarry Smith ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 95708480c60SBarry Smith b->row = isrow; 95808480c60SBarry Smith b->col = iscol; 95982bf6240SBarry Smith b->icol = isicol; 9600452661fSBarry Smith b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 961f830108cSBarry Smith (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 962c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 963c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 9643a40ed3dSBarry Smith PetscFunctionReturn(0); 96508480c60SBarry Smith } 96608480c60SBarry Smith 967898183ecSLois Curfman McInnes ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 968898183ecSLois Curfman McInnes ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 9699e25ed09SBarry Smith 9709e25ed09SBarry Smith /* get new row pointers */ 9710452661fSBarry Smith ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 972dbb450caSBarry Smith ainew[0] = -shift; 9739e25ed09SBarry Smith /* don't know how many column pointers are needed so estimate */ 974dbb450caSBarry Smith jmax = (int) (f*(ai[n]+!shift)); 9750452661fSBarry Smith ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 9769e25ed09SBarry Smith /* ajfill is level of fill for each fill entry */ 9770452661fSBarry Smith ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 9789e25ed09SBarry Smith /* fill is a linked list of nonzeros in active row */ 9790452661fSBarry Smith fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 98056beaf04SBarry Smith /* im is level for each filled value */ 9810452661fSBarry Smith im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 98256beaf04SBarry Smith /* dloc is location of diagonal in factor */ 9830452661fSBarry Smith dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 98456beaf04SBarry Smith dloc[0] = 0; 98556beaf04SBarry Smith for ( prow=0; prow<n; prow++ ) { 9866cf997b0SBarry Smith 9876cf997b0SBarry Smith /* copy current row into linked list */ 98856beaf04SBarry Smith nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 989385f7a74SSatish Balay if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 990dbb450caSBarry Smith xi = aj + ai[r[prow]] + shift; 9919e25ed09SBarry Smith fill[n] = n; 992435faa5fSBarry Smith fill[prow] = -1; /* marker to indicate if diagonal exists */ 9939e25ed09SBarry Smith while (nz--) { 9949e25ed09SBarry Smith fm = n; 995dbb450caSBarry Smith idx = ic[*xi++ + shift]; 9969e25ed09SBarry Smith do { 9979e25ed09SBarry Smith m = fm; 9989e25ed09SBarry Smith fm = fill[m]; 9999e25ed09SBarry Smith } while (fm < idx); 10009e25ed09SBarry Smith fill[m] = idx; 10019e25ed09SBarry Smith fill[idx] = fm; 100256beaf04SBarry Smith im[idx] = 0; 10039e25ed09SBarry Smith } 10046cf997b0SBarry Smith 10056cf997b0SBarry Smith /* make sure diagonal entry is included */ 1006435faa5fSBarry Smith if (diagonal_fill && fill[prow] == -1) { 10076cf997b0SBarry Smith fm = n; 1008435faa5fSBarry Smith while (fill[fm] < prow) fm = fill[fm]; 1009435faa5fSBarry Smith fill[prow] = fill[fm]; /* insert diagonal into linked list */ 1010435faa5fSBarry Smith fill[fm] = prow; 10116cf997b0SBarry Smith im[prow] = 0; 10126cf997b0SBarry Smith nzf++; 1013335d9088SBarry Smith dcount++; 10146cf997b0SBarry Smith } 10156cf997b0SBarry Smith 101656beaf04SBarry Smith nzi = 0; 10179e25ed09SBarry Smith row = fill[n]; 101856beaf04SBarry Smith while ( row < prow ) { 101956beaf04SBarry Smith incrlev = im[row] + 1; 102056beaf04SBarry Smith nz = dloc[row]; 10216cf997b0SBarry Smith xi = ajnew + ainew[row] + shift + nz + 1; 1022dbb450caSBarry Smith flev = ajfill + ainew[row] + shift + nz + 1; 1023416022c9SBarry Smith nnz = ainew[row+1] - ainew[row] - nz - 1; 102456beaf04SBarry Smith fm = row; 102556beaf04SBarry Smith while (nnz-- > 0) { 1026dbb450caSBarry Smith idx = *xi++ + shift; 102756beaf04SBarry Smith if (*flev + incrlev > levels) { 102856beaf04SBarry Smith flev++; 102956beaf04SBarry Smith continue; 103056beaf04SBarry Smith } 103156beaf04SBarry Smith do { 10329e25ed09SBarry Smith m = fm; 10339e25ed09SBarry Smith fm = fill[m]; 103456beaf04SBarry Smith } while (fm < idx); 10359e25ed09SBarry Smith if (fm != idx) { 103656beaf04SBarry Smith im[idx] = *flev + incrlev; 10379e25ed09SBarry Smith fill[m] = idx; 10389e25ed09SBarry Smith fill[idx] = fm; 10399e25ed09SBarry Smith fm = idx; 104056beaf04SBarry Smith nzf++; 1041ecf371e4SBarry Smith } else { 104256beaf04SBarry Smith if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 10439e25ed09SBarry Smith } 104456beaf04SBarry Smith flev++; 10459e25ed09SBarry Smith } 10469e25ed09SBarry Smith row = fill[row]; 104756beaf04SBarry Smith nzi++; 10489e25ed09SBarry Smith } 10499e25ed09SBarry Smith /* copy new filled row into permanent storage */ 105056beaf04SBarry Smith ainew[prow+1] = ainew[prow] + nzf; 1051d7e8b826SBarry Smith if (ainew[prow+1] > jmax-shift) { 1052ecf371e4SBarry Smith 1053ecf371e4SBarry Smith /* estimate how much additional space we will need */ 1054ecf371e4SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1055ecf371e4SBarry Smith /* just double the memory each time */ 1056ecf371e4SBarry Smith /* maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1057ecf371e4SBarry Smith int maxadd = jmax; 105856beaf04SBarry Smith if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1059bbb6d6a8SBarry Smith jmax += maxadd; 1060ecf371e4SBarry Smith 1061ecf371e4SBarry Smith /* allocate a longer ajnew and ajfill */ 10620452661fSBarry Smith xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1063549d3d68SSatish Balay ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1064606d414cSSatish Balay ierr = PetscFree(ajnew);CHKERRQ(ierr); 106556beaf04SBarry Smith ajnew = xi; 10660452661fSBarry Smith xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1067549d3d68SSatish Balay ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1068606d414cSSatish Balay ierr = PetscFree(ajfill);CHKERRQ(ierr); 106956beaf04SBarry Smith ajfill = xi; 1070ecf371e4SBarry Smith realloc++; /* count how many times we realloc */ 10719e25ed09SBarry Smith } 1072dbb450caSBarry Smith xi = ajnew + ainew[prow] + shift; 1073dbb450caSBarry Smith flev = ajfill + ainew[prow] + shift; 107456beaf04SBarry Smith dloc[prow] = nzi; 10759e25ed09SBarry Smith fm = fill[n]; 107656beaf04SBarry Smith while (nzf--) { 1077dbb450caSBarry Smith *xi++ = fm - shift; 107856beaf04SBarry Smith *flev++ = im[fm]; 10799e25ed09SBarry Smith fm = fill[fm]; 10809e25ed09SBarry Smith } 10816cf997b0SBarry Smith /* make sure row has diagonal entry */ 10826cf997b0SBarry Smith if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 10836cf997b0SBarry Smith SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 10846cf997b0SBarry Smith try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 10856cf997b0SBarry Smith } 10869e25ed09SBarry Smith } 1087606d414cSSatish Balay ierr = PetscFree(ajfill); CHKERRQ(ierr); 1088898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1089898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1090606d414cSSatish Balay ierr = PetscFree(fill);CHKERRQ(ierr); 1091606d414cSSatish Balay ierr = PetscFree(im);CHKERRQ(ierr); 10929e25ed09SBarry Smith 1093f64065bbSBarry Smith { 1094464e8d44SSatish Balay double af = ((double)ainew[n])/((double)ai[n]); 1095c38d4ed2SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1096981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1097981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1098981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1099335d9088SBarry Smith if (diagonal_fill) { 1100335d9088SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1101335d9088SBarry Smith } 1102f64065bbSBarry Smith } 1103bbb6d6a8SBarry Smith 11049e25ed09SBarry Smith /* put together the new matrix */ 1105b4fd4287SBarry Smith ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1106fa6007c9SSatish Balay PLogObjectParent(*fact,isicol); 1107416022c9SBarry Smith b = (Mat_SeqAIJ *) (*fact)->data; 1108606d414cSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 11097c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 1110dbb450caSBarry Smith len = (ainew[n] + shift)*sizeof(Scalar); 11119e25ed09SBarry Smith /* the next line frees the default space generated by the Create() */ 1112606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 1113606d414cSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 11146b96ef82SSatish Balay b->a = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a); 1115416022c9SBarry Smith b->j = ajnew; 1116416022c9SBarry Smith b->i = ainew; 111756beaf04SBarry Smith for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1118416022c9SBarry Smith b->diag = dloc; 1119416022c9SBarry Smith b->ilen = 0; 1120416022c9SBarry Smith b->imax = 0; 1121416022c9SBarry Smith b->row = isrow; 1122416022c9SBarry Smith b->col = iscol; 1123c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1124c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 112582bf6240SBarry Smith b->icol = isicol; 112682bf6240SBarry Smith b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1127416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 112856beaf04SBarry Smith Allocate dloc, solve_work, new a, new j */ 1129dbb450caSBarry Smith PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1130416022c9SBarry Smith b->maxnz = b->nz = ainew[n] + shift; 11319e25ed09SBarry Smith (*fact)->factor = FACTOR_LU; 1132ae068f58SLois Curfman McInnes 1133ae068f58SLois Curfman McInnes (*fact)->info.factor_mallocs = realloc; 1134ae068f58SLois Curfman McInnes (*fact)->info.fill_ratio_given = f; 1135ae068f58SLois Curfman McInnes (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1136e93ccc4dSBarry Smith (*fact)->factor = FACTOR_LU;; 1137ae068f58SLois Curfman McInnes 11383a40ed3dSBarry Smith PetscFunctionReturn(0); 11399e25ed09SBarry Smith } 1140d5d45c9bSBarry Smith 1141d5d45c9bSBarry Smith 1142d5d45c9bSBarry Smith 1143d5d45c9bSBarry Smith 1144