173f4d377SMatthew Knepley /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/ 2289bc588SBarry Smith 370f55243SBarry Smith #include "src/mat/impls/aij/seq/aij.h" 41c4feecaSSatish Balay #include "src/inline/dot.h" 5ed480e8bSBarry Smith #include "src/inline/spops.h" 63b2fbd54SBarry Smith 74a2ae208SSatish Balay #undef __FUNCT__ 84a2ae208SSatish Balay #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 9234db7c0SBarry Smith int MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 103b2fbd54SBarry Smith { 113a40ed3dSBarry Smith PetscFunctionBegin; 123a40ed3dSBarry Smith 1329bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Code not written"); 14aa482453SBarry Smith #if !defined(PETSC_USE_DEBUG) 15d64ed03dSBarry Smith PetscFunctionReturn(0); 16d64ed03dSBarry Smith #endif 173b2fbd54SBarry Smith } 183b2fbd54SBarry Smith 1986bacbd2SBarry Smith 20ca44d042SBarry Smith EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 213a7fca6bSBarry Smith EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 2286bacbd2SBarry Smith 2387828ca2SBarry Smith EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*); 2487828ca2SBarry Smith EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*); 2587828ca2SBarry Smith EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*); 2686bacbd2SBarry Smith 274a2ae208SSatish Balay #undef __FUNCT__ 284a2ae208SSatish Balay #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 2986bacbd2SBarry Smith /* ------------------------------------------------------------ 3086bacbd2SBarry Smith 3186bacbd2SBarry Smith This interface was contribed by Tony Caola 3286bacbd2SBarry Smith 3386bacbd2SBarry Smith This routine is an interface to the pivoting drop-tolerance 345bd2ddc7SBarry Smith ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 355bd2ddc7SBarry Smith SPARSEKIT2. 365bd2ddc7SBarry Smith 375bd2ddc7SBarry Smith The SPARSEKIT2 routines used here are covered by the GNU 385bd2ddc7SBarry Smith copyright; see the file gnu in this directory. 3986bacbd2SBarry Smith 4086bacbd2SBarry Smith Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 4186bacbd2SBarry Smith help in getting this routine ironed out. 4286bacbd2SBarry Smith 435bd2ddc7SBarry Smith The major drawback to this routine is that if info->fill is 445bd2ddc7SBarry Smith not large enough it fails rather than allocating more space; 455bd2ddc7SBarry Smith this can be fixed by hacking/improving the f2c version of 465bd2ddc7SBarry Smith Yousef Saad's code. 4786bacbd2SBarry Smith 4886bacbd2SBarry Smith ------------------------------------------------------------ 4986bacbd2SBarry Smith */ 50b380c88cSHong Zhang int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 5186bacbd2SBarry Smith { 5260ec86bdSBarry Smith #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 5360ec86bdSBarry Smith PetscFunctionBegin; 5460ec86bdSBarry Smith SETERRQ(1,"This distribution does not include GNU Copyright code\n\ 5560ec86bdSBarry Smith You can obtain the drop tolerance routines by installing PETSc from\n\ 5660ec86bdSBarry Smith www.mcs.anl.gov/petsc\n"); 5760ec86bdSBarry Smith #else 5886bacbd2SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 5907d2056aSBarry Smith IS iscolf,isicol,isirow; 6086bacbd2SBarry Smith PetscTruth reorder; 61273d9f13SBarry Smith int *c,*r,*ic,ierr,i,n = A->m; 62329f5518SBarry Smith int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63313ae024SBarry Smith int *ordcol,*iwk,*iperm,*jw; 64b19713cbSBarry Smith int jmax,lfill,job,*o_i,*o_j; 6587828ca2SBarry Smith PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66329f5518SBarry Smith PetscReal permtol,af; 6786bacbd2SBarry Smith 6886bacbd2SBarry Smith PetscFunctionBegin; 6986bacbd2SBarry Smith 7086bacbd2SBarry Smith if (info->dt == PETSC_DEFAULT) info->dt = .005; 7186bacbd2SBarry Smith if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 7286bacbd2SBarry Smith if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 7333259db3SBarry Smith if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 746faa4630SBarry Smith lfill = (int)(info->dtcount/2.0); 756faa4630SBarry Smith jmax = (int)(info->fill*a->nz); 7686bacbd2SBarry Smith permtol = info->dtcol; 7786bacbd2SBarry Smith 7886bacbd2SBarry Smith 7986bacbd2SBarry Smith /* ------------------------------------------------------------ 8086bacbd2SBarry Smith If reorder=.TRUE., then the original matrix has to be 8186bacbd2SBarry Smith reordered to reflect the user selected ordering scheme, and 8286bacbd2SBarry Smith then de-reordered so it is in it's original format. 8386bacbd2SBarry Smith Because Saad's dperm() is NOT in place, we have to copy 8486bacbd2SBarry Smith the original matrix and allocate more storage. . . 8586bacbd2SBarry Smith ------------------------------------------------------------ 8686bacbd2SBarry Smith */ 8786bacbd2SBarry Smith 8886bacbd2SBarry Smith /* set reorder to true if either isrow or iscol is not identity */ 8986bacbd2SBarry Smith ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 9086bacbd2SBarry Smith if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 9186bacbd2SBarry Smith reorder = PetscNot(reorder); 9286bacbd2SBarry Smith 9386bacbd2SBarry Smith 9486bacbd2SBarry Smith /* storage for ilu factor */ 95b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr); 96b0a32e0cSBarry Smith ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr); 9787828ca2SBarry Smith ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 98b0a32e0cSBarry Smith ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr); 9986bacbd2SBarry Smith 10086bacbd2SBarry Smith /* ------------------------------------------------------------ 10186bacbd2SBarry Smith Make sure that everything is Fortran formatted (1-Based) 10286bacbd2SBarry Smith ------------------------------------------------------------ 10386bacbd2SBarry Smith */ 104b19713cbSBarry Smith for (i=old_i[0];i<old_i[n];i++) { 105b19713cbSBarry Smith old_j[i]++; 10686bacbd2SBarry Smith } 107b19713cbSBarry Smith for(i=0;i<n+1;i++) { 108b19713cbSBarry Smith old_i[i]++; 109b19713cbSBarry Smith }; 110010a20caSHong Zhang 11186bacbd2SBarry Smith 11286bacbd2SBarry Smith if (reorder) { 113c0c2c81eSBarry Smith ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 114c0c2c81eSBarry Smith ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 115c0c2c81eSBarry Smith for(i=0;i<n;i++) { 116c0c2c81eSBarry Smith r[i] = r[i]+1; 117c0c2c81eSBarry Smith c[i] = c[i]+1; 118c0c2c81eSBarry Smith } 119b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr); 120b0a32e0cSBarry Smith ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr); 12187828ca2SBarry Smith ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 1225bd2ddc7SBarry Smith job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123c0c2c81eSBarry Smith for (i=0;i<n;i++) { 124c0c2c81eSBarry Smith r[i] = r[i]-1; 125c0c2c81eSBarry Smith c[i] = c[i]-1; 126c0c2c81eSBarry Smith } 127c0c2c81eSBarry Smith ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128c0c2c81eSBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129b19713cbSBarry Smith o_a = old_a2; 130b19713cbSBarry Smith o_j = old_j2; 131b19713cbSBarry Smith o_i = old_i2; 132b19713cbSBarry Smith } else { 133b19713cbSBarry Smith o_a = old_a; 134b19713cbSBarry Smith o_j = old_j; 135b19713cbSBarry Smith o_i = old_i; 13686bacbd2SBarry Smith } 13786bacbd2SBarry Smith 13886bacbd2SBarry Smith /* ------------------------------------------------------------ 13986bacbd2SBarry Smith Call Saad's ilutp() routine to generate the factorization 14086bacbd2SBarry Smith ------------------------------------------------------------ 14186bacbd2SBarry Smith */ 14286bacbd2SBarry Smith 143b0a32e0cSBarry Smith ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr); 144b0a32e0cSBarry Smith ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr); 14587828ca2SBarry Smith ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 14686bacbd2SBarry Smith 14787828ca2SBarry Smith SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 14886bacbd2SBarry Smith if (ierr) { 14986bacbd2SBarry Smith switch (ierr) { 15029bbc08cSBarry Smith case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 15129bbc08cSBarry Smith case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 15229bbc08cSBarry Smith case -5: SETERRQ(1,"ilutp(), zero row encountered"); 15329bbc08cSBarry Smith case -1: SETERRQ(1,"ilutp(), input matrix may be wrong"); 15429bbc08cSBarry Smith case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax); 15529bbc08cSBarry Smith default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr); 15686bacbd2SBarry Smith } 15786bacbd2SBarry Smith } 15886bacbd2SBarry Smith 15986bacbd2SBarry Smith ierr = PetscFree(w);CHKERRQ(ierr); 16086bacbd2SBarry Smith ierr = PetscFree(jw);CHKERRQ(ierr); 16186bacbd2SBarry Smith 16286bacbd2SBarry Smith /* ------------------------------------------------------------ 16386bacbd2SBarry Smith Saad's routine gives the result in Modified Sparse Row (msr) 16486bacbd2SBarry Smith Convert to Compressed Sparse Row format (csr) 16586bacbd2SBarry Smith ------------------------------------------------------------ 16686bacbd2SBarry Smith */ 16786bacbd2SBarry Smith 16887828ca2SBarry Smith ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr); 17086bacbd2SBarry Smith 1715bd2ddc7SBarry Smith SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 17286bacbd2SBarry Smith 17386bacbd2SBarry Smith ierr = PetscFree(iwk);CHKERRQ(ierr); 17486bacbd2SBarry Smith ierr = PetscFree(wk);CHKERRQ(ierr); 17586bacbd2SBarry Smith 17686bacbd2SBarry Smith if (reorder) { 17786bacbd2SBarry Smith ierr = PetscFree(old_a2);CHKERRQ(ierr); 17886bacbd2SBarry Smith ierr = PetscFree(old_j2);CHKERRQ(ierr); 17986bacbd2SBarry Smith ierr = PetscFree(old_i2);CHKERRQ(ierr); 180313ae024SBarry Smith } else { 181b19713cbSBarry Smith /* fix permutation of old_j that the factorization introduced */ 182f170005cSBarry Smith for (i=old_i[0]; i<old_i[n]; i++) { 183b19713cbSBarry Smith old_j[i-1] = iperm[old_j[i-1]-1]; 184b19713cbSBarry Smith } 185b19713cbSBarry Smith } 186b19713cbSBarry Smith 187b801d0f9SBarry Smith /* get rid of the shift to indices starting at 1 */ 18886bacbd2SBarry Smith for (i=0; i<n+1; i++) { 189b19713cbSBarry Smith old_i[i]--; 190b19713cbSBarry Smith } 191b19713cbSBarry Smith for (i=old_i[0];i<old_i[n];i++) { 192b19713cbSBarry Smith old_j[i]--; 193b19713cbSBarry Smith } 19486bacbd2SBarry Smith 195b19713cbSBarry Smith /* Make the factored matrix 0-based */ 19686bacbd2SBarry Smith for (i=0; i<n+1; i++) { 197b19713cbSBarry Smith new_i[i]--; 198b19713cbSBarry Smith } 199b19713cbSBarry Smith for (i=new_i[0];i<new_i[n];i++) { 200b19713cbSBarry Smith new_j[i]--; 201b19713cbSBarry Smith } 20286bacbd2SBarry Smith 20386bacbd2SBarry Smith /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 20486bacbd2SBarry Smith /*-- permute the right-hand-side and solution vectors --*/ 2054c49b128SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 2064c49b128SBarry Smith ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207c0c2c81eSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 20886bacbd2SBarry Smith for(i=0; i<n; i++) { 20986bacbd2SBarry Smith ordcol[i] = ic[iperm[i]-1]; 21086bacbd2SBarry Smith }; 21186bacbd2SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 21207d2056aSBarry Smith ierr = ISDestroy(isicol);CHKERRQ(ierr); 21386bacbd2SBarry Smith 214c0c2c81eSBarry Smith ierr = PetscFree(iperm);CHKERRQ(ierr); 215c0c2c81eSBarry Smith 216329f5518SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 21707d2056aSBarry Smith ierr = PetscFree(ordcol);CHKERRQ(ierr); 21886bacbd2SBarry Smith 21986bacbd2SBarry Smith /*----- put together the new matrix -----*/ 22086bacbd2SBarry Smith 22186bacbd2SBarry Smith ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 22286bacbd2SBarry Smith (*fact)->factor = FACTOR_LU; 22386bacbd2SBarry Smith (*fact)->assembled = PETSC_TRUE; 22486bacbd2SBarry Smith 22586bacbd2SBarry Smith b = (Mat_SeqAIJ*)(*fact)->data; 22686bacbd2SBarry Smith ierr = PetscFree(b->imax);CHKERRQ(ierr); 22786bacbd2SBarry Smith b->sorted = PETSC_FALSE; 22807d2056aSBarry Smith b->singlemalloc = PETSC_FALSE; 229b19713cbSBarry Smith /* the next line frees the default space generated by the MatCreate() */ 23086bacbd2SBarry Smith ierr = PetscFree(b->a);CHKERRQ(ierr); 23186bacbd2SBarry Smith ierr = PetscFree(b->ilen);CHKERRQ(ierr); 23286bacbd2SBarry Smith b->a = new_a; 23386bacbd2SBarry Smith b->j = new_j; 23486bacbd2SBarry Smith b->i = new_i; 23586bacbd2SBarry Smith b->ilen = 0; 23686bacbd2SBarry Smith b->imax = 0; 2371f9e874aSBarry Smith /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238313ae024SBarry Smith b->row = isirow; 23986bacbd2SBarry Smith b->col = iscolf; 24087828ca2SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 24186bacbd2SBarry Smith b->maxnz = b->nz = new_i[n]; 24286bacbd2SBarry Smith ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 24386bacbd2SBarry Smith (*fact)->info.factor_mallocs = 0; 24486bacbd2SBarry Smith 24586bacbd2SBarry Smith ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 24686bacbd2SBarry Smith 24786bacbd2SBarry Smith /* check out for identical nodes. If found, use inode functions */ 2483a7fca6bSBarry Smith ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 24986bacbd2SBarry Smith 250431e710aSBarry Smith af = ((double)b->nz)/((double)a->nz) + .001; 251b0a32e0cSBarry Smith PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 252b0a32e0cSBarry Smith PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 253b0a32e0cSBarry Smith PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 254b0a32e0cSBarry Smith PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 255431e710aSBarry Smith 25686bacbd2SBarry Smith PetscFunctionReturn(0); 25760ec86bdSBarry Smith #endif 25886bacbd2SBarry Smith } 25986bacbd2SBarry Smith 260289bc588SBarry Smith /* 261289bc588SBarry Smith Factorization code for AIJ format. 262289bc588SBarry Smith */ 2634a2ae208SSatish Balay #undef __FUNCT__ 264b9617806SBarry Smith #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 265b380c88cSHong Zhang int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 266289bc588SBarry Smith { 267416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 268289bc588SBarry Smith IS isicol; 269273d9f13SBarry Smith int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j; 270010a20caSHong Zhang int *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 27191bf9adeSBarry Smith int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 2729e046878SBarry Smith PetscReal f; 273289bc588SBarry Smith 2743a40ed3dSBarry Smith PetscFunctionBegin; 27529bbc08cSBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 2764c49b128SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 277ac121b3dSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 278ac121b3dSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 279289bc588SBarry Smith 280289bc588SBarry Smith /* get new row pointers */ 281b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 282010a20caSHong Zhang ainew[0] = 0; 283289bc588SBarry Smith /* don't know how many column pointers are needed so estimate */ 284d3d32019SBarry Smith f = info->fill; 285010a20caSHong Zhang jmax = (int)(f*ai[n]+1); 286b0a32e0cSBarry Smith ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 287289bc588SBarry Smith /* fill is a linked list of nonzeros in active row */ 288b0a32e0cSBarry Smith ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 2892fb3ed76SBarry Smith im = fill + n + 1; 290289bc588SBarry Smith /* idnew is location of diagonal in factor */ 291b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 292010a20caSHong Zhang idnew[0] = 0; 293289bc588SBarry Smith 294289bc588SBarry Smith for (i=0; i<n; i++) { 295289bc588SBarry Smith /* first copy previous fill into linked list */ 296289bc588SBarry Smith nnz = nz = ai[r[i]+1] - ai[r[i]]; 29729bbc08cSBarry Smith if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 298010a20caSHong Zhang ajtmp = aj + ai[r[i]]; 299289bc588SBarry Smith fill[n] = n; 300289bc588SBarry Smith while (nz--) { 301289bc588SBarry Smith fm = n; 302010a20caSHong Zhang idx = ic[*ajtmp++]; 303289bc588SBarry Smith do { 304289bc588SBarry Smith m = fm; 305289bc588SBarry Smith fm = fill[m]; 306289bc588SBarry Smith } while (fm < idx); 307289bc588SBarry Smith fill[m] = idx; 308289bc588SBarry Smith fill[idx] = fm; 309289bc588SBarry Smith } 310289bc588SBarry Smith row = fill[n]; 311289bc588SBarry Smith while (row < i) { 312010a20caSHong Zhang ajtmp = ajnew + idnew[row] + 1; 3132fb3ed76SBarry Smith nzbd = 1 + idnew[row] - ainew[row]; 3142fb3ed76SBarry Smith nz = im[row] - nzbd; 315289bc588SBarry Smith fm = row; 3162fb3ed76SBarry Smith while (nz-- > 0) { 317010a20caSHong Zhang idx = *ajtmp++ ; 3182fb3ed76SBarry Smith nzbd++; 3192fb3ed76SBarry Smith if (idx == i) im[row] = nzbd; 320289bc588SBarry Smith do { 321289bc588SBarry Smith m = fm; 322289bc588SBarry Smith fm = fill[m]; 323289bc588SBarry Smith } while (fm < idx); 324289bc588SBarry Smith if (fm != idx) { 325289bc588SBarry Smith fill[m] = idx; 326289bc588SBarry Smith fill[idx] = fm; 327289bc588SBarry Smith fm = idx; 328289bc588SBarry Smith nnz++; 329289bc588SBarry Smith } 330289bc588SBarry Smith } 331289bc588SBarry Smith row = fill[row]; 332289bc588SBarry Smith } 333289bc588SBarry Smith /* copy new filled row into permanent storage */ 334289bc588SBarry Smith ainew[i+1] = ainew[i] + nnz; 335496e697eSBarry Smith if (ainew[i+1] > jmax) { 336ecf371e4SBarry Smith 337ecf371e4SBarry Smith /* estimate how much additional space we will need */ 338ecf371e4SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 339ecf371e4SBarry Smith /* just double the memory each time */ 340ecf371e4SBarry Smith int maxadd = jmax; 341ecf371e4SBarry Smith /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 342bbb6d6a8SBarry Smith if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 3432fb3ed76SBarry Smith jmax += maxadd; 344ecf371e4SBarry Smith 345ecf371e4SBarry Smith /* allocate a longer ajnew */ 346b0a32e0cSBarry Smith ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 347010a20caSHong Zhang ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(int));CHKERRQ(ierr); 348606d414cSSatish Balay ierr = PetscFree(ajnew);CHKERRQ(ierr); 349289bc588SBarry Smith ajnew = ajtmp; 3502fb3ed76SBarry Smith realloc++; /* count how many times we realloc */ 351289bc588SBarry Smith } 352010a20caSHong Zhang ajtmp = ajnew + ainew[i]; 353289bc588SBarry Smith fm = fill[n]; 354289bc588SBarry Smith nzi = 0; 3552fb3ed76SBarry Smith im[i] = nnz; 356289bc588SBarry Smith while (nnz--) { 357289bc588SBarry Smith if (fm < i) nzi++; 358010a20caSHong Zhang *ajtmp++ = fm ; 359289bc588SBarry Smith fm = fill[fm]; 360289bc588SBarry Smith } 361289bc588SBarry Smith idnew[i] = ainew[i] + nzi; 362289bc588SBarry Smith } 363464e8d44SSatish Balay if (ai[n] != 0) { 364329f5518SBarry Smith PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 365b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 366b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 367b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 368b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 36905bf559cSSatish Balay } else { 370b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 37105bf559cSSatish Balay } 3722fb3ed76SBarry Smith 373898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 374898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3751987afe7SBarry Smith 376606d414cSSatish Balay ierr = PetscFree(fill);CHKERRQ(ierr); 377289bc588SBarry Smith 378289bc588SBarry Smith /* put together the new matrix */ 379b4fd4287SBarry Smith ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 380b0a32e0cSBarry Smith PetscLogObjectParent(*B,isicol); 381416022c9SBarry Smith b = (Mat_SeqAIJ*)(*B)->data; 382606d414cSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 3837c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 384e8d4e0b9SBarry Smith /* the next line frees the default space generated by the Create() */ 385606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 386606d414cSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 387010a20caSHong Zhang ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 388416022c9SBarry Smith b->j = ajnew; 389416022c9SBarry Smith b->i = ainew; 390416022c9SBarry Smith b->diag = idnew; 391416022c9SBarry Smith b->ilen = 0; 392416022c9SBarry Smith b->imax = 0; 393416022c9SBarry Smith b->row = isrow; 394416022c9SBarry Smith b->col = iscol; 395085256b3SBarry Smith b->lu_damping = info->damping; 39687828ca2SBarry Smith b->lu_zeropivot = info->zeropivot; 3972cea7109SBarry Smith b->lu_shift = info->shift; 3982cea7109SBarry Smith b->lu_shift_fraction = info->shift_fraction; 399c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 400c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 40182bf6240SBarry Smith b->icol = isicol; 40287828ca2SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 403416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 4047fd98bd6SLois Curfman McInnes Allocate idnew, solve_work, new a, new j */ 405010a20caSHong Zhang PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(PetscScalar))); 406010a20caSHong Zhang b->maxnz = b->nz = ainew[n] ; 4077fd98bd6SLois Curfman McInnes 408329f5518SBarry Smith (*B)->factor = FACTOR_LU; 409ae068f58SLois Curfman McInnes (*B)->info.factor_mallocs = realloc; 410ae068f58SLois Curfman McInnes (*B)->info.fill_ratio_given = f; 4113a7fca6bSBarry Smith ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 4127dda7e10SBarry Smith (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 413703deb49SSatish Balay 414e93ccc4dSBarry Smith if (ai[n] != 0) { 415329f5518SBarry Smith (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 416eea2913aSSatish Balay } else { 417eea2913aSSatish Balay (*B)->info.fill_ratio_needed = 0.0; 418eea2913aSSatish Balay } 4193a40ed3dSBarry Smith PetscFunctionReturn(0); 420289bc588SBarry Smith } 4210a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 4223a7fca6bSBarry Smith EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 42341c01911SSatish Balay 4244a2ae208SSatish Balay #undef __FUNCT__ 4254a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 426416022c9SBarry Smith int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 427289bc588SBarry Smith { 428416022c9SBarry Smith Mat C = *B; 429416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 43082bf6240SBarry Smith IS isrow = b->row,isicol = b->icol; 431273d9f13SBarry Smith int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j; 432010a20caSHong Zhang int *ajtmpold,*ajtmp,nz,row,*ics; 4330cf777b8SBarry Smith int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 43487828ca2SBarry Smith PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 4350cf777b8SBarry Smith PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 4360cf777b8SBarry Smith PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 4370cf777b8SBarry Smith PetscTruth damp,lushift; 438289bc588SBarry Smith 4393a40ed3dSBarry Smith PetscFunctionBegin; 44078b31e54SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 44178b31e54SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 44287828ca2SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 44387828ca2SBarry Smith ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 444010a20caSHong Zhang rtmps = rtmp; ics = ic; 445289bc588SBarry Smith 4466cc28720Svictorle if (!a->diag) { 4470cf777b8SBarry Smith ierr = MatMarkDiagonal_SeqAIJ(A); CHKERRQ(ierr); 4480cf777b8SBarry Smith } 4490cf777b8SBarry Smith 4500cf777b8SBarry Smith if (b->lu_shift) { /* set max shift */ 4510cf777b8SBarry Smith int *aai = a->i,*ddiag = a->diag; 4520cf777b8SBarry Smith shift_top = 0; 4536cc28720Svictorle for (i=0; i<n; i++) { 454010a20caSHong Zhang d = PetscAbsScalar((a->a)[ddiag[i]]); 4556cc28720Svictorle /* calculate amt of shift needed for this row */ 4566c037d1bSvictorle if (d<=0) { 4573aeef889SHong Zhang row_shift = 0; 4583aeef889SHong Zhang } else { 4593aeef889SHong Zhang row_shift = -2*d; 4603aeef889SHong Zhang } 461010a20caSHong Zhang v = a->a+aai[i]; 4628a2e2d9cSvictorle for (j=0; j<aai[i+1]-aai[i]; j++) 4636cc28720Svictorle row_shift += PetscAbsScalar(v[j]); 4646cc28720Svictorle if (row_shift>shift_top) shift_top = row_shift; 4656cc28720Svictorle } 4666cc28720Svictorle } 4676cc28720Svictorle 4686cc28720Svictorle shift_fraction = 0; shift_amount = 0; 469085256b3SBarry Smith do { 4708a5eb818SBarry Smith damp = PETSC_FALSE; 4716cc28720Svictorle lushift = PETSC_FALSE; 472289bc588SBarry Smith for (i=0; i<n; i++) { 473289bc588SBarry Smith nz = ai[i+1] - ai[i]; 474010a20caSHong Zhang ajtmp = aj + ai[i]; 47548e94559SBarry Smith for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 476289bc588SBarry Smith 477289bc588SBarry Smith /* load in initial (unfactored row) */ 478416022c9SBarry Smith nz = a->i[r[i]+1] - a->i[r[i]]; 479010a20caSHong Zhang ajtmpold = a->j + a->i[r[i]]; 480010a20caSHong Zhang v = a->a + a->i[r[i]]; 481085256b3SBarry Smith for (j=0; j<nz; j++) { 482085256b3SBarry Smith rtmp[ics[ajtmpold[j]]] = v[j]; 483085256b3SBarry Smith } 484e2dd4fc4Svictorle rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 485289bc588SBarry Smith 486010a20caSHong Zhang row = *ajtmp++ ; 487f2582111SSatish Balay while (row < i) { 4888c37ef55SBarry Smith pc = rtmp + row; 4898c37ef55SBarry Smith if (*pc != 0.0) { 490010a20caSHong Zhang pv = b->a + diag_offset[row] ; 491010a20caSHong Zhang pj = b->j + diag_offset[row] + 1; 49235aab85fSBarry Smith multiplier = *pc / *pv++; 4938c37ef55SBarry Smith *pc = multiplier; 494f2582111SSatish Balay nz = ai[row+1] - diag_offset[row] - 1; 495f2582111SSatish Balay for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 496b0a32e0cSBarry Smith PetscLogFlops(2*nz); 497289bc588SBarry Smith } 498010a20caSHong Zhang row = *ajtmp++; 499289bc588SBarry Smith } 500416022c9SBarry Smith /* finished row so stick it into b->a */ 501010a20caSHong Zhang pv = b->a + ai[i] ; 502010a20caSHong Zhang pj = b->j + ai[i] ; 5038c37ef55SBarry Smith nz = ai[i+1] - ai[i]; 50435aab85fSBarry Smith diag = diag_offset[i] - ai[i]; 505d3d32019SBarry Smith /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 506d3d32019SBarry Smith rs = 0.0; 507d3d32019SBarry Smith for (j=0; j<nz; j++) { 508d3d32019SBarry Smith pv[j] = rtmps[pj[j]]; 509d3d32019SBarry Smith if (j != diag) rs += PetscAbsScalar(pv[j]); 510d3d32019SBarry Smith } 5116cc28720Svictorle #define MAX_NSHIFT 5 512*b2da817dSvictorle if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) { 5136cc28720Svictorle if (nshift>MAX_NSHIFT) { 5140cf777b8SBarry Smith SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner"); 5156cc28720Svictorle } else if (nshift==MAX_NSHIFT) { 5166cc28720Svictorle shift_fraction = shift_hi; 5176c037d1bSvictorle lushift = PETSC_FALSE; 5186cc28720Svictorle } else { 5196cc28720Svictorle shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 5206c037d1bSvictorle lushift = PETSC_TRUE; 5216cc28720Svictorle } 5226cc28720Svictorle shift_amount = shift_fraction * shift_top; 5230cf777b8SBarry Smith nshift++; 5240cf777b8SBarry Smith break; 5256cc28720Svictorle } 526d3d32019SBarry Smith if (PetscAbsScalar(pv[diag]) < zeropivot*rs) { 527d3d32019SBarry Smith if (damping) { 5288a5eb818SBarry Smith if (ndamp) damping *= 2.0; 529085256b3SBarry Smith damp = PETSC_TRUE; 530085256b3SBarry Smith ndamp++; 531085256b3SBarry Smith break; 532085256b3SBarry Smith } else { 53363bb2aa6SBarry Smith SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 534d708749eSBarry Smith } 53535aab85fSBarry Smith } 53635aab85fSBarry Smith } 5376cc28720Svictorle if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 5386cc28720Svictorle /* 5396c037d1bSvictorle * if no shift in this attempt & shifting & started shifting & can refine, 5406cc28720Svictorle * then try lower shift 5416cc28720Svictorle */ 5420cf777b8SBarry Smith shift_hi = shift_fraction; 5430cf777b8SBarry Smith shift_fraction = (shift_hi+shift_lo)/2.; 5446cc28720Svictorle shift_amount = shift_fraction * shift_top; 5450cf777b8SBarry Smith lushift = PETSC_TRUE; 5460cf777b8SBarry Smith nshift++; 5476cc28720Svictorle } 5486cc28720Svictorle } while (damp || lushift); 5490f11f9aeSSatish Balay 55035aab85fSBarry Smith /* invert diagonal entries for simplier triangular solves */ 55135aab85fSBarry Smith for (i=0; i<n; i++) { 552010a20caSHong Zhang b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 55335aab85fSBarry Smith } 55435aab85fSBarry Smith 555606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 55678b31e54SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 55778b31e54SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 558416022c9SBarry Smith C->factor = FACTOR_LU; 5597dda7e10SBarry Smith (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 560c456f294SBarry Smith C->assembled = PETSC_TRUE; 561b0a32e0cSBarry Smith PetscLogFlops(C->n); 562d3d32019SBarry Smith if (ndamp) { 563b0a32e0cSBarry Smith PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 564085256b3SBarry Smith } 5656cc28720Svictorle if (nshift) { 5666cc28720Svictorle b->lu_shift_fraction = shift_fraction; 567fb10cecfSBarry Smith PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %d\n",shift_fraction,shift_top,nshift); 5686cc28720Svictorle } 5693a40ed3dSBarry Smith PetscFunctionReturn(0); 570289bc588SBarry Smith } 5710889b5dcSvictorle 5720889b5dcSvictorle #undef __FUNCT__ 5730889b5dcSvictorle #define __FUNCT__ "MatUsePETSc_SeqAIJ" 5740889b5dcSvictorle int MatUsePETSc_SeqAIJ(Mat A) 5750889b5dcSvictorle { 5760889b5dcSvictorle PetscFunctionBegin; 5770889b5dcSvictorle A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 5780889b5dcSvictorle A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 5790889b5dcSvictorle PetscFunctionReturn(0); 5800889b5dcSvictorle } 5810889b5dcSvictorle 5820889b5dcSvictorle 5830a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 5844a2ae208SSatish Balay #undef __FUNCT__ 5854a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqAIJ" 586b380c88cSHong Zhang int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 587da3a660dSBarry Smith { 588273d9f13SBarry Smith int ierr; 589416022c9SBarry Smith Mat C; 590416022c9SBarry Smith 5913a40ed3dSBarry Smith PetscFunctionBegin; 5929e046878SBarry Smith ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 593f2582111SSatish Balay ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 594273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 595440f4c53SSatish Balay PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 5963a40ed3dSBarry Smith PetscFunctionReturn(0); 597da3a660dSBarry Smith } 5980a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 5994a2ae208SSatish Balay #undef __FUNCT__ 6004a2ae208SSatish Balay #define __FUNCT__ "MatSolve_SeqAIJ" 601416022c9SBarry Smith int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 6028c37ef55SBarry Smith { 603416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 604416022c9SBarry Smith IS iscol = a->col,isrow = a->row; 605273d9f13SBarry Smith int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 606010a20caSHong Zhang int nz,*rout,*cout; 60787828ca2SBarry Smith PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 6088c37ef55SBarry Smith 6093a40ed3dSBarry Smith PetscFunctionBegin; 6103a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 61191bf9adeSBarry Smith 612b1d4fb26SBarry Smith ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr); 613b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 614416022c9SBarry Smith tmp = a->solve_work; 6158c37ef55SBarry Smith 6163b2fbd54SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 6173b2fbd54SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 6188c37ef55SBarry Smith 6198c37ef55SBarry Smith /* forward solve the lower triangular */ 6208c37ef55SBarry Smith tmp[0] = b[*r++]; 621010a20caSHong Zhang tmps = tmp; 6228c37ef55SBarry Smith for (i=1; i<n; i++) { 623010a20caSHong Zhang v = aa + ai[i] ; 624010a20caSHong Zhang vi = aj + ai[i] ; 62553e7425aSBarry Smith nz = a->diag[i] - ai[i]; 62653e7425aSBarry Smith sum = b[*r++]; 627ed480e8bSBarry Smith SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 6288c37ef55SBarry Smith tmp[i] = sum; 6298c37ef55SBarry Smith } 6308c37ef55SBarry Smith 6318c37ef55SBarry Smith /* backward solve the upper triangular */ 6328c37ef55SBarry Smith for (i=n-1; i>=0; i--){ 633010a20caSHong Zhang v = aa + a->diag[i] + 1; 634010a20caSHong Zhang vi = aj + a->diag[i] + 1; 635416022c9SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 6368c37ef55SBarry Smith sum = tmp[i]; 637ed480e8bSBarry Smith SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 638010a20caSHong Zhang x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 6398c37ef55SBarry Smith } 6408c37ef55SBarry Smith 6413b2fbd54SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 6423b2fbd54SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 643b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr); 644b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 645b0a32e0cSBarry Smith PetscLogFlops(2*a->nz - A->n); 6463a40ed3dSBarry Smith PetscFunctionReturn(0); 6478c37ef55SBarry Smith } 648026e39d0SSatish Balay 649930ae53cSBarry Smith /* ----------------------------------------------------------- */ 6504a2ae208SSatish Balay #undef __FUNCT__ 6514a2ae208SSatish Balay #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 652930ae53cSBarry Smith int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 653930ae53cSBarry Smith { 654930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 655273d9f13SBarry Smith int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 656362ced78SSatish Balay PetscScalar *x,*b,*aa = a->a; 657aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 658d85afc42SSatish Balay int adiag_i,i,*vi,nz,ai_i; 659362ced78SSatish Balay PetscScalar *v,sum; 660d85afc42SSatish Balay #endif 661930ae53cSBarry Smith 6623a40ed3dSBarry Smith PetscFunctionBegin; 6633a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 664930ae53cSBarry Smith 665b1d4fb26SBarry Smith ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr); 666b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 667930ae53cSBarry Smith 668aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 6691c4feecaSSatish Balay fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 6701c4feecaSSatish Balay #else 671930ae53cSBarry Smith /* forward solve the lower triangular */ 672930ae53cSBarry Smith x[0] = b[0]; 673930ae53cSBarry Smith for (i=1; i<n; i++) { 674930ae53cSBarry Smith ai_i = ai[i]; 675930ae53cSBarry Smith v = aa + ai_i; 676930ae53cSBarry Smith vi = aj + ai_i; 677930ae53cSBarry Smith nz = adiag[i] - ai_i; 678930ae53cSBarry Smith sum = b[i]; 679930ae53cSBarry Smith while (nz--) sum -= *v++ * x[*vi++]; 680930ae53cSBarry Smith x[i] = sum; 681930ae53cSBarry Smith } 682930ae53cSBarry Smith 683930ae53cSBarry Smith /* backward solve the upper triangular */ 684930ae53cSBarry Smith for (i=n-1; i>=0; i--){ 685930ae53cSBarry Smith adiag_i = adiag[i]; 686d708749eSBarry Smith v = aa + adiag_i + 1; 687d708749eSBarry Smith vi = aj + adiag_i + 1; 688930ae53cSBarry Smith nz = ai[i+1] - adiag_i - 1; 689930ae53cSBarry Smith sum = x[i]; 690930ae53cSBarry Smith while (nz--) sum -= *v++ * x[*vi++]; 691930ae53cSBarry Smith x[i] = sum*aa[adiag_i]; 692930ae53cSBarry Smith } 6931c4feecaSSatish Balay #endif 694b0a32e0cSBarry Smith PetscLogFlops(2*a->nz - A->n); 695b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr); 696b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 6973a40ed3dSBarry Smith PetscFunctionReturn(0); 698930ae53cSBarry Smith } 699930ae53cSBarry Smith 7004a2ae208SSatish Balay #undef __FUNCT__ 7014a2ae208SSatish Balay #define __FUNCT__ "MatSolveAdd_SeqAIJ" 702416022c9SBarry Smith int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 703da3a660dSBarry Smith { 704416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 705416022c9SBarry Smith IS iscol = a->col,isrow = a->row; 706273d9f13SBarry Smith int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 707010a20caSHong Zhang int nz,*rout,*cout; 70887828ca2SBarry Smith PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 709da3a660dSBarry Smith 7103a40ed3dSBarry Smith PetscFunctionBegin; 71178b31e54SBarry Smith if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 712da3a660dSBarry Smith 713b1d4fb26SBarry Smith ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr); 714b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 715416022c9SBarry Smith tmp = a->solve_work; 716da3a660dSBarry Smith 7173b2fbd54SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 7183b2fbd54SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 719da3a660dSBarry Smith 720da3a660dSBarry Smith /* forward solve the lower triangular */ 721da3a660dSBarry Smith tmp[0] = b[*r++]; 722da3a660dSBarry Smith for (i=1; i<n; i++) { 723010a20caSHong Zhang v = aa + ai[i] ; 724010a20caSHong Zhang vi = aj + ai[i] ; 725416022c9SBarry Smith nz = a->diag[i] - ai[i]; 726da3a660dSBarry Smith sum = b[*r++]; 727010a20caSHong Zhang while (nz--) sum -= *v++ * tmp[*vi++ ]; 728da3a660dSBarry Smith tmp[i] = sum; 729da3a660dSBarry Smith } 730da3a660dSBarry Smith 731da3a660dSBarry Smith /* backward solve the upper triangular */ 732da3a660dSBarry Smith for (i=n-1; i>=0; i--){ 733010a20caSHong Zhang v = aa + a->diag[i] + 1; 734010a20caSHong Zhang vi = aj + a->diag[i] + 1; 735416022c9SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 736da3a660dSBarry Smith sum = tmp[i]; 737010a20caSHong Zhang while (nz--) sum -= *v++ * tmp[*vi++ ]; 738010a20caSHong Zhang tmp[i] = sum*aa[a->diag[i]]; 739da3a660dSBarry Smith x[*c--] += tmp[i]; 740da3a660dSBarry Smith } 741da3a660dSBarry Smith 7423b2fbd54SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 7433b2fbd54SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 744b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr); 745b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 746b0a32e0cSBarry Smith PetscLogFlops(2*a->nz); 747898183ecSLois Curfman McInnes 7483a40ed3dSBarry Smith PetscFunctionReturn(0); 749da3a660dSBarry Smith } 750da3a660dSBarry Smith /* -------------------------------------------------------------------*/ 7514a2ae208SSatish Balay #undef __FUNCT__ 7524a2ae208SSatish Balay #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 7537c922b88SBarry Smith int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 754da3a660dSBarry Smith { 755416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7562235e667SBarry Smith IS iscol = a->col,isrow = a->row; 757273d9f13SBarry Smith int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 758010a20caSHong Zhang int nz,*rout,*cout,*diag = a->diag; 75987828ca2SBarry Smith PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 760da3a660dSBarry Smith 7613a40ed3dSBarry Smith PetscFunctionBegin; 762b1d4fb26SBarry Smith ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr); 763b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 764416022c9SBarry Smith tmp = a->solve_work; 765da3a660dSBarry Smith 7662235e667SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 7672235e667SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 768da3a660dSBarry Smith 769da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 7702235e667SBarry Smith for (i=0; i<n; i++) tmp[i] = b[c[i]]; 771da3a660dSBarry Smith 772da3a660dSBarry Smith /* forward solve the U^T */ 773da3a660dSBarry Smith for (i=0; i<n; i++) { 774010a20caSHong Zhang v = aa + diag[i] ; 775010a20caSHong Zhang vi = aj + diag[i] + 1; 776f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 777c38d4ed2SBarry Smith s1 = tmp[i]; 778ef66eb69SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 779da3a660dSBarry Smith while (nz--) { 780010a20caSHong Zhang tmp[*vi++ ] -= (*v++)*s1; 781da3a660dSBarry Smith } 782c38d4ed2SBarry Smith tmp[i] = s1; 783da3a660dSBarry Smith } 784da3a660dSBarry Smith 785da3a660dSBarry Smith /* backward solve the L^T */ 786da3a660dSBarry Smith for (i=n-1; i>=0; i--){ 787010a20caSHong Zhang v = aa + diag[i] - 1 ; 788010a20caSHong Zhang vi = aj + diag[i] - 1 ; 789f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 790f1af5d2fSBarry Smith s1 = tmp[i]; 791da3a660dSBarry Smith while (nz--) { 792010a20caSHong Zhang tmp[*vi-- ] -= (*v--)*s1; 793da3a660dSBarry Smith } 794da3a660dSBarry Smith } 795da3a660dSBarry Smith 796da3a660dSBarry Smith /* copy tmp into x according to permutation */ 797da3a660dSBarry Smith for (i=0; i<n; i++) x[r[i]] = tmp[i]; 798da3a660dSBarry Smith 7992235e667SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 8002235e667SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 801b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr); 802b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 803da3a660dSBarry Smith 804b0a32e0cSBarry Smith PetscLogFlops(2*a->nz-A->n); 8053a40ed3dSBarry Smith PetscFunctionReturn(0); 806da3a660dSBarry Smith } 807da3a660dSBarry Smith 8084a2ae208SSatish Balay #undef __FUNCT__ 8094a2ae208SSatish Balay #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 8107c922b88SBarry Smith int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 811da3a660dSBarry Smith { 812416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 8132235e667SBarry Smith IS iscol = a->col,isrow = a->row; 814273d9f13SBarry Smith int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 815010a20caSHong Zhang int nz,*rout,*cout,*diag = a->diag; 81687828ca2SBarry Smith PetscScalar *x,*b,*tmp,*aa = a->a,*v; 8176abc6512SBarry Smith 8183a40ed3dSBarry Smith PetscFunctionBegin; 8196abc6512SBarry Smith if (zz != xx) VecCopy(zz,xx); 8206abc6512SBarry Smith 821b1d4fb26SBarry Smith ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr); 822b1d4fb26SBarry Smith ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 823416022c9SBarry Smith tmp = a->solve_work; 8246abc6512SBarry Smith 8252235e667SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 8262235e667SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 8276abc6512SBarry Smith 8286abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 8292235e667SBarry Smith for (i=0; i<n; i++) tmp[i] = b[c[i]]; 8306abc6512SBarry Smith 8316abc6512SBarry Smith /* forward solve the U^T */ 8326abc6512SBarry Smith for (i=0; i<n; i++) { 833010a20caSHong Zhang v = aa + diag[i] ; 834010a20caSHong Zhang vi = aj + diag[i] + 1; 835f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 8366abc6512SBarry Smith tmp[i] *= *v++; 8376abc6512SBarry Smith while (nz--) { 838010a20caSHong Zhang tmp[*vi++ ] -= (*v++)*tmp[i]; 8396abc6512SBarry Smith } 8406abc6512SBarry Smith } 8416abc6512SBarry Smith 8426abc6512SBarry Smith /* backward solve the L^T */ 8436abc6512SBarry Smith for (i=n-1; i>=0; i--){ 844010a20caSHong Zhang v = aa + diag[i] - 1 ; 845010a20caSHong Zhang vi = aj + diag[i] - 1 ; 846f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 8476abc6512SBarry Smith while (nz--) { 848010a20caSHong Zhang tmp[*vi-- ] -= (*v--)*tmp[i]; 8496abc6512SBarry Smith } 8506abc6512SBarry Smith } 8516abc6512SBarry Smith 8526abc6512SBarry Smith /* copy tmp into x according to permutation */ 8536abc6512SBarry Smith for (i=0; i<n; i++) x[r[i]] += tmp[i]; 8546abc6512SBarry Smith 8552235e667SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 8562235e667SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 857b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr); 858b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 8596abc6512SBarry Smith 860b0a32e0cSBarry Smith PetscLogFlops(2*a->nz); 8613a40ed3dSBarry Smith PetscFunctionReturn(0); 862da3a660dSBarry Smith } 8639e25ed09SBarry Smith /* ----------------------------------------------------------------*/ 864ca44d042SBarry Smith EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 86508480c60SBarry Smith 8664a2ae208SSatish Balay #undef __FUNCT__ 8674a2ae208SSatish Balay #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 868b380c88cSHong Zhang int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 8699e25ed09SBarry Smith { 870416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 8719e25ed09SBarry Smith IS isicol; 872273d9f13SBarry Smith int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j; 87356beaf04SBarry Smith int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 874335d9088SBarry Smith int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 875010a20caSHong Zhang int incrlev,nnz,i,levels,diagonal_fill; 87677c4ece6SBarry Smith PetscTruth col_identity,row_identity; 877329f5518SBarry Smith PetscReal f; 8789e25ed09SBarry Smith 8793a40ed3dSBarry Smith PetscFunctionBegin; 8806cf997b0SBarry Smith f = info->fill; 8810cd17293SBarry Smith levels = (int)info->levels; 8820cd17293SBarry Smith diagonal_fill = (int)info->diagonal_fill; 8834c49b128SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 88482bf6240SBarry Smith 88508480c60SBarry Smith /* special case that simply copies fill pattern */ 886be0abb6dSBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 887be0abb6dSBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 88886bacbd2SBarry Smith if (!levels && row_identity && col_identity) { 8892e8a6d31SBarry Smith ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 89008480c60SBarry Smith (*fact)->factor = FACTOR_LU; 89108480c60SBarry Smith b = (Mat_SeqAIJ*)(*fact)->data; 89208480c60SBarry Smith if (!b->diag) { 8937c922b88SBarry Smith ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 89408480c60SBarry Smith } 8957c922b88SBarry Smith ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 89608480c60SBarry Smith b->row = isrow; 89708480c60SBarry Smith b->col = iscol; 89882bf6240SBarry Smith b->icol = isicol; 899a2e34c3dSBarry Smith b->lu_damping = info->damping; 90087828ca2SBarry Smith b->lu_zeropivot = info->zeropivot; 9012cea7109SBarry Smith b->lu_shift = info->shift; 9022cea7109SBarry Smith b->lu_shift_fraction= info->shift_fraction; 90387828ca2SBarry Smith ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 904f830108cSBarry Smith (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 905c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 906c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 9073a40ed3dSBarry Smith PetscFunctionReturn(0); 90808480c60SBarry Smith } 90908480c60SBarry Smith 910898183ecSLois Curfman McInnes ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 911898183ecSLois Curfman McInnes ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 9129e25ed09SBarry Smith 9139e25ed09SBarry Smith /* get new row pointers */ 914b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 915010a20caSHong Zhang ainew[0] = 0; 9169e25ed09SBarry Smith /* don't know how many column pointers are needed so estimate */ 917010a20caSHong Zhang jmax = (int)(f*(ai[n]+1)); 918b0a32e0cSBarry Smith ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 9199e25ed09SBarry Smith /* ajfill is level of fill for each fill entry */ 920b0a32e0cSBarry Smith ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr); 9219e25ed09SBarry Smith /* fill is a linked list of nonzeros in active row */ 922b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr); 92356beaf04SBarry Smith /* im is level for each filled value */ 924b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr); 92556beaf04SBarry Smith /* dloc is location of diagonal in factor */ 926b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr); 92756beaf04SBarry Smith dloc[0] = 0; 92856beaf04SBarry Smith for (prow=0; prow<n; prow++) { 9296cf997b0SBarry Smith 9306cf997b0SBarry Smith /* copy current row into linked list */ 93156beaf04SBarry Smith nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 93229bbc08cSBarry Smith if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 933010a20caSHong Zhang xi = aj + ai[r[prow]] ; 9349e25ed09SBarry Smith fill[n] = n; 935435faa5fSBarry Smith fill[prow] = -1; /* marker to indicate if diagonal exists */ 9369e25ed09SBarry Smith while (nz--) { 9379e25ed09SBarry Smith fm = n; 938010a20caSHong Zhang idx = ic[*xi++ ]; 9399e25ed09SBarry Smith do { 9409e25ed09SBarry Smith m = fm; 9419e25ed09SBarry Smith fm = fill[m]; 9429e25ed09SBarry Smith } while (fm < idx); 9439e25ed09SBarry Smith fill[m] = idx; 9449e25ed09SBarry Smith fill[idx] = fm; 94556beaf04SBarry Smith im[idx] = 0; 9469e25ed09SBarry Smith } 9476cf997b0SBarry Smith 9486cf997b0SBarry Smith /* make sure diagonal entry is included */ 949435faa5fSBarry Smith if (diagonal_fill && fill[prow] == -1) { 9506cf997b0SBarry Smith fm = n; 951435faa5fSBarry Smith while (fill[fm] < prow) fm = fill[fm]; 952435faa5fSBarry Smith fill[prow] = fill[fm]; /* insert diagonal into linked list */ 953435faa5fSBarry Smith fill[fm] = prow; 9546cf997b0SBarry Smith im[prow] = 0; 9556cf997b0SBarry Smith nzf++; 956335d9088SBarry Smith dcount++; 9576cf997b0SBarry Smith } 9586cf997b0SBarry Smith 95956beaf04SBarry Smith nzi = 0; 9609e25ed09SBarry Smith row = fill[n]; 96156beaf04SBarry Smith while (row < prow) { 96256beaf04SBarry Smith incrlev = im[row] + 1; 96356beaf04SBarry Smith nz = dloc[row]; 964010a20caSHong Zhang xi = ajnew + ainew[row] + nz + 1; 965010a20caSHong Zhang flev = ajfill + ainew[row] + nz + 1; 966416022c9SBarry Smith nnz = ainew[row+1] - ainew[row] - nz - 1; 96756beaf04SBarry Smith fm = row; 96856beaf04SBarry Smith while (nnz-- > 0) { 969010a20caSHong Zhang idx = *xi++ ; 97056beaf04SBarry Smith if (*flev + incrlev > levels) { 97156beaf04SBarry Smith flev++; 97256beaf04SBarry Smith continue; 97356beaf04SBarry Smith } 97456beaf04SBarry Smith do { 9759e25ed09SBarry Smith m = fm; 9769e25ed09SBarry Smith fm = fill[m]; 97756beaf04SBarry Smith } while (fm < idx); 9789e25ed09SBarry Smith if (fm != idx) { 97956beaf04SBarry Smith im[idx] = *flev + incrlev; 9809e25ed09SBarry Smith fill[m] = idx; 9819e25ed09SBarry Smith fill[idx] = fm; 9829e25ed09SBarry Smith fm = idx; 98356beaf04SBarry Smith nzf++; 984ecf371e4SBarry Smith } else { 98556beaf04SBarry Smith if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 9869e25ed09SBarry Smith } 98756beaf04SBarry Smith flev++; 9889e25ed09SBarry Smith } 9899e25ed09SBarry Smith row = fill[row]; 99056beaf04SBarry Smith nzi++; 9919e25ed09SBarry Smith } 9929e25ed09SBarry Smith /* copy new filled row into permanent storage */ 99356beaf04SBarry Smith ainew[prow+1] = ainew[prow] + nzf; 994010a20caSHong Zhang if (ainew[prow+1] > jmax) { 995ecf371e4SBarry Smith 996ecf371e4SBarry Smith /* estimate how much additional space we will need */ 997ecf371e4SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 998ecf371e4SBarry Smith /* just double the memory each time */ 999ecf371e4SBarry Smith /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1000ecf371e4SBarry Smith int maxadd = jmax; 100156beaf04SBarry Smith if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1002bbb6d6a8SBarry Smith jmax += maxadd; 1003ecf371e4SBarry Smith 1004ecf371e4SBarry Smith /* allocate a longer ajnew and ajfill */ 1005b0a32e0cSBarry Smith ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1006010a20caSHong Zhang ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 1007606d414cSSatish Balay ierr = PetscFree(ajnew);CHKERRQ(ierr); 100856beaf04SBarry Smith ajnew = xi; 1009b0a32e0cSBarry Smith ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1010010a20caSHong Zhang ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 1011606d414cSSatish Balay ierr = PetscFree(ajfill);CHKERRQ(ierr); 101256beaf04SBarry Smith ajfill = xi; 1013ecf371e4SBarry Smith realloc++; /* count how many times we realloc */ 10149e25ed09SBarry Smith } 1015010a20caSHong Zhang xi = ajnew + ainew[prow] ; 1016010a20caSHong Zhang flev = ajfill + ainew[prow] ; 101756beaf04SBarry Smith dloc[prow] = nzi; 10189e25ed09SBarry Smith fm = fill[n]; 101956beaf04SBarry Smith while (nzf--) { 1020010a20caSHong Zhang *xi++ = fm ; 102156beaf04SBarry Smith *flev++ = im[fm]; 10229e25ed09SBarry Smith fm = fill[fm]; 10239e25ed09SBarry Smith } 10246cf997b0SBarry Smith /* make sure row has diagonal entry */ 1025010a20caSHong Zhang if (ajnew[ainew[prow]+dloc[prow]] != prow) { 102629bbc08cSBarry Smith SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\ 10276cf997b0SBarry Smith try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 10286cf997b0SBarry Smith } 10299e25ed09SBarry Smith } 1030606d414cSSatish Balay ierr = PetscFree(ajfill);CHKERRQ(ierr); 1031898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1032898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1033606d414cSSatish Balay ierr = PetscFree(fill);CHKERRQ(ierr); 1034606d414cSSatish Balay ierr = PetscFree(im);CHKERRQ(ierr); 10359e25ed09SBarry Smith 1036f64065bbSBarry Smith { 1037329f5518SBarry Smith PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1038b0a32e0cSBarry Smith PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1039c0d6ac57SBarry Smith PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1040c0d6ac57SBarry Smith PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1041b0a32e0cSBarry Smith PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1042335d9088SBarry Smith if (diagonal_fill) { 1043b1bcba4aSBarry Smith PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount); 1044335d9088SBarry Smith } 1045f64065bbSBarry Smith } 1046bbb6d6a8SBarry Smith 10479e25ed09SBarry Smith /* put together the new matrix */ 1048b4fd4287SBarry Smith ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1049b0a32e0cSBarry Smith PetscLogObjectParent(*fact,isicol); 1050416022c9SBarry Smith b = (Mat_SeqAIJ*)(*fact)->data; 1051606d414cSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 10527c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 1053010a20caSHong Zhang len = (ainew[n] )*sizeof(PetscScalar); 10549e25ed09SBarry Smith /* the next line frees the default space generated by the Create() */ 1055606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 1056606d414cSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1057b0a32e0cSBarry Smith ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1058416022c9SBarry Smith b->j = ajnew; 1059416022c9SBarry Smith b->i = ainew; 106056beaf04SBarry Smith for (i=0; i<n; i++) dloc[i] += ainew[i]; 1061416022c9SBarry Smith b->diag = dloc; 1062416022c9SBarry Smith b->ilen = 0; 1063416022c9SBarry Smith b->imax = 0; 1064416022c9SBarry Smith b->row = isrow; 1065416022c9SBarry Smith b->col = iscol; 1066c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1067c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 106882bf6240SBarry Smith b->icol = isicol; 106987828ca2SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1070416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 107156beaf04SBarry Smith Allocate dloc, solve_work, new a, new j */ 1072010a20caSHong Zhang PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(int)+sizeof(PetscScalar))); 1073010a20caSHong Zhang b->maxnz = b->nz = ainew[n] ; 1074a2e34c3dSBarry Smith b->lu_damping = info->damping; 10752cea7109SBarry Smith b->lu_shift = info->shift; 10762cea7109SBarry Smith b->lu_shift_fraction = info->shift_fraction; 107787828ca2SBarry Smith b->lu_zeropivot = info->zeropivot; 10789e25ed09SBarry Smith (*fact)->factor = FACTOR_LU; 10793a7fca6bSBarry Smith ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 10803a7fca6bSBarry Smith (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1081ae068f58SLois Curfman McInnes 1082ae068f58SLois Curfman McInnes (*fact)->info.factor_mallocs = realloc; 1083ae068f58SLois Curfman McInnes (*fact)->info.fill_ratio_given = f; 1084329f5518SBarry Smith (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 10853a40ed3dSBarry Smith PetscFunctionReturn(0); 10869e25ed09SBarry Smith } 1087d5d45c9bSBarry Smith 1088a6175056SHong Zhang #include "src/mat/impls/sbaij/seq/sbaij.h" 1089a6175056SHong Zhang #undef __FUNCT__ 1090a6175056SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1091a6175056SHong Zhang int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1092a6175056SHong Zhang { 10930968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1094a6175056SHong Zhang int ierr; 1095d5d45c9bSBarry Smith 1096a6175056SHong Zhang PetscFunctionBegin; 10970968510aSHong Zhang if (!a->sbaijMat){ 10980968510aSHong Zhang ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1099a6175056SHong Zhang } 110003aac1b8SHong Zhang 1101b45a75daSHong Zhang ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr); 11020968510aSHong Zhang ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1103064503c5SHong Zhang a->sbaijMat = PETSC_NULL; 1104653a6975SHong Zhang 1105a6175056SHong Zhang PetscFunctionReturn(0); 1106a6175056SHong Zhang } 1107a6175056SHong Zhang 1108a6175056SHong Zhang #undef __FUNCT__ 1109a6175056SHong Zhang #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 111015e8a5b3SHong Zhang int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1111a6175056SHong Zhang { 11120968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 111303aac1b8SHong Zhang int ierr; 1114653a6975SHong Zhang PetscTruth perm_identity; 1115a6175056SHong Zhang 1116a6175056SHong Zhang PetscFunctionBegin; 1117653a6975SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1118653a6975SHong Zhang if (!perm_identity){ 1119653a6975SHong Zhang SETERRQ(1,"Non-identity permutation is not supported yet"); 1120653a6975SHong Zhang } 11210968510aSHong Zhang if (!a->sbaijMat){ 11220968510aSHong Zhang ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 11230968510aSHong Zhang } 1124a6175056SHong Zhang 1125b00f7748SHong Zhang ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1126653a6975SHong Zhang (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1127653a6975SHong Zhang 1128a6175056SHong Zhang PetscFunctionReturn(0); 1129a6175056SHong Zhang } 1130d5d45c9bSBarry Smith 1131f76d2b81SHong Zhang #undef __FUNCT__ 1132f76d2b81SHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1133f76d2b81SHong Zhang int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1134f76d2b81SHong Zhang { 1135f76d2b81SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 113603aac1b8SHong Zhang int ierr; 1137f76d2b81SHong Zhang PetscTruth perm_identity; 1138f76d2b81SHong Zhang 1139f76d2b81SHong Zhang PetscFunctionBegin; 1140f76d2b81SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1141f76d2b81SHong Zhang if (!perm_identity){ 1142f76d2b81SHong Zhang SETERRQ(1,"Non-identity permutation is not supported yet"); 1143f76d2b81SHong Zhang } 1144f76d2b81SHong Zhang if (!a->sbaijMat){ 1145f76d2b81SHong Zhang ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1146f76d2b81SHong Zhang } 1147f76d2b81SHong Zhang 1148f76d2b81SHong Zhang ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1149f76d2b81SHong Zhang (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1150f76d2b81SHong Zhang 1151f76d2b81SHong Zhang PetscFunctionReturn(0); 1152f76d2b81SHong Zhang } 1153