1*83287d42SBarry Smith /*$Id: baijfact.c,v 1.86 2000/11/28 17:29:14 bsmith Exp $*/ 2*83287d42SBarry Smith /* 3*83287d42SBarry Smith Factorization code for BAIJ format. 4*83287d42SBarry Smith */ 5*83287d42SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 6*83287d42SBarry Smith #include "src/vec/vecimpl.h" 7*83287d42SBarry Smith #include "src/inline/ilu.h" 8*83287d42SBarry Smith 9*83287d42SBarry Smith /* 10*83287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 11*83287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 12*83287d42SBarry Smith NOT good code reuse. 13*83287d42SBarry Smith */ 14*83287d42SBarry Smith #undef __FUNC__ 15*83287d42SBarry Smith #define __FUNC__ /*<a name="MatLUFactorSymbolic_SeqBAIJ"></a>*/"MatLUFactorSymbolic_SeqBAIJ" 16*83287d42SBarry Smith int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B) 17*83287d42SBarry Smith { 18*83287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 19*83287d42SBarry Smith IS isicol; 20*83287d42SBarry Smith int *r,*ic,ierr,i,n = a->mbs,*ai = a->i,*aj = a->j; 21*83287d42SBarry Smith int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = a->bs,bs2=a->bs2; 22*83287d42SBarry Smith int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 23*83287d42SBarry Smith PetscReal f = 1.0; 24*83287d42SBarry Smith 25*83287d42SBarry Smith PetscFunctionBegin; 26*83287d42SBarry Smith PetscValidHeaderSpecific(isrow,IS_COOKIE); 27*83287d42SBarry Smith PetscValidHeaderSpecific(iscol,IS_COOKIE); 28*83287d42SBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 29*83287d42SBarry Smith 30*83287d42SBarry Smith if (!isrow) { 31*83287d42SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr); 32*83287d42SBarry Smith } 33*83287d42SBarry Smith if (!iscol) { 34*83287d42SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr); 35*83287d42SBarry Smith } 36*83287d42SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 37*83287d42SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 38*83287d42SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 39*83287d42SBarry Smith 40*83287d42SBarry Smith if (info) f = info->fill; 41*83287d42SBarry Smith /* get new row pointers */ 42*83287d42SBarry Smith ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 43*83287d42SBarry Smith ainew[0] = 0; 44*83287d42SBarry Smith /* don't know how many column pointers are needed so estimate */ 45*83287d42SBarry Smith jmax = (int)(f*ai[n] + 1); 46*83287d42SBarry Smith ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 47*83287d42SBarry Smith /* fill is a linked list of nonzeros in active row */ 48*83287d42SBarry Smith fill = (int*)PetscMalloc((2*n+1)*sizeof(int));CHKPTRQ(fill); 49*83287d42SBarry Smith im = fill + n + 1; 50*83287d42SBarry Smith /* idnew is location of diagonal in factor */ 51*83287d42SBarry Smith idnew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(idnew); 52*83287d42SBarry Smith idnew[0] = 0; 53*83287d42SBarry Smith 54*83287d42SBarry Smith for (i=0; i<n; i++) { 55*83287d42SBarry Smith /* first copy previous fill into linked list */ 56*83287d42SBarry Smith nnz = nz = ai[r[i]+1] - ai[r[i]]; 57*83287d42SBarry Smith if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 58*83287d42SBarry Smith ajtmp = aj + ai[r[i]]; 59*83287d42SBarry Smith fill[n] = n; 60*83287d42SBarry Smith while (nz--) { 61*83287d42SBarry Smith fm = n; 62*83287d42SBarry Smith idx = ic[*ajtmp++]; 63*83287d42SBarry Smith do { 64*83287d42SBarry Smith m = fm; 65*83287d42SBarry Smith fm = fill[m]; 66*83287d42SBarry Smith } while (fm < idx); 67*83287d42SBarry Smith fill[m] = idx; 68*83287d42SBarry Smith fill[idx] = fm; 69*83287d42SBarry Smith } 70*83287d42SBarry Smith row = fill[n]; 71*83287d42SBarry Smith while (row < i) { 72*83287d42SBarry Smith ajtmp = ajnew + idnew[row] + 1; 73*83287d42SBarry Smith nzbd = 1 + idnew[row] - ainew[row]; 74*83287d42SBarry Smith nz = im[row] - nzbd; 75*83287d42SBarry Smith fm = row; 76*83287d42SBarry Smith while (nz-- > 0) { 77*83287d42SBarry Smith idx = *ajtmp++; 78*83287d42SBarry Smith nzbd++; 79*83287d42SBarry Smith if (idx == i) im[row] = nzbd; 80*83287d42SBarry Smith do { 81*83287d42SBarry Smith m = fm; 82*83287d42SBarry Smith fm = fill[m]; 83*83287d42SBarry Smith } while (fm < idx); 84*83287d42SBarry Smith if (fm != idx) { 85*83287d42SBarry Smith fill[m] = idx; 86*83287d42SBarry Smith fill[idx] = fm; 87*83287d42SBarry Smith fm = idx; 88*83287d42SBarry Smith nnz++; 89*83287d42SBarry Smith } 90*83287d42SBarry Smith } 91*83287d42SBarry Smith row = fill[row]; 92*83287d42SBarry Smith } 93*83287d42SBarry Smith /* copy new filled row into permanent storage */ 94*83287d42SBarry Smith ainew[i+1] = ainew[i] + nnz; 95*83287d42SBarry Smith if (ainew[i+1] > jmax) { 96*83287d42SBarry Smith 97*83287d42SBarry Smith /* estimate how much additional space we will need */ 98*83287d42SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 99*83287d42SBarry Smith /* just double the memory each time */ 100*83287d42SBarry Smith int maxadd = jmax; 101*83287d42SBarry Smith /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */ 102*83287d42SBarry Smith if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 103*83287d42SBarry Smith jmax += maxadd; 104*83287d42SBarry Smith 105*83287d42SBarry Smith /* allocate a longer ajnew */ 106*83287d42SBarry Smith ajtmp = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(ajtmp); 107*83287d42SBarry Smith ierr = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(int));CHKERRQ(ierr); 108*83287d42SBarry Smith ierr = PetscFree(ajnew);CHKERRQ(ierr); 109*83287d42SBarry Smith ajnew = ajtmp; 110*83287d42SBarry Smith realloc++; /* count how many times we realloc */ 111*83287d42SBarry Smith } 112*83287d42SBarry Smith ajtmp = ajnew + ainew[i]; 113*83287d42SBarry Smith fm = fill[n]; 114*83287d42SBarry Smith nzi = 0; 115*83287d42SBarry Smith im[i] = nnz; 116*83287d42SBarry Smith while (nnz--) { 117*83287d42SBarry Smith if (fm < i) nzi++; 118*83287d42SBarry Smith *ajtmp++ = fm; 119*83287d42SBarry Smith fm = fill[fm]; 120*83287d42SBarry Smith } 121*83287d42SBarry Smith idnew[i] = ainew[i] + nzi; 122*83287d42SBarry Smith } 123*83287d42SBarry Smith 124*83287d42SBarry Smith if (ai[n] != 0) { 125*83287d42SBarry Smith PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 126*83287d42SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 127*83287d42SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Run with -pc_lu_fill %g or use \n",af); 128*83287d42SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:PCLUSetFill(pc,%g);\n",af); 129*83287d42SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:for best performance.\n"); 130*83287d42SBarry Smith } else { 131*83287d42SBarry Smith PLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Empty matrix.\n"); 132*83287d42SBarry Smith } 133*83287d42SBarry Smith 134*83287d42SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 135*83287d42SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 136*83287d42SBarry Smith 137*83287d42SBarry Smith ierr = PetscFree(fill);CHKERRQ(ierr); 138*83287d42SBarry Smith 139*83287d42SBarry Smith /* put together the new matrix */ 140*83287d42SBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B);CHKERRQ(ierr); 141*83287d42SBarry Smith PLogObjectParent(*B,isicol); 142*83287d42SBarry Smith b = (Mat_SeqBAIJ*)(*B)->data; 143*83287d42SBarry Smith ierr = PetscFree(b->imax);CHKERRQ(ierr); 144*83287d42SBarry Smith b->singlemalloc = PETSC_FALSE; 145*83287d42SBarry Smith /* the next line frees the default space generated by the Create() */ 146*83287d42SBarry Smith ierr = PetscFree(b->a);CHKERRQ(ierr); 147*83287d42SBarry Smith ierr = PetscFree(b->ilen);CHKERRQ(ierr); 148*83287d42SBarry Smith b->a = (MatScalar*)PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2);CHKPTRQ(b->a); 149*83287d42SBarry Smith b->j = ajnew; 150*83287d42SBarry Smith b->i = ainew; 151*83287d42SBarry Smith b->diag = idnew; 152*83287d42SBarry Smith b->ilen = 0; 153*83287d42SBarry Smith b->imax = 0; 154*83287d42SBarry Smith b->row = isrow; 155*83287d42SBarry Smith b->col = iscol; 156*83287d42SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 157*83287d42SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 158*83287d42SBarry Smith b->icol = isicol; 159*83287d42SBarry Smith b->solve_work = (Scalar*)PetscMalloc((bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 160*83287d42SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 161*83287d42SBarry Smith Allocate idnew, solve_work, new a, new j */ 162*83287d42SBarry Smith PLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(MatScalar))); 163*83287d42SBarry Smith b->maxnz = b->nz = ainew[n]; 164*83287d42SBarry Smith 165*83287d42SBarry Smith (*B)->factor = FACTOR_LU; 166*83287d42SBarry Smith (*B)->info.factor_mallocs = realloc; 167*83287d42SBarry Smith (*B)->info.fill_ratio_given = f; 168*83287d42SBarry Smith if (ai[n] != 0) { 169*83287d42SBarry Smith (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 170*83287d42SBarry Smith } else { 171*83287d42SBarry Smith (*B)->info.fill_ratio_needed = 0.0; 172*83287d42SBarry Smith } 173*83287d42SBarry Smith PetscFunctionReturn(0); 174*83287d42SBarry Smith } 175