1*b0a32e0cSBarry Smith /*$Id: baijfact3.c,v 1.1 2001/01/06 15:35:15 bsmith Exp bsmith $*/ 283287d42SBarry Smith /* 383287d42SBarry Smith Factorization code for BAIJ format. 483287d42SBarry Smith */ 583287d42SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 683287d42SBarry Smith #include "src/vec/vecimpl.h" 783287d42SBarry Smith #include "src/inline/ilu.h" 883287d42SBarry Smith 983287d42SBarry Smith /* 1083287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 1183287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 1283287d42SBarry Smith NOT good code reuse. 1383287d42SBarry Smith */ 1483287d42SBarry Smith #undef __FUNC__ 15*b0a32e0cSBarry Smith #define __FUNC__ "MatLUFactorSymbolic_SeqBAIJ" 1683287d42SBarry Smith int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B) 1783287d42SBarry Smith { 1883287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 1983287d42SBarry Smith IS isicol; 2083287d42SBarry Smith int *r,*ic,ierr,i,n = a->mbs,*ai = a->i,*aj = a->j; 2183287d42SBarry Smith int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = a->bs,bs2=a->bs2; 2283287d42SBarry Smith int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 2383287d42SBarry Smith PetscReal f = 1.0; 2483287d42SBarry Smith 2583287d42SBarry Smith PetscFunctionBegin; 2683287d42SBarry Smith PetscValidHeaderSpecific(isrow,IS_COOKIE); 2783287d42SBarry Smith PetscValidHeaderSpecific(iscol,IS_COOKIE); 2883287d42SBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 2983287d42SBarry Smith 3083287d42SBarry Smith if (!isrow) { 3183287d42SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr); 3283287d42SBarry Smith } 3383287d42SBarry Smith if (!iscol) { 3483287d42SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr); 3583287d42SBarry Smith } 3683287d42SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3783287d42SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3883287d42SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3983287d42SBarry Smith 4083287d42SBarry Smith if (info) f = info->fill; 4183287d42SBarry Smith /* get new row pointers */ 42*b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),& ainew );CHKERRQ(ierr); 4383287d42SBarry Smith ainew[0] = 0; 4483287d42SBarry Smith /* don't know how many column pointers are needed so estimate */ 4583287d42SBarry Smith jmax = (int)(f*ai[n] + 1); 46*b0a32e0cSBarry Smith ierr = PetscMalloc((jmax)*sizeof(int),&( ajnew ));CHKERRQ(ierr); 4783287d42SBarry Smith /* fill is a linked list of nonzeros in active row */ 48*b0a32e0cSBarry Smith ierr = PetscMalloc((2*n+1)*sizeof(int),& fill );CHKERRQ(ierr); 4983287d42SBarry Smith im = fill + n + 1; 5083287d42SBarry Smith /* idnew is location of diagonal in factor */ 51*b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),& idnew );CHKERRQ(ierr); 5283287d42SBarry Smith idnew[0] = 0; 5383287d42SBarry Smith 5483287d42SBarry Smith for (i=0; i<n; i++) { 5583287d42SBarry Smith /* first copy previous fill into linked list */ 5683287d42SBarry Smith nnz = nz = ai[r[i]+1] - ai[r[i]]; 5783287d42SBarry Smith if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 5883287d42SBarry Smith ajtmp = aj + ai[r[i]]; 5983287d42SBarry Smith fill[n] = n; 6083287d42SBarry Smith while (nz--) { 6183287d42SBarry Smith fm = n; 6283287d42SBarry Smith idx = ic[*ajtmp++]; 6383287d42SBarry Smith do { 6483287d42SBarry Smith m = fm; 6583287d42SBarry Smith fm = fill[m]; 6683287d42SBarry Smith } while (fm < idx); 6783287d42SBarry Smith fill[m] = idx; 6883287d42SBarry Smith fill[idx] = fm; 6983287d42SBarry Smith } 7083287d42SBarry Smith row = fill[n]; 7183287d42SBarry Smith while (row < i) { 7283287d42SBarry Smith ajtmp = ajnew + idnew[row] + 1; 7383287d42SBarry Smith nzbd = 1 + idnew[row] - ainew[row]; 7483287d42SBarry Smith nz = im[row] - nzbd; 7583287d42SBarry Smith fm = row; 7683287d42SBarry Smith while (nz-- > 0) { 7783287d42SBarry Smith idx = *ajtmp++; 7883287d42SBarry Smith nzbd++; 7983287d42SBarry Smith if (idx == i) im[row] = nzbd; 8083287d42SBarry Smith do { 8183287d42SBarry Smith m = fm; 8283287d42SBarry Smith fm = fill[m]; 8383287d42SBarry Smith } while (fm < idx); 8483287d42SBarry Smith if (fm != idx) { 8583287d42SBarry Smith fill[m] = idx; 8683287d42SBarry Smith fill[idx] = fm; 8783287d42SBarry Smith fm = idx; 8883287d42SBarry Smith nnz++; 8983287d42SBarry Smith } 9083287d42SBarry Smith } 9183287d42SBarry Smith row = fill[row]; 9283287d42SBarry Smith } 9383287d42SBarry Smith /* copy new filled row into permanent storage */ 9483287d42SBarry Smith ainew[i+1] = ainew[i] + nnz; 9583287d42SBarry Smith if (ainew[i+1] > jmax) { 9683287d42SBarry Smith 9783287d42SBarry Smith /* estimate how much additional space we will need */ 9883287d42SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 9983287d42SBarry Smith /* just double the memory each time */ 10083287d42SBarry Smith int maxadd = jmax; 10183287d42SBarry Smith /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */ 10283287d42SBarry Smith if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 10383287d42SBarry Smith jmax += maxadd; 10483287d42SBarry Smith 10583287d42SBarry Smith /* allocate a longer ajnew */ 106*b0a32e0cSBarry Smith ierr = PetscMalloc(jmax*sizeof(int),&( ajtmp ));CHKERRQ(ierr); 10783287d42SBarry Smith ierr = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(int));CHKERRQ(ierr); 10883287d42SBarry Smith ierr = PetscFree(ajnew);CHKERRQ(ierr); 10983287d42SBarry Smith ajnew = ajtmp; 11083287d42SBarry Smith realloc++; /* count how many times we realloc */ 11183287d42SBarry Smith } 11283287d42SBarry Smith ajtmp = ajnew + ainew[i]; 11383287d42SBarry Smith fm = fill[n]; 11483287d42SBarry Smith nzi = 0; 11583287d42SBarry Smith im[i] = nnz; 11683287d42SBarry Smith while (nnz--) { 11783287d42SBarry Smith if (fm < i) nzi++; 11883287d42SBarry Smith *ajtmp++ = fm; 11983287d42SBarry Smith fm = fill[fm]; 12083287d42SBarry Smith } 12183287d42SBarry Smith idnew[i] = ainew[i] + nzi; 12283287d42SBarry Smith } 12383287d42SBarry Smith 12483287d42SBarry Smith if (ai[n] != 0) { 12583287d42SBarry Smith PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 126*b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 127*b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Run with -pc_lu_fill %g or use \n",af); 128*b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:PCLUSetFill(pc,%g);\n",af); 129*b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:for best performance.\n"); 13083287d42SBarry Smith } else { 131*b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Empty matrix.\n"); 13283287d42SBarry Smith } 13383287d42SBarry Smith 13483287d42SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 13583287d42SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 13683287d42SBarry Smith 13783287d42SBarry Smith ierr = PetscFree(fill);CHKERRQ(ierr); 13883287d42SBarry Smith 13983287d42SBarry Smith /* put together the new matrix */ 14083287d42SBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B);CHKERRQ(ierr); 141*b0a32e0cSBarry Smith PetscLogObjectParent(*B,isicol); 14283287d42SBarry Smith b = (Mat_SeqBAIJ*)(*B)->data; 14383287d42SBarry Smith ierr = PetscFree(b->imax);CHKERRQ(ierr); 14483287d42SBarry Smith b->singlemalloc = PETSC_FALSE; 14583287d42SBarry Smith /* the next line frees the default space generated by the Create() */ 14683287d42SBarry Smith ierr = PetscFree(b->a);CHKERRQ(ierr); 14783287d42SBarry Smith ierr = PetscFree(b->ilen);CHKERRQ(ierr); 148*b0a32e0cSBarry Smith b->a = (MatScalar*)PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2);CHKERRQ(ierr); 14983287d42SBarry Smith b->j = ajnew; 15083287d42SBarry Smith b->i = ainew; 15183287d42SBarry Smith b->diag = idnew; 15283287d42SBarry Smith b->ilen = 0; 15383287d42SBarry Smith b->imax = 0; 15483287d42SBarry Smith b->row = isrow; 15583287d42SBarry Smith b->col = iscol; 15683287d42SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 15783287d42SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 15883287d42SBarry Smith b->icol = isicol; 159*b0a32e0cSBarry Smith ierr = PetscMalloc((bs*n+bs)*sizeof(Scalar),& b->solve_work );CHKERRQ(ierr); 16083287d42SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 16183287d42SBarry Smith Allocate idnew, solve_work, new a, new j */ 162*b0a32e0cSBarry Smith PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(MatScalar))); 16383287d42SBarry Smith b->maxnz = b->nz = ainew[n]; 16483287d42SBarry Smith 16583287d42SBarry Smith (*B)->factor = FACTOR_LU; 16683287d42SBarry Smith (*B)->info.factor_mallocs = realloc; 16783287d42SBarry Smith (*B)->info.fill_ratio_given = f; 16883287d42SBarry Smith if (ai[n] != 0) { 16983287d42SBarry Smith (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 17083287d42SBarry Smith } else { 17183287d42SBarry Smith (*B)->info.fill_ratio_needed = 0.0; 17283287d42SBarry Smith } 17383287d42SBarry Smith PetscFunctionReturn(0); 17483287d42SBarry Smith } 175