1*be1d678aSKris Buschelman #define PETSCMAT_DLL 2*be1d678aSKris Buschelman 383287d42SBarry Smith /* 483287d42SBarry Smith Factorization code for BAIJ format. 583287d42SBarry Smith */ 683287d42SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 783287d42SBarry Smith #include "src/inline/ilu.h" 883287d42SBarry Smith 983287d42SBarry Smith /* 1083287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 1183287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 1283287d42SBarry Smith NOT good code reuse. 1383287d42SBarry Smith */ 144a2ae208SSatish Balay #undef __FUNCT__ 154a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ" 16dfbe8321SBarry Smith PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 1783287d42SBarry Smith { 1883287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 1983287d42SBarry Smith IS isicol; 206849ba73SBarry Smith PetscErrorCode ierr; 21690b6cddSBarry Smith PetscInt *r,*ic,i,n = a->mbs,*ai = a->i,*aj = a->j; 22521d7252SBarry Smith PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = A->bs,bs2=a->bs2; 23418422e8SSatish Balay PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 2483287d42SBarry Smith PetscReal f = 1.0; 2583287d42SBarry Smith 2683287d42SBarry Smith PetscFunctionBegin; 2783287d42SBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 2883287d42SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 2983287d42SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3083287d42SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3183287d42SBarry Smith 32d3d32019SBarry Smith f = info->fill; 3383287d42SBarry Smith /* get new row pointers */ 34690b6cddSBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 3583287d42SBarry Smith ainew[0] = 0; 3683287d42SBarry Smith /* don't know how many column pointers are needed so estimate */ 37690b6cddSBarry Smith jmax = (PetscInt)(f*ai[n] + 1); 38690b6cddSBarry Smith ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 3983287d42SBarry Smith /* fill is a linked list of nonzeros in active row */ 40690b6cddSBarry Smith ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 4183287d42SBarry Smith im = fill + n + 1; 4283287d42SBarry Smith /* idnew is location of diagonal in factor */ 43690b6cddSBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 4483287d42SBarry Smith idnew[0] = 0; 4583287d42SBarry Smith 4683287d42SBarry Smith for (i=0; i<n; i++) { 4783287d42SBarry Smith /* first copy previous fill into linked list */ 4883287d42SBarry Smith nnz = nz = ai[r[i]+1] - ai[r[i]]; 4983287d42SBarry Smith if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 5083287d42SBarry Smith ajtmp = aj + ai[r[i]]; 5183287d42SBarry Smith fill[n] = n; 5283287d42SBarry Smith while (nz--) { 5383287d42SBarry Smith fm = n; 5483287d42SBarry Smith idx = ic[*ajtmp++]; 5583287d42SBarry Smith do { 5683287d42SBarry Smith m = fm; 5783287d42SBarry Smith fm = fill[m]; 5883287d42SBarry Smith } while (fm < idx); 5983287d42SBarry Smith fill[m] = idx; 6083287d42SBarry Smith fill[idx] = fm; 6183287d42SBarry Smith } 6283287d42SBarry Smith row = fill[n]; 6383287d42SBarry Smith while (row < i) { 6483287d42SBarry Smith ajtmp = ajnew + idnew[row] + 1; 6583287d42SBarry Smith nzbd = 1 + idnew[row] - ainew[row]; 6683287d42SBarry Smith nz = im[row] - nzbd; 6783287d42SBarry Smith fm = row; 6883287d42SBarry Smith while (nz-- > 0) { 6983287d42SBarry Smith idx = *ajtmp++; 7083287d42SBarry Smith nzbd++; 7183287d42SBarry Smith if (idx == i) im[row] = nzbd; 7283287d42SBarry Smith do { 7383287d42SBarry Smith m = fm; 7483287d42SBarry Smith fm = fill[m]; 7583287d42SBarry Smith } while (fm < idx); 7683287d42SBarry Smith if (fm != idx) { 7783287d42SBarry Smith fill[m] = idx; 7883287d42SBarry Smith fill[idx] = fm; 7983287d42SBarry Smith fm = idx; 8083287d42SBarry Smith nnz++; 8183287d42SBarry Smith } 8283287d42SBarry Smith } 8383287d42SBarry Smith row = fill[row]; 8483287d42SBarry Smith } 8583287d42SBarry Smith /* copy new filled row into permanent storage */ 8683287d42SBarry Smith ainew[i+1] = ainew[i] + nnz; 8783287d42SBarry Smith if (ainew[i+1] > jmax) { 8883287d42SBarry Smith 8983287d42SBarry Smith /* estimate how much additional space we will need */ 9083287d42SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 9183287d42SBarry Smith /* just double the memory each time */ 92690b6cddSBarry Smith PetscInt maxadd = jmax; 9383287d42SBarry Smith /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */ 9483287d42SBarry Smith if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 9583287d42SBarry Smith jmax += maxadd; 9683287d42SBarry Smith 9783287d42SBarry Smith /* allocate a longer ajnew */ 98690b6cddSBarry Smith ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 99690b6cddSBarry Smith ierr = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(PetscInt));CHKERRQ(ierr); 10083287d42SBarry Smith ierr = PetscFree(ajnew);CHKERRQ(ierr); 10183287d42SBarry Smith ajnew = ajtmp; 102418422e8SSatish Balay reallocs++; /* count how many times we realloc */ 10383287d42SBarry Smith } 10483287d42SBarry Smith ajtmp = ajnew + ainew[i]; 10583287d42SBarry Smith fm = fill[n]; 10683287d42SBarry Smith nzi = 0; 10783287d42SBarry Smith im[i] = nnz; 10883287d42SBarry Smith while (nnz--) { 10983287d42SBarry Smith if (fm < i) nzi++; 11083287d42SBarry Smith *ajtmp++ = fm; 11183287d42SBarry Smith fm = fill[fm]; 11283287d42SBarry Smith } 11383287d42SBarry Smith idnew[i] = ainew[i] + nzi; 11483287d42SBarry Smith } 11583287d42SBarry Smith 11663ba0a88SBarry Smith #if defined(PETSC_USE_DEBUG) 11783287d42SBarry Smith if (ai[n] != 0) { 11883287d42SBarry Smith PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 11963ba0a88SBarry Smith ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 12063ba0a88SBarry Smith ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqBAIJ:Run with -pc_lu_fill %g or use \n",af));CHKERRQ(ierr); 12163ba0a88SBarry Smith ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqBAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 12263ba0a88SBarry Smith ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqBAIJ:for best performance.\n"));CHKERRQ(ierr); 12383287d42SBarry Smith } else { 12463ba0a88SBarry Smith ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqBAIJ:Empty matrix.\n"));CHKERRQ(ierr); 12583287d42SBarry Smith } 12663ba0a88SBarry Smith #endif 12783287d42SBarry Smith 12883287d42SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 12983287d42SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 13083287d42SBarry Smith 13183287d42SBarry Smith ierr = PetscFree(fill);CHKERRQ(ierr); 13283287d42SBarry Smith 13383287d42SBarry Smith /* put together the new matrix */ 134f204ca49SKris Buschelman ierr = MatCreate(A->comm,bs*n,bs*n,bs*n,bs*n,B);CHKERRQ(ierr); 135f204ca49SKris Buschelman ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 136f204ca49SKris Buschelman ierr = MatSeqBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 13752e6d16bSBarry Smith ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 13883287d42SBarry Smith b = (Mat_SeqBAIJ*)(*B)->data; 13983287d42SBarry Smith ierr = PetscFree(b->imax);CHKERRQ(ierr); 14083287d42SBarry Smith b->singlemalloc = PETSC_FALSE; 14183287d42SBarry Smith /* the next line frees the default space generated by the Create() */ 14283287d42SBarry Smith ierr = PetscFree(b->a);CHKERRQ(ierr); 14383287d42SBarry Smith ierr = PetscFree(b->ilen);CHKERRQ(ierr); 14482502324SSatish Balay ierr = PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 14583287d42SBarry Smith b->j = ajnew; 14683287d42SBarry Smith b->i = ainew; 14783287d42SBarry Smith b->diag = idnew; 14883287d42SBarry Smith b->ilen = 0; 14983287d42SBarry Smith b->imax = 0; 15083287d42SBarry Smith b->row = isrow; 15183287d42SBarry Smith b->col = iscol; 152d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 15383287d42SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 15483287d42SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 15583287d42SBarry Smith b->icol = isicol; 15687828ca2SBarry Smith ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 15783287d42SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 15883287d42SBarry Smith Allocate idnew, solve_work, new a, new j */ 15952e6d16bSBarry Smith ierr = PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 16083287d42SBarry Smith b->maxnz = b->nz = ainew[n]; 16183287d42SBarry Smith 16283287d42SBarry Smith (*B)->factor = FACTOR_LU; 163418422e8SSatish Balay (*B)->info.factor_mallocs = reallocs; 16483287d42SBarry Smith (*B)->info.fill_ratio_given = f; 16583287d42SBarry Smith if (ai[n] != 0) { 16683287d42SBarry Smith (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 16783287d42SBarry Smith } else { 16883287d42SBarry Smith (*B)->info.fill_ratio_needed = 0.0; 16983287d42SBarry Smith } 17083287d42SBarry Smith PetscFunctionReturn(0); 17183287d42SBarry Smith } 172