183287d42SBarry Smith /* 283287d42SBarry Smith Factorization code for BAIJ format. 383287d42SBarry Smith */ 483287d42SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 583287d42SBarry Smith #include "src/inline/ilu.h" 683287d42SBarry Smith 783287d42SBarry Smith /* 883287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 983287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 1083287d42SBarry Smith NOT good code reuse. 1183287d42SBarry Smith */ 124a2ae208SSatish Balay #undef __FUNCT__ 134a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ" 14dfbe8321SBarry Smith PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 1583287d42SBarry Smith { 1683287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 1783287d42SBarry Smith IS isicol; 18*6849ba73SBarry Smith PetscErrorCode ierr; 19*6849ba73SBarry Smith int *r,*ic,i,n = a->mbs,*ai = a->i,*aj = a->j; 2083287d42SBarry Smith int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = a->bs,bs2=a->bs2; 2183287d42SBarry Smith int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 2283287d42SBarry Smith PetscReal f = 1.0; 2383287d42SBarry Smith 2483287d42SBarry Smith PetscFunctionBegin; 2583287d42SBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 2683287d42SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 2783287d42SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2883287d42SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 2983287d42SBarry Smith 30d3d32019SBarry Smith f = info->fill; 3183287d42SBarry Smith /* get new row pointers */ 32b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 3383287d42SBarry Smith ainew[0] = 0; 3483287d42SBarry Smith /* don't know how many column pointers are needed so estimate */ 3583287d42SBarry Smith jmax = (int)(f*ai[n] + 1); 3682502324SSatish Balay ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 3783287d42SBarry Smith /* fill is a linked list of nonzeros in active row */ 38b0a32e0cSBarry Smith ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 3983287d42SBarry Smith im = fill + n + 1; 4083287d42SBarry Smith /* idnew is location of diagonal in factor */ 41b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 4283287d42SBarry Smith idnew[0] = 0; 4383287d42SBarry Smith 4483287d42SBarry Smith for (i=0; i<n; i++) { 4583287d42SBarry Smith /* first copy previous fill into linked list */ 4683287d42SBarry Smith nnz = nz = ai[r[i]+1] - ai[r[i]]; 4783287d42SBarry Smith if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 4883287d42SBarry Smith ajtmp = aj + ai[r[i]]; 4983287d42SBarry Smith fill[n] = n; 5083287d42SBarry Smith while (nz--) { 5183287d42SBarry Smith fm = n; 5283287d42SBarry Smith idx = ic[*ajtmp++]; 5383287d42SBarry Smith do { 5483287d42SBarry Smith m = fm; 5583287d42SBarry Smith fm = fill[m]; 5683287d42SBarry Smith } while (fm < idx); 5783287d42SBarry Smith fill[m] = idx; 5883287d42SBarry Smith fill[idx] = fm; 5983287d42SBarry Smith } 6083287d42SBarry Smith row = fill[n]; 6183287d42SBarry Smith while (row < i) { 6283287d42SBarry Smith ajtmp = ajnew + idnew[row] + 1; 6383287d42SBarry Smith nzbd = 1 + idnew[row] - ainew[row]; 6483287d42SBarry Smith nz = im[row] - nzbd; 6583287d42SBarry Smith fm = row; 6683287d42SBarry Smith while (nz-- > 0) { 6783287d42SBarry Smith idx = *ajtmp++; 6883287d42SBarry Smith nzbd++; 6983287d42SBarry Smith if (idx == i) im[row] = nzbd; 7083287d42SBarry Smith do { 7183287d42SBarry Smith m = fm; 7283287d42SBarry Smith fm = fill[m]; 7383287d42SBarry Smith } while (fm < idx); 7483287d42SBarry Smith if (fm != idx) { 7583287d42SBarry Smith fill[m] = idx; 7683287d42SBarry Smith fill[idx] = fm; 7783287d42SBarry Smith fm = idx; 7883287d42SBarry Smith nnz++; 7983287d42SBarry Smith } 8083287d42SBarry Smith } 8183287d42SBarry Smith row = fill[row]; 8283287d42SBarry Smith } 8383287d42SBarry Smith /* copy new filled row into permanent storage */ 8483287d42SBarry Smith ainew[i+1] = ainew[i] + nnz; 8583287d42SBarry Smith if (ainew[i+1] > jmax) { 8683287d42SBarry Smith 8783287d42SBarry Smith /* estimate how much additional space we will need */ 8883287d42SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 8983287d42SBarry Smith /* just double the memory each time */ 9083287d42SBarry Smith int maxadd = jmax; 9183287d42SBarry Smith /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */ 9283287d42SBarry Smith if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 9383287d42SBarry Smith jmax += maxadd; 9483287d42SBarry Smith 9583287d42SBarry Smith /* allocate a longer ajnew */ 9682502324SSatish Balay ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 9783287d42SBarry Smith ierr = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(int));CHKERRQ(ierr); 9883287d42SBarry Smith ierr = PetscFree(ajnew);CHKERRQ(ierr); 9983287d42SBarry Smith ajnew = ajtmp; 10083287d42SBarry Smith realloc++; /* count how many times we realloc */ 10183287d42SBarry Smith } 10283287d42SBarry Smith ajtmp = ajnew + ainew[i]; 10383287d42SBarry Smith fm = fill[n]; 10483287d42SBarry Smith nzi = 0; 10583287d42SBarry Smith im[i] = nnz; 10683287d42SBarry Smith while (nnz--) { 10783287d42SBarry Smith if (fm < i) nzi++; 10883287d42SBarry Smith *ajtmp++ = fm; 10983287d42SBarry Smith fm = fill[fm]; 11083287d42SBarry Smith } 11183287d42SBarry Smith idnew[i] = ainew[i] + nzi; 11283287d42SBarry Smith } 11383287d42SBarry Smith 11483287d42SBarry Smith if (ai[n] != 0) { 11583287d42SBarry Smith PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 116b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 117b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Run with -pc_lu_fill %g or use \n",af); 118b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:PCLUSetFill(pc,%g);\n",af); 119b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:for best performance.\n"); 12083287d42SBarry Smith } else { 121b0a32e0cSBarry Smith PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Empty matrix.\n"); 12283287d42SBarry Smith } 12383287d42SBarry Smith 12483287d42SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 12583287d42SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 12683287d42SBarry Smith 12783287d42SBarry Smith ierr = PetscFree(fill);CHKERRQ(ierr); 12883287d42SBarry Smith 12983287d42SBarry Smith /* put together the new matrix */ 130f204ca49SKris Buschelman ierr = MatCreate(A->comm,bs*n,bs*n,bs*n,bs*n,B);CHKERRQ(ierr); 131f204ca49SKris Buschelman ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 132f204ca49SKris Buschelman ierr = MatSeqBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 133b0a32e0cSBarry Smith PetscLogObjectParent(*B,isicol); 13483287d42SBarry Smith b = (Mat_SeqBAIJ*)(*B)->data; 13583287d42SBarry Smith ierr = PetscFree(b->imax);CHKERRQ(ierr); 13683287d42SBarry Smith b->singlemalloc = PETSC_FALSE; 13783287d42SBarry Smith /* the next line frees the default space generated by the Create() */ 13883287d42SBarry Smith ierr = PetscFree(b->a);CHKERRQ(ierr); 13983287d42SBarry Smith ierr = PetscFree(b->ilen);CHKERRQ(ierr); 14082502324SSatish Balay ierr = PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 14183287d42SBarry Smith b->j = ajnew; 14283287d42SBarry Smith b->i = ainew; 14383287d42SBarry Smith b->diag = idnew; 14483287d42SBarry Smith b->ilen = 0; 14583287d42SBarry Smith b->imax = 0; 14683287d42SBarry Smith b->row = isrow; 14783287d42SBarry Smith b->col = iscol; 148d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 14983287d42SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 15083287d42SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 15183287d42SBarry Smith b->icol = isicol; 15287828ca2SBarry Smith ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 15383287d42SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 15483287d42SBarry Smith Allocate idnew, solve_work, new a, new j */ 155b0a32e0cSBarry Smith PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(MatScalar))); 15683287d42SBarry Smith b->maxnz = b->nz = ainew[n]; 15783287d42SBarry Smith 15883287d42SBarry Smith (*B)->factor = FACTOR_LU; 15983287d42SBarry Smith (*B)->info.factor_mallocs = realloc; 16083287d42SBarry Smith (*B)->info.fill_ratio_given = f; 16183287d42SBarry Smith if (ai[n] != 0) { 16283287d42SBarry Smith (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 16383287d42SBarry Smith } else { 16483287d42SBarry Smith (*B)->info.fill_ratio_needed = 0.0; 16583287d42SBarry Smith } 16683287d42SBarry Smith PetscFunctionReturn(0); 16783287d42SBarry Smith } 168