1*e8271787SHong Zhang #include <../src/mat/impls/baij/mpi/mpibaij.h> 2*e8271787SHong Zhang 3*e8271787SHong Zhang #undef __FUNCT__ 4*e8271787SHong Zhang #define __FUNCT__ "MatGetMultiProcBlock_MPIBAIJ" 5*e8271787SHong Zhang PetscErrorCode MatGetMultiProcBlock_MPIBAIJ(Mat mat, MPI_Comm subComm, MatReuse scall,Mat *subMat) 6*e8271787SHong Zhang { 7*e8271787SHong Zhang PetscErrorCode ierr; 8*e8271787SHong Zhang Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 9*e8271787SHong Zhang Mat_SeqBAIJ *aijB = (Mat_SeqBAIJ*)aij->B->data; 10*e8271787SHong Zhang PetscMPIInt commRank,subCommSize,subCommRank; 11*e8271787SHong Zhang PetscMPIInt *commRankMap,subRank,rank,commsize; 12*e8271787SHong Zhang PetscInt *garrayCMap,col,i,j,*nnz,newRow,newCol; 13*e8271787SHong Zhang PetscInt bs=mat->rmap->bs; 14*e8271787SHong Zhang PetscScalar vals[bs*bs]; 15*e8271787SHong Zhang PetscInt newbRow[bs],newbCol[bs],k,k1,k2; 16*e8271787SHong Zhang 17*e8271787SHong Zhang PetscFunctionBegin; 18*e8271787SHong Zhang ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&commsize);CHKERRQ(ierr); 19*e8271787SHong Zhang ierr = MPI_Comm_size(subComm,&subCommSize);CHKERRQ(ierr); 20*e8271787SHong Zhang 21*e8271787SHong Zhang /* create subMat object with the relavent layout */ 22*e8271787SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 23*e8271787SHong Zhang ierr = MatCreate(subComm,subMat);CHKERRQ(ierr); 24*e8271787SHong Zhang ierr = MatSetType(*subMat,MATMPIBAIJ);CHKERRQ(ierr); 25*e8271787SHong Zhang ierr = MatSetSizes(*subMat,mat->rmap->n,mat->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 26*e8271787SHong Zhang ierr = MatSetBlockSizes(*subMat,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 27*e8271787SHong Zhang 28*e8271787SHong Zhang /* need to setup rmap and cmap before Preallocation */ 29*e8271787SHong Zhang ierr = PetscLayoutSetBlockSize((*subMat)->rmap,mat->rmap->bs);CHKERRQ(ierr); 30*e8271787SHong Zhang ierr = PetscLayoutSetBlockSize((*subMat)->cmap,mat->cmap->bs);CHKERRQ(ierr); 31*e8271787SHong Zhang ierr = PetscLayoutSetUp((*subMat)->rmap);CHKERRQ(ierr); 32*e8271787SHong Zhang ierr = PetscLayoutSetUp((*subMat)->cmap);CHKERRQ(ierr); 33*e8271787SHong Zhang } 34*e8271787SHong Zhang 35*e8271787SHong Zhang /* create a map of comm_rank from subComm to comm - should commRankMap and garrayCMap be kept for reused? */ 36*e8271787SHong Zhang ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&commRank);CHKERRQ(ierr); 37*e8271787SHong Zhang ierr = MPI_Comm_rank(subComm,&subCommRank);CHKERRQ(ierr); 38*e8271787SHong Zhang ierr = PetscMalloc(subCommSize*sizeof(PetscMPIInt),&commRankMap);CHKERRQ(ierr); 39*e8271787SHong Zhang ierr = MPI_Allgather(&commRank,1,MPI_INT,commRankMap,1,MPI_INT,subComm);CHKERRQ(ierr); 40*e8271787SHong Zhang 41*e8271787SHong Zhang /* Traverse garray and identify blocked column indices [of offdiag mat] that 42*e8271787SHong Zhang should be discarded. For the ones not discarded, store the newCol+1 43*e8271787SHong Zhang value in garrayCMap */ 44*e8271787SHong Zhang ierr = PetscMalloc(aij->B->cmap->n/bs*sizeof(PetscInt),&garrayCMap);CHKERRQ(ierr); 45*e8271787SHong Zhang ierr = PetscMemzero(garrayCMap,aij->B->cmap->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 46*e8271787SHong Zhang for (i=0; i<aij->B->cmap->n/bs; i++) { 47*e8271787SHong Zhang col = aij->garray[i]; /* blocked column index */ 48*e8271787SHong Zhang for (subRank=0; subRank<subCommSize; subRank++) { 49*e8271787SHong Zhang rank = commRankMap[subRank]; 50*e8271787SHong Zhang if ((col >= mat->cmap->range[rank]/bs) && (col < mat->cmap->range[rank+1]/bs)) { 51*e8271787SHong Zhang garrayCMap[i] = ((*subMat)->cmap->range[subRank]/bs + col - mat->cmap->range[rank]/bs+1); 52*e8271787SHong Zhang break; 53*e8271787SHong Zhang } 54*e8271787SHong Zhang } 55*e8271787SHong Zhang } 56*e8271787SHong Zhang 57*e8271787SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 58*e8271787SHong Zhang /* Now compute preallocation for the offdiag mat */ 59*e8271787SHong Zhang ierr = PetscMalloc(aij->B->rmap->n/bs*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 60*e8271787SHong Zhang ierr = PetscMemzero(nnz,aij->B->rmap->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 61*e8271787SHong Zhang for (i=0; i<aij->B->rmap->n/bs; i++) { 62*e8271787SHong Zhang for (j=aijB->i[i]; j<aijB->i[i+1]; j++) { 63*e8271787SHong Zhang if (garrayCMap[aijB->j[j]]) nnz[i]++; 64*e8271787SHong Zhang } 65*e8271787SHong Zhang } 66*e8271787SHong Zhang ierr = MatMPIBAIJSetPreallocation(*(subMat),bs,0,NULL,0,nnz);CHKERRQ(ierr); 67*e8271787SHong Zhang 68*e8271787SHong Zhang /* reuse diag block with the new submat */ 69*e8271787SHong Zhang ierr = MatDestroy(&((Mat_MPIBAIJ*)((*subMat)->data))->A);CHKERRQ(ierr); 70*e8271787SHong Zhang 71*e8271787SHong Zhang ((Mat_MPIBAIJ*)((*subMat)->data))->A = aij->A; 72*e8271787SHong Zhang 73*e8271787SHong Zhang ierr = PetscObjectReference((PetscObject)aij->A);CHKERRQ(ierr); 74*e8271787SHong Zhang } else if (((Mat_MPIBAIJ*)(*subMat)->data)->A != aij->A) { 75*e8271787SHong Zhang PetscObject obj = (PetscObject)((Mat_MPIBAIJ*)((*subMat)->data))->A; 76*e8271787SHong Zhang 77*e8271787SHong Zhang ierr = PetscObjectReference((PetscObject)obj);CHKERRQ(ierr); 78*e8271787SHong Zhang 79*e8271787SHong Zhang ((Mat_MPIBAIJ*)((*subMat)->data))->A = aij->A; 80*e8271787SHong Zhang 81*e8271787SHong Zhang ierr = PetscObjectReference((PetscObject)aij->A);CHKERRQ(ierr); 82*e8271787SHong Zhang } 83*e8271787SHong Zhang 84*e8271787SHong Zhang /* Now traverse aij->B and insert values into subMat */ 85*e8271787SHong Zhang for (i=0; i<aij->B->rmap->n/bs; i++) { 86*e8271787SHong Zhang newRow = (*subMat)->rmap->range[subCommRank] + i*bs; 87*e8271787SHong Zhang for (j=aijB->i[i]; j<aijB->i[i+1]; j++) { 88*e8271787SHong Zhang newCol = garrayCMap[aijB->j[j]]; 89*e8271787SHong Zhang if (newCol) { 90*e8271787SHong Zhang newCol--; /* remove the increment */ 91*e8271787SHong Zhang newCol *= bs; 92*e8271787SHong Zhang for (k=0; k<bs; k++) { 93*e8271787SHong Zhang newbRow[k] = newRow + k; 94*e8271787SHong Zhang newbCol[k] = newCol + k; 95*e8271787SHong Zhang } 96*e8271787SHong Zhang /* copy column-oriented aijB->a into row-oriented vals */ 97*e8271787SHong Zhang k=0; 98*e8271787SHong Zhang for (k1=0; k1<bs; k1++) { 99*e8271787SHong Zhang for (k2=0; k2<bs; k2++) { 100*e8271787SHong Zhang vals[k1+k2*bs] = *(aijB->a+j*bs*bs + k); k++; 101*e8271787SHong Zhang } 102*e8271787SHong Zhang } 103*e8271787SHong Zhang ierr = MatSetValues(*subMat,bs,newbRow,bs,newbCol,vals,INSERT_VALUES);CHKERRQ(ierr); 104*e8271787SHong Zhang } 105*e8271787SHong Zhang } 106*e8271787SHong Zhang } 107*e8271787SHong Zhang 108*e8271787SHong Zhang /* assemble the submat */ 109*e8271787SHong Zhang ierr = MatAssemblyBegin(*subMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 110*e8271787SHong Zhang ierr = MatAssemblyEnd(*subMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 111*e8271787SHong Zhang 112*e8271787SHong Zhang /* deallocate temporary data */ 113*e8271787SHong Zhang ierr = PetscFree(commRankMap);CHKERRQ(ierr); 114*e8271787SHong Zhang ierr = PetscFree(garrayCMap);CHKERRQ(ierr); 115*e8271787SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 116*e8271787SHong Zhang ierr = PetscFree(nnz);CHKERRQ(ierr); 117*e8271787SHong Zhang } 118*e8271787SHong Zhang PetscFunctionReturn(0); 119*e8271787SHong Zhang } 120