1e8271787SHong Zhang #include <../src/mat/impls/baij/mpi/mpibaij.h> 2e8271787SHong Zhang 3e8271787SHong Zhang #undef __FUNCT__ 4e8271787SHong Zhang #define __FUNCT__ "MatGetMultiProcBlock_MPIBAIJ" 5e8271787SHong Zhang PetscErrorCode MatGetMultiProcBlock_MPIBAIJ(Mat mat, MPI_Comm subComm, MatReuse scall,Mat *subMat) 6e8271787SHong Zhang { 7e8271787SHong Zhang PetscErrorCode ierr; 8e8271787SHong Zhang Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 9e8271787SHong Zhang Mat_SeqBAIJ *aijB = (Mat_SeqBAIJ*)aij->B->data; 10e8271787SHong Zhang PetscMPIInt commRank,subCommSize,subCommRank; 11e8271787SHong Zhang PetscMPIInt *commRankMap,subRank,rank,commsize; 1202e9429cSHong Zhang PetscInt *garrayCMap,col,i,j,*nnz,newRow,newCol,*newbRow,*newbCol,k,k1; 13e8271787SHong Zhang PetscInt bs=mat->rmap->bs; 1402e9429cSHong Zhang PetscScalar *vals,*aijBvals; 15e8271787SHong Zhang 16e8271787SHong Zhang PetscFunctionBegin; 17e8271787SHong Zhang ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&commsize);CHKERRQ(ierr); 18e8271787SHong Zhang ierr = MPI_Comm_size(subComm,&subCommSize);CHKERRQ(ierr); 19e8271787SHong Zhang 20e8271787SHong Zhang /* create subMat object with the relavent layout */ 21e8271787SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 22e8271787SHong Zhang ierr = MatCreate(subComm,subMat);CHKERRQ(ierr); 23e8271787SHong Zhang ierr = MatSetType(*subMat,MATMPIBAIJ);CHKERRQ(ierr); 24e8271787SHong Zhang ierr = MatSetSizes(*subMat,mat->rmap->n,mat->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 25e8271787SHong Zhang ierr = MatSetBlockSizes(*subMat,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 26e8271787SHong Zhang 27e8271787SHong Zhang /* need to setup rmap and cmap before Preallocation */ 28e8271787SHong Zhang ierr = PetscLayoutSetBlockSize((*subMat)->rmap,mat->rmap->bs);CHKERRQ(ierr); 29e8271787SHong Zhang ierr = PetscLayoutSetBlockSize((*subMat)->cmap,mat->cmap->bs);CHKERRQ(ierr); 30e8271787SHong Zhang ierr = PetscLayoutSetUp((*subMat)->rmap);CHKERRQ(ierr); 31e8271787SHong Zhang ierr = PetscLayoutSetUp((*subMat)->cmap);CHKERRQ(ierr); 32e8271787SHong Zhang } 33e8271787SHong Zhang 34e8271787SHong Zhang /* create a map of comm_rank from subComm to comm - should commRankMap and garrayCMap be kept for reused? */ 35e8271787SHong Zhang ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&commRank);CHKERRQ(ierr); 36e8271787SHong Zhang ierr = MPI_Comm_rank(subComm,&subCommRank);CHKERRQ(ierr); 37*785e854fSJed Brown ierr = PetscMalloc1(subCommSize,&commRankMap);CHKERRQ(ierr); 38e8271787SHong Zhang ierr = MPI_Allgather(&commRank,1,MPI_INT,commRankMap,1,MPI_INT,subComm);CHKERRQ(ierr); 39e8271787SHong Zhang 40e8271787SHong Zhang /* Traverse garray and identify blocked column indices [of offdiag mat] that 41e8271787SHong Zhang should be discarded. For the ones not discarded, store the newCol+1 42e8271787SHong Zhang value in garrayCMap */ 43*785e854fSJed Brown ierr = PetscMalloc1(aij->B->cmap->n/bs,&garrayCMap);CHKERRQ(ierr); 44e8271787SHong Zhang ierr = PetscMemzero(garrayCMap,aij->B->cmap->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 45e8271787SHong Zhang for (i=0; i<aij->B->cmap->n/bs; i++) { 46e8271787SHong Zhang col = aij->garray[i]; /* blocked column index */ 47e8271787SHong Zhang for (subRank=0; subRank<subCommSize; subRank++) { 48e8271787SHong Zhang rank = commRankMap[subRank]; 49e8271787SHong Zhang if ((col >= mat->cmap->range[rank]/bs) && (col < mat->cmap->range[rank+1]/bs)) { 5002e9429cSHong Zhang garrayCMap[i] = (((*subMat)->cmap->range[subRank]- mat->cmap->range[rank])/bs + col + 1); 51e8271787SHong Zhang break; 52e8271787SHong Zhang } 53e8271787SHong Zhang } 54e8271787SHong Zhang } 55e8271787SHong Zhang 56e8271787SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 57e8271787SHong Zhang /* Now compute preallocation for the offdiag mat */ 58*785e854fSJed Brown ierr = PetscMalloc1(aij->B->rmap->n/bs,&nnz);CHKERRQ(ierr); 59e8271787SHong Zhang ierr = PetscMemzero(nnz,aij->B->rmap->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 60e8271787SHong Zhang for (i=0; i<aij->B->rmap->n/bs; i++) { 61e8271787SHong Zhang for (j=aijB->i[i]; j<aijB->i[i+1]; j++) { 62e8271787SHong Zhang if (garrayCMap[aijB->j[j]]) nnz[i]++; 63e8271787SHong Zhang } 64e8271787SHong Zhang } 65e8271787SHong Zhang ierr = MatMPIBAIJSetPreallocation(*(subMat),bs,0,NULL,0,nnz);CHKERRQ(ierr); 66e8271787SHong Zhang 67e8271787SHong Zhang /* reuse diag block with the new submat */ 68e8271787SHong Zhang ierr = MatDestroy(&((Mat_MPIBAIJ*)((*subMat)->data))->A);CHKERRQ(ierr); 69e8271787SHong Zhang 70e8271787SHong Zhang ((Mat_MPIBAIJ*)((*subMat)->data))->A = aij->A; 71e8271787SHong Zhang 72e8271787SHong Zhang ierr = PetscObjectReference((PetscObject)aij->A);CHKERRQ(ierr); 73e8271787SHong Zhang } else if (((Mat_MPIBAIJ*)(*subMat)->data)->A != aij->A) { 74e8271787SHong Zhang PetscObject obj = (PetscObject)((Mat_MPIBAIJ*)((*subMat)->data))->A; 75e8271787SHong Zhang 76e8271787SHong Zhang ierr = PetscObjectReference((PetscObject)obj);CHKERRQ(ierr); 77e8271787SHong Zhang 78e8271787SHong Zhang ((Mat_MPIBAIJ*)((*subMat)->data))->A = aij->A; 79e8271787SHong Zhang 80e8271787SHong Zhang ierr = PetscObjectReference((PetscObject)aij->A);CHKERRQ(ierr); 81e8271787SHong Zhang } 82e8271787SHong Zhang 83e8271787SHong Zhang /* Now traverse aij->B and insert values into subMat */ 84dcca6d9dSJed Brown ierr = PetscMalloc3(bs,&newbRow,bs,&newbCol,bs*bs,&vals);CHKERRQ(ierr); 85e8271787SHong Zhang for (i=0; i<aij->B->rmap->n/bs; i++) { 86e8271787SHong Zhang newRow = (*subMat)->rmap->range[subCommRank] + i*bs; 87e8271787SHong Zhang for (j=aijB->i[i]; j<aijB->i[i+1]; j++) { 88e8271787SHong Zhang newCol = garrayCMap[aijB->j[j]]; 89e8271787SHong Zhang if (newCol) { 90e8271787SHong Zhang newCol--; /* remove the increment */ 91e8271787SHong Zhang newCol *= bs; 92e8271787SHong Zhang for (k=0; k<bs; k++) { 93e8271787SHong Zhang newbRow[k] = newRow + k; 94e8271787SHong Zhang newbCol[k] = newCol + k; 95e8271787SHong Zhang } 96e8271787SHong Zhang /* copy column-oriented aijB->a into row-oriented vals */ 9702e9429cSHong Zhang aijBvals = aijB->a + j*bs*bs; 98e8271787SHong Zhang for (k1=0; k1<bs; k1++) { 9902e9429cSHong Zhang for (k=0; k<bs; k++) { 10002e9429cSHong Zhang vals[k1+k*bs] = *aijBvals++; 101e8271787SHong Zhang } 102e8271787SHong Zhang } 103e8271787SHong Zhang ierr = MatSetValues(*subMat,bs,newbRow,bs,newbCol,vals,INSERT_VALUES);CHKERRQ(ierr); 104e8271787SHong Zhang } 105e8271787SHong Zhang } 106e8271787SHong Zhang } 107e8271787SHong Zhang ierr = MatAssemblyBegin(*subMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 108e8271787SHong Zhang ierr = MatAssemblyEnd(*subMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 109e8271787SHong Zhang 110e8271787SHong Zhang /* deallocate temporary data */ 11102e9429cSHong Zhang ierr = PetscFree3(newbRow,newbCol,vals);CHKERRQ(ierr); 112e8271787SHong Zhang ierr = PetscFree(commRankMap);CHKERRQ(ierr); 113e8271787SHong Zhang ierr = PetscFree(garrayCMap);CHKERRQ(ierr); 114e8271787SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 115e8271787SHong Zhang ierr = PetscFree(nnz);CHKERRQ(ierr); 116e8271787SHong Zhang } 117e8271787SHong Zhang PetscFunctionReturn(0); 118e8271787SHong Zhang } 119