1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2dd5b3ca6SJunchao Zhang 3dd5b3ca6SJunchao Zhang #define PetscSFPackGet_Allgather PetscSFPackGet_Allgatherv 4dd5b3ca6SJunchao Zhang 5dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used in Allgather on rank != 0, which is not a big deal */ 6dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Allgather; 7dd5b3ca6SJunchao Zhang 8*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gather(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op); 9dd5b3ca6SJunchao Zhang 10*eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Allgather(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 11dd5b3ca6SJunchao Zhang { 12dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 13*eb02082bSJunchao Zhang PetscSFPack link; 14dd5b3ca6SJunchao Zhang PetscMPIInt sendcount; 15dd5b3ca6SJunchao Zhang MPI_Comm comm; 16dd5b3ca6SJunchao Zhang 17dd5b3ca6SJunchao Zhang PetscFunctionBegin; 18*eb02082bSJunchao Zhang ierr = PetscSFPackGet_Allgather(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 19dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 20dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr); 21dd5b3ca6SJunchao Zhang 22dd5b3ca6SJunchao Zhang if (op == MPIU_REPLACE) { 23*eb02082bSJunchao Zhang ierr = MPIU_Iallgather(rootdata,sendcount,unit,leafdata,sendcount,unit,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype]);CHKERRQ(ierr); 24dd5b3ca6SJunchao Zhang } else { 25dd5b3ca6SJunchao Zhang /* Allgather to the leaf buffer and then add leaf buffer to rootdata */ 26*eb02082bSJunchao Zhang if (!link->leafbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,sf->nleaves*link->unitbytes,(void**)&link->leafbuf[leafmtype]);CHKERRQ(ierr);} 27*eb02082bSJunchao Zhang ierr = MPIU_Iallgather(rootdata,sendcount,unit,link->leafbuf[leafmtype],sendcount,unit,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype]);CHKERRQ(ierr); 28dd5b3ca6SJunchao Zhang } 29dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 30dd5b3ca6SJunchao Zhang } 31dd5b3ca6SJunchao Zhang 32*eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastToZero_Allgather(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata) 33dd5b3ca6SJunchao Zhang { 34dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 35*eb02082bSJunchao Zhang PetscSFPack link; 36dd5b3ca6SJunchao Zhang 37dd5b3ca6SJunchao Zhang PetscFunctionBegin; 38*eb02082bSJunchao Zhang ierr = PetscSFBcastAndOpBegin_Gather(sf,unit,rootmtype,rootdata,leafmtype,leafdata,MPIU_REPLACE);CHKERRQ(ierr); 39dd5b3ca6SJunchao Zhang /* A simplified PetscSFBcastAndOpEnd_Allgatherv */ 40*eb02082bSJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 41*eb02082bSJunchao Zhang ierr = MPI_Wait(link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype],MPI_STATUS_IGNORE);CHKERRQ(ierr); 42*eb02082bSJunchao Zhang ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr); 43dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 44dd5b3ca6SJunchao Zhang } 45dd5b3ca6SJunchao Zhang 46*eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Allgather(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 47dd5b3ca6SJunchao Zhang { 48dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 49*eb02082bSJunchao Zhang PetscSFPack link; 50dd5b3ca6SJunchao Zhang PetscMPIInt rank,count,sendcount; 51dd5b3ca6SJunchao Zhang PetscInt rstart; 52dd5b3ca6SJunchao Zhang MPI_Comm comm; 53dd5b3ca6SJunchao Zhang 54dd5b3ca6SJunchao Zhang PetscFunctionBegin; 55*eb02082bSJunchao Zhang ierr = PetscSFPackGet_Allgather(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 56dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 57dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 58dd5b3ca6SJunchao Zhang 59dd5b3ca6SJunchao Zhang if (op == MPIU_REPLACE) { 60dd5b3ca6SJunchao Zhang /* REPLACE is only meaningful when all processes have the same leafdata to reduce. Therefore copy from local leafdata is fine */ 61dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr); 62*eb02082bSJunchao Zhang ierr = PetscMemcpyWithMemType(rootmtype,leafmtype,rootdata,(const char*)leafdata+(size_t)rstart*link->unitbytes,(size_t)sf->nroots*link->unitbytes);CHKERRQ(ierr); 63dd5b3ca6SJunchao Zhang } else { 64dd5b3ca6SJunchao Zhang /* Reduce all leafdata on rank 0, then scatter the result to root buffer, then reduce root buffer to leafdata */ 65*eb02082bSJunchao Zhang if (!rank && !link->leafbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,sf->nleaves*link->unitbytes,(void**)&link->leafbuf[leafmtype]);CHKERRQ(ierr);} 66dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nleaves*link->bs,&count);CHKERRQ(ierr); 67dd5b3ca6SJunchao Zhang ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr); 68*eb02082bSJunchao Zhang ierr = MPI_Reduce(leafdata,link->leafbuf[leafmtype],count,link->basicunit,op,0/*rank 0*/,comm);CHKERRQ(ierr); /* Must do reduce with MPI builltin datatype basicunit */ 69*eb02082bSJunchao Zhang if (!link->rootbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,sf->nroots*link->unitbytes,(void**)&link->rootbuf[rootmtype]);CHKERRQ(ierr);} /* Allocate root buffer */ 70*eb02082bSJunchao Zhang ierr = MPIU_Iscatter(link->leafbuf[leafmtype],sendcount,unit,link->rootbuf[rootmtype],sendcount,unit,0/*rank 0*/,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype]);CHKERRQ(ierr); 71dd5b3ca6SJunchao Zhang } 72dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 73dd5b3ca6SJunchao Zhang } 74dd5b3ca6SJunchao Zhang 75dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Allgather(PetscSF sf) 76dd5b3ca6SJunchao Zhang { 77dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 78dd5b3ca6SJunchao Zhang PetscSF_Allgather *dat = (PetscSF_Allgather*)sf->data; 79dd5b3ca6SJunchao Zhang 80dd5b3ca6SJunchao Zhang PetscFunctionBegin; 81dd5b3ca6SJunchao Zhang 82dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv */ 83dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 84dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 85dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv; 86dd5b3ca6SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv; 87dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Allgatherv; 88dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 89dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 90dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv; 91dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Allgatherv; 92dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 93dd5b3ca6SJunchao Zhang 94dd5b3ca6SJunchao Zhang /* Allgather stuff */ 95dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Allgather; 96dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Allgather; 97dd5b3ca6SJunchao Zhang sf->ops->BcastToZero = PetscSFBcastToZero_Allgather; 98dd5b3ca6SJunchao Zhang 99dd5b3ca6SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 100dd5b3ca6SJunchao Zhang sf->data = (void*)dat; 101dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 102dd5b3ca6SJunchao Zhang } 103