1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3dd5b3ca6SJunchao Zhang 4dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used in Gather, which is not a big deal */ 5dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Gather; 6dd5b3ca6SJunchao Zhang 79371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFBcastBegin_Gather(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) { 8cd620004SJunchao Zhang PetscSFLink link; 9855db38dSJunchao Zhang PetscMPIInt sendcount; 10dd5b3ca6SJunchao Zhang MPI_Comm comm; 11cd620004SJunchao Zhang void *rootbuf = NULL, *leafbuf = NULL; 12cd620004SJunchao Zhang MPI_Request *req; 13dd5b3ca6SJunchao Zhang 14dd5b3ca6SJunchao Zhang PetscFunctionBegin; 159566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link)); 169566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata)); 179566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 189566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 199566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(sf->nroots, &sendcount)); 209566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL)); 219566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF)); 229566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Igather(rootbuf == leafbuf ? MPI_IN_PLACE : rootbuf, sendcount, unit, leafbuf, sendcount, unit, 0 /*rank 0*/, comm, req)); 23dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 24dd5b3ca6SJunchao Zhang } 25dd5b3ca6SJunchao Zhang 269371c9d4SSatish Balay static PetscErrorCode PetscSFReduceBegin_Gather(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) { 27cd620004SJunchao Zhang PetscSFLink link; 28dd5b3ca6SJunchao Zhang PetscMPIInt recvcount; 29dd5b3ca6SJunchao Zhang MPI_Comm comm; 30cd620004SJunchao Zhang void *rootbuf = NULL, *leafbuf = NULL; 31cd620004SJunchao Zhang MPI_Request *req; 32dd5b3ca6SJunchao Zhang 33dd5b3ca6SJunchao Zhang PetscFunctionBegin; 349566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_REDUCE, &link)); 359566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata)); 369566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 379566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 389566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(sf->nroots, &recvcount)); 399566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL)); 409566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT)); 419566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Iscatter(leafbuf, recvcount, unit, rootbuf == leafbuf ? MPI_IN_PLACE : rootbuf, recvcount, unit, 0 /*rank 0*/, comm, req)); 42dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 43dd5b3ca6SJunchao Zhang } 44dd5b3ca6SJunchao Zhang 459371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFCreate_Gather(PetscSF sf) { 46dd5b3ca6SJunchao Zhang PetscSF_Gather *dat = (PetscSF_Gather *)sf->data; 47dd5b3ca6SJunchao Zhang 48dd5b3ca6SJunchao Zhang PetscFunctionBegin; 49ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 50cd620004SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 51cd620004SJunchao Zhang 52dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv */ 53dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 54dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 55dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Allgatherv; 56dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 57dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 58dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 59dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv; 60dd5b3ca6SJunchao Zhang 61cd620004SJunchao Zhang /* Inherit from Allgather */ 62cd620004SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgather; 63cd620004SJunchao Zhang 64dd5b3ca6SJunchao Zhang /* Inherit from Gatherv */ 65dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 66dd5b3ca6SJunchao Zhang 67dd5b3ca6SJunchao Zhang /* Gather stuff */ 68ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Gather; 69dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Gather; 70dd5b3ca6SJunchao Zhang 71*4dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&dat)); 72dd5b3ca6SJunchao Zhang sf->data = (void *)dat; 73dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 74dd5b3ca6SJunchao Zhang } 75