1dd5b3ca6SJunchao Zhang 2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 3dd5b3ca6SJunchao Zhang 4eb02082bSJunchao Zhang /* Reuse the type. The difference is some fields (displs, recvcounts) are only significant 5eb02082bSJunchao Zhang on rank 0 in Gatherv. On other ranks they are harmless NULL. 6eb02082bSJunchao Zhang */ 7eb02082bSJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Gatherv; 8eb02082bSJunchao Zhang 99371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFBcastBegin_Gatherv(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) { 10cd620004SJunchao Zhang PetscSFLink link; 11855db38dSJunchao Zhang PetscMPIInt sendcount; 12dd5b3ca6SJunchao Zhang MPI_Comm comm; 13dd5b3ca6SJunchao Zhang PetscSF_Gatherv *dat = (PetscSF_Gatherv *)sf->data; 14cd620004SJunchao Zhang void *rootbuf = NULL, *leafbuf = NULL; /* buffer seen by MPI */ 15cd620004SJunchao Zhang MPI_Request *req; 16dd5b3ca6SJunchao Zhang 17dd5b3ca6SJunchao Zhang PetscFunctionBegin; 189566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link)); 199566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata)); 209566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 219566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 229566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(sf->nroots, &sendcount)); 239566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL)); 249566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF)); 259566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Igatherv(rootbuf, sendcount, unit, leafbuf, dat->recvcounts, dat->displs, unit, 0 /*rank 0*/, comm, req)); 26dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 27dd5b3ca6SJunchao Zhang } 28dd5b3ca6SJunchao Zhang 299371c9d4SSatish Balay static PetscErrorCode PetscSFReduceBegin_Gatherv(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) { 30cd620004SJunchao Zhang PetscSFLink link; 31dd5b3ca6SJunchao Zhang PetscMPIInt recvcount; 32dd5b3ca6SJunchao Zhang MPI_Comm comm; 33dd5b3ca6SJunchao Zhang PetscSF_Gatherv *dat = (PetscSF_Gatherv *)sf->data; 34cd620004SJunchao Zhang void *rootbuf = NULL, *leafbuf = NULL; /* buffer seen by MPI */ 35cd620004SJunchao Zhang MPI_Request *req; 36dd5b3ca6SJunchao Zhang 37dd5b3ca6SJunchao Zhang PetscFunctionBegin; 389566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_REDUCE, &link)); 399566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata)); 409566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 419566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 429566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(sf->nroots, &recvcount)); 439566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL)); 449566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT)); 459566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Iscatterv(leafbuf, dat->recvcounts, dat->displs, unit, rootbuf, recvcount, unit, 0, comm, req)); 46dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 47dd5b3ca6SJunchao Zhang } 48dd5b3ca6SJunchao Zhang 499371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Gatherv(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) { 50dd5b3ca6SJunchao Zhang PetscFunctionBegin; 51dd5b3ca6SJunchao Zhang /* In Gatherv, each root only has one leaf. So we just need to bcast rootdata to leafupdate and then reduce leafdata to rootdata */ 529566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, unit, rootdata, leafupdate, MPI_REPLACE)); 539566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, unit, rootdata, leafupdate, MPI_REPLACE)); 549566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, unit, leafdata, rootdata, op)); 55dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 56dd5b3ca6SJunchao Zhang } 57dd5b3ca6SJunchao Zhang 589371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFCreate_Gatherv(PetscSF sf) { 59dd5b3ca6SJunchao Zhang PetscSF_Gatherv *dat = (PetscSF_Gatherv *)sf->data; 60dd5b3ca6SJunchao Zhang 61dd5b3ca6SJunchao Zhang PetscFunctionBegin; 62ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 63cd620004SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 64cd620004SJunchao Zhang 65dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv */ 66dd5b3ca6SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgatherv; 67dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 68dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 69dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Allgatherv; 70dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 71dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 72dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 73dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv; 74dd5b3ca6SJunchao Zhang 75dd5b3ca6SJunchao Zhang /* Gatherv stuff */ 76ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Gatherv; 77dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Gatherv; 78dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 79dd5b3ca6SJunchao Zhang 80*4dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&dat)); 81dd5b3ca6SJunchao Zhang sf->data = (void *)dat; 82dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 83dd5b3ca6SJunchao Zhang } 84