1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 2dd5b3ca6SJunchao Zhang 3eb02082bSJunchao Zhang /* Reuse the type. The difference is some fields (displs, recvcounts) are only significant 4eb02082bSJunchao Zhang on rank 0 in Gatherv. On other ranks they are harmless NULL. 5eb02082bSJunchao Zhang */ 6eb02082bSJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Gatherv; 7eb02082bSJunchao Zhang 8f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkStartCommunication_Gatherv(PetscSF sf, PetscSFLink link, PetscSFDirection direction) 9d71ae5a4SJacob Faibussowitsch { 10f5d27ee7SJunchao Zhang MPI_Comm comm = MPI_COMM_NULL; 11f5d27ee7SJunchao Zhang PetscMPIInt count; 12dd5b3ca6SJunchao Zhang PetscSF_Gatherv *dat = (PetscSF_Gatherv *)sf->data; 13cd620004SJunchao Zhang void *rootbuf = NULL, *leafbuf = NULL; /* buffer seen by MPI */ 14f5d27ee7SJunchao Zhang MPI_Request *req = NULL; 15f5d27ee7SJunchao Zhang MPI_Datatype unit = link->unit; 16dd5b3ca6SJunchao Zhang 17dd5b3ca6SJunchao Zhang PetscFunctionBegin; 18f5d27ee7SJunchao Zhang if (direction == PETSCSF_ROOT2LEAF) { 199566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 20f5d27ee7SJunchao Zhang } else { 21f5d27ee7SJunchao Zhang PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host */)); 22f5d27ee7SJunchao Zhang } 239566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 24f5d27ee7SJunchao Zhang PetscCall(PetscMPIIntCast(sf->nroots, &count)); 25f5d27ee7SJunchao Zhang PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, &rootbuf, &leafbuf, &req, NULL)); 26*646b835dSJunchao Zhang PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link)); 27f5d27ee7SJunchao Zhang 28f5d27ee7SJunchao Zhang if (direction == PETSCSF_ROOT2LEAF) { 29f5d27ee7SJunchao Zhang PetscCallMPI(MPIU_Igatherv(rootbuf, count, unit, leafbuf, dat->recvcounts, dat->displs, unit, 0 /*rank 0*/, comm, req)); 30f5d27ee7SJunchao Zhang } else { 31f5d27ee7SJunchao Zhang PetscCallMPI(MPIU_Iscatterv(leafbuf, dat->recvcounts, dat->displs, unit, rootbuf, count, unit, 0, comm, req)); 32f5d27ee7SJunchao Zhang } 333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34dd5b3ca6SJunchao Zhang } 35dd5b3ca6SJunchao Zhang 36f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFSetCommunicationOps_Gatherv(PetscSF sf, PetscSFLink link) 37d71ae5a4SJacob Faibussowitsch { 38dd5b3ca6SJunchao Zhang PetscFunctionBegin; 39f5d27ee7SJunchao Zhang link->StartCommunication = PetscSFLinkStartCommunication_Gatherv; 403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41dd5b3ca6SJunchao Zhang } 42dd5b3ca6SJunchao Zhang 43d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Gatherv(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) 44d71ae5a4SJacob Faibussowitsch { 45dd5b3ca6SJunchao Zhang PetscFunctionBegin; 46dd5b3ca6SJunchao Zhang /* In Gatherv, each root only has one leaf. So we just need to bcast rootdata to leafupdate and then reduce leafdata to rootdata */ 479566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, unit, rootdata, leafupdate, MPI_REPLACE)); 489566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, unit, rootdata, leafupdate, MPI_REPLACE)); 499566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, unit, leafdata, rootdata, op)); 503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 51dd5b3ca6SJunchao Zhang } 52dd5b3ca6SJunchao Zhang 53d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFCreate_Gatherv(PetscSF sf) 54d71ae5a4SJacob Faibussowitsch { 55dd5b3ca6SJunchao Zhang PetscSF_Gatherv *dat = (PetscSF_Gatherv *)sf->data; 56dd5b3ca6SJunchao Zhang 57dd5b3ca6SJunchao Zhang PetscFunctionBegin; 58f5d27ee7SJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 59ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 60f5d27ee7SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 61cd620004SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 62cd620004SJunchao Zhang 63dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv */ 64dd5b3ca6SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgatherv; 65dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 66dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 67dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Allgatherv; 68dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 69dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 70dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 71dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv; 72dd5b3ca6SJunchao Zhang 73dd5b3ca6SJunchao Zhang /* Gatherv stuff */ 74dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 75dd5b3ca6SJunchao Zhang 76f5d27ee7SJunchao Zhang sf->ops->SetCommunicationOps = PetscSFSetCommunicationOps_Gatherv; 77f5d27ee7SJunchao Zhang 786677b1c1SJunchao Zhang sf->collective = PETSC_TRUE; 796677b1c1SJunchao Zhang 804dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&dat)); 81dd5b3ca6SJunchao Zhang sf->data = (void *)dat; 823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 83dd5b3ca6SJunchao Zhang } 84