1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3dd5b3ca6SJunchao Zhang 4dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used in Gather, which is not a big deal */ 5dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Gather; 6dd5b3ca6SJunchao Zhang 7ad227feaSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastBegin_Gather(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 8dd5b3ca6SJunchao Zhang { 9cd620004SJunchao Zhang PetscSFLink link; 10855db38dSJunchao Zhang PetscMPIInt sendcount; 11dd5b3ca6SJunchao Zhang MPI_Comm comm; 12cd620004SJunchao Zhang void *rootbuf = NULL,*leafbuf = NULL; 13cd620004SJunchao Zhang MPI_Request *req; 14dd5b3ca6SJunchao Zhang 15dd5b3ca6SJunchao Zhang PetscFunctionBegin; 16*9566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link)); 17*9566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata)); 18*9566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */)); 19*9566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 20*9566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(sf->nroots,&sendcount)); 21*9566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL)); 22*9566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF)); 23*9566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Igather(rootbuf == leafbuf ? MPI_IN_PLACE : rootbuf,sendcount,unit,leafbuf,sendcount,unit,0/*rank 0*/,comm,req)); 24dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 25dd5b3ca6SJunchao Zhang } 26dd5b3ca6SJunchao Zhang 27eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Gather(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 28dd5b3ca6SJunchao Zhang { 29cd620004SJunchao Zhang PetscSFLink link; 30dd5b3ca6SJunchao Zhang PetscMPIInt recvcount; 31dd5b3ca6SJunchao Zhang MPI_Comm comm; 32cd620004SJunchao Zhang void *rootbuf = NULL,*leafbuf = NULL; 33cd620004SJunchao Zhang MPI_Request *req; 34dd5b3ca6SJunchao Zhang 35dd5b3ca6SJunchao Zhang PetscFunctionBegin; 36*9566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_REDUCE,&link)); 37*9566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata)); 38*9566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */)); 39*9566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 40*9566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(sf->nroots,&recvcount)); 41*9566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL)); 42*9566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT)); 43*9566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Iscatter(leafbuf,recvcount,unit,rootbuf == leafbuf ? MPI_IN_PLACE : rootbuf,recvcount,unit,0/*rank 0*/,comm,req)); 44dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 45dd5b3ca6SJunchao Zhang } 46dd5b3ca6SJunchao Zhang 47dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Gather(PetscSF sf) 48dd5b3ca6SJunchao Zhang { 49dd5b3ca6SJunchao Zhang PetscSF_Gather *dat = (PetscSF_Gather*)sf->data; 50dd5b3ca6SJunchao Zhang 51dd5b3ca6SJunchao Zhang PetscFunctionBegin; 52ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 53cd620004SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 54cd620004SJunchao Zhang 55dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv */ 56dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 57dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 58dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Allgatherv; 59dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 60dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 61dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 62dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv; 63dd5b3ca6SJunchao Zhang 64cd620004SJunchao Zhang /* Inherit from Allgather */ 65cd620004SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgather; 66cd620004SJunchao Zhang 67dd5b3ca6SJunchao Zhang /* Inherit from Gatherv */ 68dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 69dd5b3ca6SJunchao Zhang 70dd5b3ca6SJunchao Zhang /* Gather stuff */ 71ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Gather; 72dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Gather; 73dd5b3ca6SJunchao Zhang 74*9566063dSJacob Faibussowitsch PetscCall(PetscNewLog(sf,&dat)); 75dd5b3ca6SJunchao Zhang sf->data = (void*)dat; 76dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 77dd5b3ca6SJunchao Zhang } 78