1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4dd5b3ca6SJunchao Zhang 5dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 6dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Alltoall; 7dd5b3ca6SJunchao Zhang 8dd5b3ca6SJunchao Zhang /*===================================================================================*/ 9dd5b3ca6SJunchao Zhang /* Implementations of SF public APIs */ 10dd5b3ca6SJunchao Zhang /*===================================================================================*/ 11dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 12dd5b3ca6SJunchao Zhang { 13dd5b3ca6SJunchao Zhang PetscInt i; 14dd5b3ca6SJunchao Zhang 15dd5b3ca6SJunchao Zhang PetscFunctionBegin; 16dd5b3ca6SJunchao Zhang if (nroots) *nroots = sf->nroots; 17dd5b3ca6SJunchao Zhang if (nleaves) *nleaves = sf->nleaves; 18dd5b3ca6SJunchao Zhang if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 19dd5b3ca6SJunchao Zhang if (iremote) { 20dd5b3ca6SJunchao Zhang if (!sf->remote) { 219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nleaves,&sf->remote)); 22dd5b3ca6SJunchao Zhang sf->remote_alloc = sf->remote; 23dd5b3ca6SJunchao Zhang for (i=0; i<sf->nleaves; i++) { 24dd5b3ca6SJunchao Zhang sf->remote[i].rank = i; 25dd5b3ca6SJunchao Zhang sf->remote[i].index = i; 26dd5b3ca6SJunchao Zhang } 27dd5b3ca6SJunchao Zhang } 28dd5b3ca6SJunchao Zhang *iremote = sf->remote; 29dd5b3ca6SJunchao Zhang } 30dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 31dd5b3ca6SJunchao Zhang } 32dd5b3ca6SJunchao Zhang 33ad227feaSJunchao Zhang static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 34dd5b3ca6SJunchao Zhang { 35cd620004SJunchao Zhang PetscSFLink link; 36dd5b3ca6SJunchao Zhang MPI_Comm comm; 37cd620004SJunchao Zhang void *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */ 38cd620004SJunchao Zhang MPI_Request *req; 39dd5b3ca6SJunchao Zhang 40dd5b3ca6SJunchao Zhang PetscFunctionBegin; 419566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link)); 429566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata)); 439566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */)); 449566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 459566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL)); 469566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF)); 479566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Ialltoall(rootbuf,1,unit,leafbuf,1,unit,comm,req)); 48dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 49dd5b3ca6SJunchao Zhang } 50dd5b3ca6SJunchao Zhang 51eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 52dd5b3ca6SJunchao Zhang { 53cd620004SJunchao Zhang PetscSFLink link; 54dd5b3ca6SJunchao Zhang MPI_Comm comm; 55cd620004SJunchao Zhang void *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */ 56cd620004SJunchao Zhang MPI_Request *req; 57dd5b3ca6SJunchao Zhang 58dd5b3ca6SJunchao Zhang PetscFunctionBegin; 599566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_REDUCE,&link)); 609566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata)); 619566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */)); 629566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 639566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL)); 649566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT)); 659566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Ialltoall(leafbuf,1,unit,rootbuf,1,unit,comm,req)); 66dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 67dd5b3ca6SJunchao Zhang } 68dd5b3ca6SJunchao Zhang 69dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out) 70dd5b3ca6SJunchao Zhang { 71dd5b3ca6SJunchao Zhang PetscInt nroots = 1,nleaves = 1,*ilocal; 72dd5b3ca6SJunchao Zhang PetscSFNode *iremote = NULL; 73dd5b3ca6SJunchao Zhang PetscSF lsf; 74dd5b3ca6SJunchao Zhang PetscMPIInt rank; 75dd5b3ca6SJunchao Zhang 76dd5b3ca6SJunchao Zhang PetscFunctionBegin; 77dd5b3ca6SJunchao Zhang nroots = 1; 78dd5b3ca6SJunchao Zhang nleaves = 1; 799566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves,&ilocal)); 819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves,&iremote)); 82dd5b3ca6SJunchao Zhang ilocal[0] = rank; 83dd5b3ca6SJunchao Zhang iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 84dd5b3ca6SJunchao Zhang iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 85dd5b3ca6SJunchao Zhang 869566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf)); 879566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 889566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(lsf)); 89dd5b3ca6SJunchao Zhang *out = lsf; 90dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 91dd5b3ca6SJunchao Zhang } 92dd5b3ca6SJunchao Zhang 9372502a1fSJunchao Zhang static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 94dd5b3ca6SJunchao Zhang { 95dd5b3ca6SJunchao Zhang PetscInt i,*tmproots,*ilocal,ndranks,ndiranks; 96dd5b3ca6SJunchao Zhang PetscSFNode *iremote; 97dd5b3ca6SJunchao Zhang PetscMPIInt nroots,*roots,nleaves,*leaves,rank; 98dd5b3ca6SJunchao Zhang MPI_Comm comm; 99dd5b3ca6SJunchao Zhang PetscSF_Basic *bas; 100dd5b3ca6SJunchao Zhang PetscSF esf; 101dd5b3ca6SJunchao Zhang 102dd5b3ca6SJunchao Zhang PetscFunctionBegin; 1039566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 1049566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm,&rank)); 105dd5b3ca6SJunchao Zhang 106dd5b3ca6SJunchao Zhang /* Uniq selected[] and store the result in roots[] */ 1079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected,&tmproots)); 1089566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tmproots,selected,nselected)); 1099566063dSJacob Faibussowitsch PetscCall(PetscSortRemoveDupsInt(&nselected,tmproots)); /* nselected might be changed */ 110*c9cc58a2SBarry Smith PetscCheck(tmproots[0] >= 0 && tmproots[nselected-1] < sf->nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",tmproots[0],tmproots[nselected-1],sf->nroots); 111dd5b3ca6SJunchao Zhang nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 1129566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected,&roots)); 113dd5b3ca6SJunchao Zhang for (i=0; i<nselected; i++) roots[i] = tmproots[i]; 1149566063dSJacob Faibussowitsch PetscCall(PetscFree(tmproots)); 115dd5b3ca6SJunchao Zhang 116dd5b3ca6SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 1179566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/)); 118dd5b3ca6SJunchao Zhang 119dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 120dd5b3ca6SJunchao Zhang ndranks = 0; 121dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { 122dd5b3ca6SJunchao Zhang if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;} 123dd5b3ca6SJunchao Zhang } 1249566063dSJacob Faibussowitsch PetscCall(PetscSortMPIInt(nleaves,leaves)); 125dd5b3ca6SJunchao Zhang if (nleaves && leaves[0] < 0) leaves[0] = rank; 126dd5b3ca6SJunchao Zhang 127dd5b3ca6SJunchao Zhang /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 1289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves,&ilocal)); 1299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves,&iremote)); 130dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */ 131dd5b3ca6SJunchao Zhang ilocal[i] = leaves[i]; 132dd5b3ca6SJunchao Zhang iremote[i].rank = leaves[i]; 133dd5b3ca6SJunchao Zhang iremote[i].index = leaves[i]; 134dd5b3ca6SJunchao Zhang } 1359566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm,&esf)); 1369566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(esf,PETSCSFBASIC)); /* This optimized routine can only create a basic sf */ 1379566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 138dd5b3ca6SJunchao Zhang 139dd5b3ca6SJunchao Zhang /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 1409566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote)); 141dd5b3ca6SJunchao Zhang esf->nranks = nleaves; 142dd5b3ca6SJunchao Zhang esf->ndranks = ndranks; 143dd5b3ca6SJunchao Zhang esf->roffset[0] = 0; 144dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { 145dd5b3ca6SJunchao Zhang esf->ranks[i] = leaves[i]; 146dd5b3ca6SJunchao Zhang esf->roffset[i+1] = i+1; 147dd5b3ca6SJunchao Zhang esf->rmine[i] = leaves[i]; 148dd5b3ca6SJunchao Zhang esf->rremote[i] = leaves[i]; 149dd5b3ca6SJunchao Zhang } 150dd5b3ca6SJunchao Zhang 151dd5b3ca6SJunchao Zhang /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 152dd5b3ca6SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 1539566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset)); 1549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nroots,&bas->irootloc)); 155dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 156dd5b3ca6SJunchao Zhang ndiranks = 0; 157dd5b3ca6SJunchao Zhang for (i=0; i<nroots; i++) { 158dd5b3ca6SJunchao Zhang if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;} 159dd5b3ca6SJunchao Zhang } 1609566063dSJacob Faibussowitsch PetscCall(PetscSortMPIInt(nroots,roots)); 161dd5b3ca6SJunchao Zhang if (nroots && roots[0] < 0) roots[0] = rank; 162dd5b3ca6SJunchao Zhang 163dd5b3ca6SJunchao Zhang bas->niranks = nroots; 164dd5b3ca6SJunchao Zhang bas->ndiranks = ndiranks; 165dd5b3ca6SJunchao Zhang bas->ioffset[0] = 0; 166dd5b3ca6SJunchao Zhang bas->itotal = nroots; 167dd5b3ca6SJunchao Zhang for (i=0; i<nroots; i++) { 168dd5b3ca6SJunchao Zhang bas->iranks[i] = roots[i]; 169dd5b3ca6SJunchao Zhang bas->ioffset[i+1] = i+1; 170dd5b3ca6SJunchao Zhang bas->irootloc[i] = roots[i]; 171dd5b3ca6SJunchao Zhang } 172dd5b3ca6SJunchao Zhang 17372502a1fSJunchao Zhang /* See PetscSFCreateEmbeddedRootSF_Basic */ 174cd620004SJunchao Zhang esf->nleafreqs = esf->nranks - esf->ndranks; 175cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 176cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 177cd620004SJunchao Zhang /* Setup packing related fields */ 1789566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpPackFields(esf)); 179cd620004SJunchao Zhang 180cd620004SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 181dd5b3ca6SJunchao Zhang *newsf = esf; 182dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 183dd5b3ca6SJunchao Zhang } 184dd5b3ca6SJunchao Zhang 185dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) 186dd5b3ca6SJunchao Zhang { 187dd5b3ca6SJunchao Zhang PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data; 188dd5b3ca6SJunchao Zhang 189dd5b3ca6SJunchao Zhang PetscFunctionBegin; 190ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 191cd620004SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 192cd620004SJunchao Zhang 193dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 194dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 195dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 196dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 197dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 198dd5b3ca6SJunchao Zhang 199dd5b3ca6SJunchao Zhang /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 200dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 201cd620004SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgather; 202dd5b3ca6SJunchao Zhang 203dd5b3ca6SJunchao Zhang /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 204dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 205dd5b3ca6SJunchao Zhang 206dd5b3ca6SJunchao Zhang /* Alltoall stuff */ 207dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 208ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Alltoall; 209dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 210dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 21172502a1fSJunchao Zhang sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall; 212dd5b3ca6SJunchao Zhang 2139566063dSJacob Faibussowitsch PetscCall(PetscNewLog(sf,&dat)); 214dd5b3ca6SJunchao Zhang sf->data = (void*)dat; 215dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 216dd5b3ca6SJunchao Zhang } 217