1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4dd5b3ca6SJunchao Zhang 5dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 6dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Alltoall; 7dd5b3ca6SJunchao Zhang 8dd5b3ca6SJunchao Zhang /*===================================================================================*/ 9dd5b3ca6SJunchao Zhang /* Implementations of SF public APIs */ 10dd5b3ca6SJunchao Zhang /*===================================================================================*/ 119371c9d4SSatish Balay static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) { 12dd5b3ca6SJunchao Zhang PetscInt i; 13dd5b3ca6SJunchao Zhang 14dd5b3ca6SJunchao Zhang PetscFunctionBegin; 15dd5b3ca6SJunchao Zhang if (nroots) *nroots = sf->nroots; 16dd5b3ca6SJunchao Zhang if (nleaves) *nleaves = sf->nleaves; 17dd5b3ca6SJunchao Zhang if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 18dd5b3ca6SJunchao Zhang if (iremote) { 19dd5b3ca6SJunchao Zhang if (!sf->remote) { 209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nleaves, &sf->remote)); 21dd5b3ca6SJunchao Zhang sf->remote_alloc = sf->remote; 22dd5b3ca6SJunchao Zhang for (i = 0; i < sf->nleaves; i++) { 23dd5b3ca6SJunchao Zhang sf->remote[i].rank = i; 24dd5b3ca6SJunchao Zhang sf->remote[i].index = i; 25dd5b3ca6SJunchao Zhang } 26dd5b3ca6SJunchao Zhang } 27dd5b3ca6SJunchao Zhang *iremote = sf->remote; 28dd5b3ca6SJunchao Zhang } 29dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 30dd5b3ca6SJunchao Zhang } 31dd5b3ca6SJunchao Zhang 329371c9d4SSatish Balay static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) { 33cd620004SJunchao Zhang PetscSFLink link; 34dd5b3ca6SJunchao Zhang MPI_Comm comm; 35cd620004SJunchao Zhang void *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */ 36cd620004SJunchao Zhang MPI_Request *req; 37dd5b3ca6SJunchao Zhang 38dd5b3ca6SJunchao Zhang PetscFunctionBegin; 399566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link)); 409566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata)); 419566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 429566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 439566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL)); 449566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF)); 459566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Ialltoall(rootbuf, 1, unit, leafbuf, 1, unit, comm, req)); 46dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 47dd5b3ca6SJunchao Zhang } 48dd5b3ca6SJunchao Zhang 499371c9d4SSatish Balay static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) { 50cd620004SJunchao Zhang PetscSFLink link; 51dd5b3ca6SJunchao Zhang MPI_Comm comm; 52cd620004SJunchao Zhang void *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */ 53cd620004SJunchao Zhang MPI_Request *req; 54dd5b3ca6SJunchao Zhang 55dd5b3ca6SJunchao Zhang PetscFunctionBegin; 569566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_REDUCE, &link)); 579566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata)); 589566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 599566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 609566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL)); 619566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT)); 629566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Ialltoall(leafbuf, 1, unit, rootbuf, 1, unit, comm, req)); 63dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 64dd5b3ca6SJunchao Zhang } 65dd5b3ca6SJunchao Zhang 669371c9d4SSatish Balay static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf, PetscSF *out) { 67dd5b3ca6SJunchao Zhang PetscInt nroots = 1, nleaves = 1, *ilocal; 68dd5b3ca6SJunchao Zhang PetscSFNode *iremote = NULL; 69dd5b3ca6SJunchao Zhang PetscSF lsf; 70dd5b3ca6SJunchao Zhang PetscMPIInt rank; 71dd5b3ca6SJunchao Zhang 72dd5b3ca6SJunchao Zhang PetscFunctionBegin; 73dd5b3ca6SJunchao Zhang nroots = 1; 74dd5b3ca6SJunchao Zhang nleaves = 1; 759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &ilocal)); 779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &iremote)); 78dd5b3ca6SJunchao Zhang ilocal[0] = rank; 79dd5b3ca6SJunchao Zhang iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 80dd5b3ca6SJunchao Zhang iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 81dd5b3ca6SJunchao Zhang 829566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 839566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(lsf, nroots, nleaves, NULL /*contiguous leaves*/, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 849566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(lsf)); 85dd5b3ca6SJunchao Zhang *out = lsf; 86dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 87dd5b3ca6SJunchao Zhang } 88dd5b3ca6SJunchao Zhang 899371c9d4SSatish Balay static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) { 90dd5b3ca6SJunchao Zhang PetscInt i, *tmproots, *ilocal, ndranks, ndiranks; 91dd5b3ca6SJunchao Zhang PetscSFNode *iremote; 92dd5b3ca6SJunchao Zhang PetscMPIInt nroots, *roots, nleaves, *leaves, rank; 93dd5b3ca6SJunchao Zhang MPI_Comm comm; 94dd5b3ca6SJunchao Zhang PetscSF_Basic *bas; 95dd5b3ca6SJunchao Zhang PetscSF esf; 96dd5b3ca6SJunchao Zhang 97dd5b3ca6SJunchao Zhang PetscFunctionBegin; 989566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 999566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 100dd5b3ca6SJunchao Zhang 101dd5b3ca6SJunchao Zhang /* Uniq selected[] and store the result in roots[] */ 1029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &tmproots)); 1039566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tmproots, selected, nselected)); 1049566063dSJacob Faibussowitsch PetscCall(PetscSortRemoveDupsInt(&nselected, tmproots)); /* nselected might be changed */ 105c9cc58a2SBarry Smith PetscCheck(tmproots[0] >= 0 && tmproots[nselected - 1] < sf->nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max root indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", tmproots[0], tmproots[nselected - 1], sf->nroots); 106dd5b3ca6SJunchao Zhang nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 1079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &roots)); 108dd5b3ca6SJunchao Zhang for (i = 0; i < nselected; i++) roots[i] = tmproots[i]; 1099566063dSJacob Faibussowitsch PetscCall(PetscFree(tmproots)); 110dd5b3ca6SJunchao Zhang 111dd5b3ca6SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 1129566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSided(comm, 0 /*empty msg*/, MPI_INT /*fake*/, nroots, roots, NULL /*todata*/, &nleaves, &leaves, NULL /*fromdata*/)); 113dd5b3ca6SJunchao Zhang 114dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 115dd5b3ca6SJunchao Zhang ndranks = 0; 116dd5b3ca6SJunchao Zhang for (i = 0; i < nleaves; i++) { 1179371c9d4SSatish Balay if (leaves[i] == rank) { 1189371c9d4SSatish Balay leaves[i] = -rank; 1199371c9d4SSatish Balay ndranks = 1; 1209371c9d4SSatish Balay break; 1219371c9d4SSatish Balay } 122dd5b3ca6SJunchao Zhang } 1239566063dSJacob Faibussowitsch PetscCall(PetscSortMPIInt(nleaves, leaves)); 124dd5b3ca6SJunchao Zhang if (nleaves && leaves[0] < 0) leaves[0] = rank; 125dd5b3ca6SJunchao Zhang 126dd5b3ca6SJunchao Zhang /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 1279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &ilocal)); 1289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &iremote)); 129dd5b3ca6SJunchao Zhang for (i = 0; i < nleaves; i++) { /* 1:1 map from roots to leaves */ 130dd5b3ca6SJunchao Zhang ilocal[i] = leaves[i]; 131dd5b3ca6SJunchao Zhang iremote[i].rank = leaves[i]; 132dd5b3ca6SJunchao Zhang iremote[i].index = leaves[i]; 133dd5b3ca6SJunchao Zhang } 1349566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &esf)); 1359566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */ 1369566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(esf, sf->nleaves, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 137dd5b3ca6SJunchao Zhang 138dd5b3ca6SJunchao Zhang /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 1399566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(nleaves, &esf->ranks, nleaves + 1, &esf->roffset, nleaves, &esf->rmine, nleaves, &esf->rremote)); 140dd5b3ca6SJunchao Zhang esf->nranks = nleaves; 141dd5b3ca6SJunchao Zhang esf->ndranks = ndranks; 142dd5b3ca6SJunchao Zhang esf->roffset[0] = 0; 143dd5b3ca6SJunchao Zhang for (i = 0; i < nleaves; i++) { 144dd5b3ca6SJunchao Zhang esf->ranks[i] = leaves[i]; 145dd5b3ca6SJunchao Zhang esf->roffset[i + 1] = i + 1; 146dd5b3ca6SJunchao Zhang esf->rmine[i] = leaves[i]; 147dd5b3ca6SJunchao Zhang esf->rremote[i] = leaves[i]; 148dd5b3ca6SJunchao Zhang } 149dd5b3ca6SJunchao Zhang 150dd5b3ca6SJunchao Zhang /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 151dd5b3ca6SJunchao Zhang bas = (PetscSF_Basic *)esf->data; 1529566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nroots, &bas->iranks, nroots + 1, &bas->ioffset)); 1539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nroots, &bas->irootloc)); 154dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 155dd5b3ca6SJunchao Zhang ndiranks = 0; 156dd5b3ca6SJunchao Zhang for (i = 0; i < nroots; i++) { 1579371c9d4SSatish Balay if (roots[i] == rank) { 1589371c9d4SSatish Balay roots[i] = -rank; 1599371c9d4SSatish Balay ndiranks = 1; 1609371c9d4SSatish Balay break; 1619371c9d4SSatish Balay } 162dd5b3ca6SJunchao Zhang } 1639566063dSJacob Faibussowitsch PetscCall(PetscSortMPIInt(nroots, roots)); 164dd5b3ca6SJunchao Zhang if (nroots && roots[0] < 0) roots[0] = rank; 165dd5b3ca6SJunchao Zhang 166dd5b3ca6SJunchao Zhang bas->niranks = nroots; 167dd5b3ca6SJunchao Zhang bas->ndiranks = ndiranks; 168dd5b3ca6SJunchao Zhang bas->ioffset[0] = 0; 169dd5b3ca6SJunchao Zhang bas->itotal = nroots; 170dd5b3ca6SJunchao Zhang for (i = 0; i < nroots; i++) { 171dd5b3ca6SJunchao Zhang bas->iranks[i] = roots[i]; 172dd5b3ca6SJunchao Zhang bas->ioffset[i + 1] = i + 1; 173dd5b3ca6SJunchao Zhang bas->irootloc[i] = roots[i]; 174dd5b3ca6SJunchao Zhang } 175dd5b3ca6SJunchao Zhang 17672502a1fSJunchao Zhang /* See PetscSFCreateEmbeddedRootSF_Basic */ 177cd620004SJunchao Zhang esf->nleafreqs = esf->nranks - esf->ndranks; 178cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 179cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 180cd620004SJunchao Zhang /* Setup packing related fields */ 1819566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpPackFields(esf)); 182cd620004SJunchao Zhang 183cd620004SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 184dd5b3ca6SJunchao Zhang *newsf = esf; 185dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 186dd5b3ca6SJunchao Zhang } 187dd5b3ca6SJunchao Zhang 1889371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) { 189dd5b3ca6SJunchao Zhang PetscSF_Alltoall *dat = (PetscSF_Alltoall *)sf->data; 190dd5b3ca6SJunchao Zhang 191dd5b3ca6SJunchao Zhang PetscFunctionBegin; 192ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 193cd620004SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 194cd620004SJunchao Zhang 195dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 196dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 197dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 198dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 199dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 200dd5b3ca6SJunchao Zhang 201dd5b3ca6SJunchao Zhang /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 202dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 203cd620004SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgather; 204dd5b3ca6SJunchao Zhang 205dd5b3ca6SJunchao Zhang /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 206dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 207dd5b3ca6SJunchao Zhang 208dd5b3ca6SJunchao Zhang /* Alltoall stuff */ 209dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 210ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Alltoall; 211dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 212dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 21372502a1fSJunchao Zhang sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall; 214dd5b3ca6SJunchao Zhang 215*4dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&dat)); 216dd5b3ca6SJunchao Zhang sf->data = (void *)dat; 217dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 218dd5b3ca6SJunchao Zhang } 219