1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4dd5b3ca6SJunchao Zhang 5dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 6dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Alltoall; 7dd5b3ca6SJunchao Zhang 8dd5b3ca6SJunchao Zhang /*===================================================================================*/ 9dd5b3ca6SJunchao Zhang /* Implementations of SF public APIs */ 10dd5b3ca6SJunchao Zhang /*===================================================================================*/ 11d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 12d71ae5a4SJacob Faibussowitsch { 13dd5b3ca6SJunchao Zhang PetscInt i; 14dd5b3ca6SJunchao Zhang 15dd5b3ca6SJunchao Zhang PetscFunctionBegin; 16dd5b3ca6SJunchao Zhang if (nroots) *nroots = sf->nroots; 17dd5b3ca6SJunchao Zhang if (nleaves) *nleaves = sf->nleaves; 18dd5b3ca6SJunchao Zhang if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 19dd5b3ca6SJunchao Zhang if (iremote) { 20dd5b3ca6SJunchao Zhang if (!sf->remote) { 219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nleaves, &sf->remote)); 22dd5b3ca6SJunchao Zhang sf->remote_alloc = sf->remote; 23dd5b3ca6SJunchao Zhang for (i = 0; i < sf->nleaves; i++) { 24dd5b3ca6SJunchao Zhang sf->remote[i].rank = i; 25dd5b3ca6SJunchao Zhang sf->remote[i].index = i; 26dd5b3ca6SJunchao Zhang } 27dd5b3ca6SJunchao Zhang } 28dd5b3ca6SJunchao Zhang *iremote = sf->remote; 29dd5b3ca6SJunchao Zhang } 30*3ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31dd5b3ca6SJunchao Zhang } 32dd5b3ca6SJunchao Zhang 33d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 34d71ae5a4SJacob Faibussowitsch { 35cd620004SJunchao Zhang PetscSFLink link; 36dd5b3ca6SJunchao Zhang MPI_Comm comm; 37cd620004SJunchao Zhang void *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */ 38cd620004SJunchao Zhang MPI_Request *req; 39dd5b3ca6SJunchao Zhang 40dd5b3ca6SJunchao Zhang PetscFunctionBegin; 419566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link)); 429566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata)); 439566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 449566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 459566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL)); 469566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF)); 479566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Ialltoall(rootbuf, 1, unit, leafbuf, 1, unit, comm, req)); 48*3ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 49dd5b3ca6SJunchao Zhang } 50dd5b3ca6SJunchao Zhang 51d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 52d71ae5a4SJacob Faibussowitsch { 53cd620004SJunchao Zhang PetscSFLink link; 54dd5b3ca6SJunchao Zhang MPI_Comm comm; 55cd620004SJunchao Zhang void *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */ 56cd620004SJunchao Zhang MPI_Request *req; 57dd5b3ca6SJunchao Zhang 58dd5b3ca6SJunchao Zhang PetscFunctionBegin; 599566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_REDUCE, &link)); 609566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata)); 619566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 629566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 639566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL)); 649566063dSJacob Faibussowitsch PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT)); 659566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Ialltoall(leafbuf, 1, unit, rootbuf, 1, unit, comm, req)); 66*3ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 67dd5b3ca6SJunchao Zhang } 68dd5b3ca6SJunchao Zhang 69d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf, PetscSF *out) 70d71ae5a4SJacob Faibussowitsch { 71dd5b3ca6SJunchao Zhang PetscInt nroots = 1, nleaves = 1, *ilocal; 72dd5b3ca6SJunchao Zhang PetscSFNode *iremote = NULL; 73dd5b3ca6SJunchao Zhang PetscSF lsf; 74dd5b3ca6SJunchao Zhang PetscMPIInt rank; 75dd5b3ca6SJunchao Zhang 76dd5b3ca6SJunchao Zhang PetscFunctionBegin; 77dd5b3ca6SJunchao Zhang nroots = 1; 78dd5b3ca6SJunchao Zhang nleaves = 1; 799566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &ilocal)); 819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &iremote)); 82dd5b3ca6SJunchao Zhang ilocal[0] = rank; 83dd5b3ca6SJunchao Zhang iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 84dd5b3ca6SJunchao Zhang iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 85dd5b3ca6SJunchao Zhang 869566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 879566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(lsf, nroots, nleaves, NULL /*contiguous leaves*/, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 889566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(lsf)); 89dd5b3ca6SJunchao Zhang *out = lsf; 90*3ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 91dd5b3ca6SJunchao Zhang } 92dd5b3ca6SJunchao Zhang 93d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 94d71ae5a4SJacob Faibussowitsch { 95dd5b3ca6SJunchao Zhang PetscInt i, *tmproots, *ilocal, ndranks, ndiranks; 96dd5b3ca6SJunchao Zhang PetscSFNode *iremote; 97dd5b3ca6SJunchao Zhang PetscMPIInt nroots, *roots, nleaves, *leaves, rank; 98dd5b3ca6SJunchao Zhang MPI_Comm comm; 99dd5b3ca6SJunchao Zhang PetscSF_Basic *bas; 100dd5b3ca6SJunchao Zhang PetscSF esf; 101dd5b3ca6SJunchao Zhang 102dd5b3ca6SJunchao Zhang PetscFunctionBegin; 1039566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1049566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 105dd5b3ca6SJunchao Zhang 106dd5b3ca6SJunchao Zhang /* Uniq selected[] and store the result in roots[] */ 1079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &tmproots)); 1089566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tmproots, selected, nselected)); 1099566063dSJacob Faibussowitsch PetscCall(PetscSortRemoveDupsInt(&nselected, tmproots)); /* nselected might be changed */ 110c9cc58a2SBarry Smith PetscCheck(tmproots[0] >= 0 && tmproots[nselected - 1] < sf->nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max root indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", tmproots[0], tmproots[nselected - 1], sf->nroots); 111dd5b3ca6SJunchao Zhang nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 1129566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &roots)); 113dd5b3ca6SJunchao Zhang for (i = 0; i < nselected; i++) roots[i] = tmproots[i]; 1149566063dSJacob Faibussowitsch PetscCall(PetscFree(tmproots)); 115dd5b3ca6SJunchao Zhang 116dd5b3ca6SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 1179566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSided(comm, 0 /*empty msg*/, MPI_INT /*fake*/, nroots, roots, NULL /*todata*/, &nleaves, &leaves, NULL /*fromdata*/)); 118dd5b3ca6SJunchao Zhang 119dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 120dd5b3ca6SJunchao Zhang ndranks = 0; 121dd5b3ca6SJunchao Zhang for (i = 0; i < nleaves; i++) { 1229371c9d4SSatish Balay if (leaves[i] == rank) { 1239371c9d4SSatish Balay leaves[i] = -rank; 1249371c9d4SSatish Balay ndranks = 1; 1259371c9d4SSatish Balay break; 1269371c9d4SSatish Balay } 127dd5b3ca6SJunchao Zhang } 1289566063dSJacob Faibussowitsch PetscCall(PetscSortMPIInt(nleaves, leaves)); 129dd5b3ca6SJunchao Zhang if (nleaves && leaves[0] < 0) leaves[0] = rank; 130dd5b3ca6SJunchao Zhang 131dd5b3ca6SJunchao Zhang /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 1329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &ilocal)); 1339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &iremote)); 134dd5b3ca6SJunchao Zhang for (i = 0; i < nleaves; i++) { /* 1:1 map from roots to leaves */ 135dd5b3ca6SJunchao Zhang ilocal[i] = leaves[i]; 136dd5b3ca6SJunchao Zhang iremote[i].rank = leaves[i]; 137dd5b3ca6SJunchao Zhang iremote[i].index = leaves[i]; 138dd5b3ca6SJunchao Zhang } 1399566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &esf)); 1409566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */ 1419566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(esf, sf->nleaves, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 142dd5b3ca6SJunchao Zhang 143dd5b3ca6SJunchao Zhang /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 1449566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(nleaves, &esf->ranks, nleaves + 1, &esf->roffset, nleaves, &esf->rmine, nleaves, &esf->rremote)); 145dd5b3ca6SJunchao Zhang esf->nranks = nleaves; 146dd5b3ca6SJunchao Zhang esf->ndranks = ndranks; 147dd5b3ca6SJunchao Zhang esf->roffset[0] = 0; 148dd5b3ca6SJunchao Zhang for (i = 0; i < nleaves; i++) { 149dd5b3ca6SJunchao Zhang esf->ranks[i] = leaves[i]; 150dd5b3ca6SJunchao Zhang esf->roffset[i + 1] = i + 1; 151dd5b3ca6SJunchao Zhang esf->rmine[i] = leaves[i]; 152dd5b3ca6SJunchao Zhang esf->rremote[i] = leaves[i]; 153dd5b3ca6SJunchao Zhang } 154dd5b3ca6SJunchao Zhang 155dd5b3ca6SJunchao Zhang /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 156dd5b3ca6SJunchao Zhang bas = (PetscSF_Basic *)esf->data; 1579566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nroots, &bas->iranks, nroots + 1, &bas->ioffset)); 1589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nroots, &bas->irootloc)); 159dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 160dd5b3ca6SJunchao Zhang ndiranks = 0; 161dd5b3ca6SJunchao Zhang for (i = 0; i < nroots; i++) { 1629371c9d4SSatish Balay if (roots[i] == rank) { 1639371c9d4SSatish Balay roots[i] = -rank; 1649371c9d4SSatish Balay ndiranks = 1; 1659371c9d4SSatish Balay break; 1669371c9d4SSatish Balay } 167dd5b3ca6SJunchao Zhang } 1689566063dSJacob Faibussowitsch PetscCall(PetscSortMPIInt(nroots, roots)); 169dd5b3ca6SJunchao Zhang if (nroots && roots[0] < 0) roots[0] = rank; 170dd5b3ca6SJunchao Zhang 171dd5b3ca6SJunchao Zhang bas->niranks = nroots; 172dd5b3ca6SJunchao Zhang bas->ndiranks = ndiranks; 173dd5b3ca6SJunchao Zhang bas->ioffset[0] = 0; 174dd5b3ca6SJunchao Zhang bas->itotal = nroots; 175dd5b3ca6SJunchao Zhang for (i = 0; i < nroots; i++) { 176dd5b3ca6SJunchao Zhang bas->iranks[i] = roots[i]; 177dd5b3ca6SJunchao Zhang bas->ioffset[i + 1] = i + 1; 178dd5b3ca6SJunchao Zhang bas->irootloc[i] = roots[i]; 179dd5b3ca6SJunchao Zhang } 180dd5b3ca6SJunchao Zhang 18172502a1fSJunchao Zhang /* See PetscSFCreateEmbeddedRootSF_Basic */ 182cd620004SJunchao Zhang esf->nleafreqs = esf->nranks - esf->ndranks; 183cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 184cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 185cd620004SJunchao Zhang /* Setup packing related fields */ 1869566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpPackFields(esf)); 187cd620004SJunchao Zhang 188cd620004SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 189dd5b3ca6SJunchao Zhang *newsf = esf; 190*3ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 191dd5b3ca6SJunchao Zhang } 192dd5b3ca6SJunchao Zhang 193d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) 194d71ae5a4SJacob Faibussowitsch { 195dd5b3ca6SJunchao Zhang PetscSF_Alltoall *dat = (PetscSF_Alltoall *)sf->data; 196dd5b3ca6SJunchao Zhang 197dd5b3ca6SJunchao Zhang PetscFunctionBegin; 198ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 199cd620004SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 200cd620004SJunchao Zhang 201dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 202dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 203dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 204dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 205dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 206dd5b3ca6SJunchao Zhang 207dd5b3ca6SJunchao Zhang /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 208dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 209cd620004SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgather; 210dd5b3ca6SJunchao Zhang 211dd5b3ca6SJunchao Zhang /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 212dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 213dd5b3ca6SJunchao Zhang 214dd5b3ca6SJunchao Zhang /* Alltoall stuff */ 215dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 216ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Alltoall; 217dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 218dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 21972502a1fSJunchao Zhang sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall; 220dd5b3ca6SJunchao Zhang 2214dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&dat)); 222dd5b3ca6SJunchao Zhang sf->data = (void *)dat; 223*3ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 224dd5b3ca6SJunchao Zhang } 225