1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4dd5b3ca6SJunchao Zhang 5dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 6dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Alltoall; 7dd5b3ca6SJunchao Zhang 8f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkStartCommunication_Alltoall(PetscSF sf, PetscSFLink link, PetscSFDirection direction) 9f5d27ee7SJunchao Zhang { 10f5d27ee7SJunchao Zhang MPI_Comm comm = MPI_COMM_NULL; 11f5d27ee7SJunchao Zhang void *rootbuf = NULL, *leafbuf = NULL; 12f5d27ee7SJunchao Zhang MPI_Request *req = NULL; 13f5d27ee7SJunchao Zhang MPI_Datatype unit = link->unit; 14f5d27ee7SJunchao Zhang 15f5d27ee7SJunchao Zhang PetscFunctionBegin; 16f5d27ee7SJunchao Zhang if (direction == PETSCSF_ROOT2LEAF) { 17f5d27ee7SJunchao Zhang PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 18f5d27ee7SJunchao Zhang } else { 19f5d27ee7SJunchao Zhang PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host */)); 20f5d27ee7SJunchao Zhang } 21f5d27ee7SJunchao Zhang PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 22f5d27ee7SJunchao Zhang PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, &rootbuf, &leafbuf, &req, NULL)); 23*646b835dSJunchao Zhang PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link)); 24f5d27ee7SJunchao Zhang 25f5d27ee7SJunchao Zhang if (direction == PETSCSF_ROOT2LEAF) { 26f5d27ee7SJunchao Zhang PetscCallMPI(MPIU_Ialltoall(rootbuf, 1, unit, leafbuf, 1, unit, comm, req)); 27f5d27ee7SJunchao Zhang } else { 28f5d27ee7SJunchao Zhang PetscCallMPI(MPIU_Ialltoall(leafbuf, 1, unit, rootbuf, 1, unit, comm, req)); 29f5d27ee7SJunchao Zhang } 30f5d27ee7SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 31f5d27ee7SJunchao Zhang } 32f5d27ee7SJunchao Zhang 33f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFSetCommunicationOps_Alltoall(PetscSF sf, PetscSFLink link) 34f5d27ee7SJunchao Zhang { 35f5d27ee7SJunchao Zhang PetscFunctionBegin; 36f5d27ee7SJunchao Zhang link->StartCommunication = PetscSFLinkStartCommunication_Alltoall; 37f5d27ee7SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 38f5d27ee7SJunchao Zhang } 39f5d27ee7SJunchao Zhang 40dd5b3ca6SJunchao Zhang /*===================================================================================*/ 41dd5b3ca6SJunchao Zhang /* Implementations of SF public APIs */ 42dd5b3ca6SJunchao Zhang /*===================================================================================*/ 43d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 44d71ae5a4SJacob Faibussowitsch { 45dd5b3ca6SJunchao Zhang PetscInt i; 46dd5b3ca6SJunchao Zhang 47dd5b3ca6SJunchao Zhang PetscFunctionBegin; 48dd5b3ca6SJunchao Zhang if (nroots) *nroots = sf->nroots; 49dd5b3ca6SJunchao Zhang if (nleaves) *nleaves = sf->nleaves; 50dd5b3ca6SJunchao Zhang if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 51dd5b3ca6SJunchao Zhang if (iremote) { 52dd5b3ca6SJunchao Zhang if (!sf->remote) { 539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nleaves, &sf->remote)); 54dd5b3ca6SJunchao Zhang sf->remote_alloc = sf->remote; 55dd5b3ca6SJunchao Zhang for (i = 0; i < sf->nleaves; i++) { 56dd5b3ca6SJunchao Zhang sf->remote[i].rank = i; 57dd5b3ca6SJunchao Zhang sf->remote[i].index = i; 58dd5b3ca6SJunchao Zhang } 59dd5b3ca6SJunchao Zhang } 60dd5b3ca6SJunchao Zhang *iremote = sf->remote; 61dd5b3ca6SJunchao Zhang } 623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 63dd5b3ca6SJunchao Zhang } 64dd5b3ca6SJunchao Zhang 65d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf, PetscSF *out) 66d71ae5a4SJacob Faibussowitsch { 67dd5b3ca6SJunchao Zhang PetscInt nroots = 1, nleaves = 1, *ilocal; 68dd5b3ca6SJunchao Zhang PetscSFNode *iremote = NULL; 69dd5b3ca6SJunchao Zhang PetscSF lsf; 70dd5b3ca6SJunchao Zhang PetscMPIInt rank; 71dd5b3ca6SJunchao Zhang 72dd5b3ca6SJunchao Zhang PetscFunctionBegin; 73dd5b3ca6SJunchao Zhang nroots = 1; 74dd5b3ca6SJunchao Zhang nleaves = 1; 759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &ilocal)); 779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &iremote)); 78dd5b3ca6SJunchao Zhang ilocal[0] = rank; 79dd5b3ca6SJunchao Zhang iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 80dd5b3ca6SJunchao Zhang iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 81dd5b3ca6SJunchao Zhang 829566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 839566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(lsf, nroots, nleaves, NULL /*contiguous leaves*/, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 849566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(lsf)); 85dd5b3ca6SJunchao Zhang *out = lsf; 863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 87dd5b3ca6SJunchao Zhang } 88dd5b3ca6SJunchao Zhang 89d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 90d71ae5a4SJacob Faibussowitsch { 91dd5b3ca6SJunchao Zhang PetscInt i, *tmproots, *ilocal, ndranks, ndiranks; 92dd5b3ca6SJunchao Zhang PetscSFNode *iremote; 93dd5b3ca6SJunchao Zhang PetscMPIInt nroots, *roots, nleaves, *leaves, rank; 94dd5b3ca6SJunchao Zhang MPI_Comm comm; 95dd5b3ca6SJunchao Zhang PetscSF_Basic *bas; 96dd5b3ca6SJunchao Zhang PetscSF esf; 97dd5b3ca6SJunchao Zhang 98dd5b3ca6SJunchao Zhang PetscFunctionBegin; 999566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 101dd5b3ca6SJunchao Zhang 102dd5b3ca6SJunchao Zhang /* Uniq selected[] and store the result in roots[] */ 1039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &tmproots)); 1049566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tmproots, selected, nselected)); 1059566063dSJacob Faibussowitsch PetscCall(PetscSortRemoveDupsInt(&nselected, tmproots)); /* nselected might be changed */ 106c9cc58a2SBarry Smith PetscCheck(tmproots[0] >= 0 && tmproots[nselected - 1] < sf->nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max root indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", tmproots[0], tmproots[nselected - 1], sf->nroots); 107dd5b3ca6SJunchao Zhang nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 1089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &roots)); 109dd5b3ca6SJunchao Zhang for (i = 0; i < nselected; i++) roots[i] = tmproots[i]; 1109566063dSJacob Faibussowitsch PetscCall(PetscFree(tmproots)); 111dd5b3ca6SJunchao Zhang 112dd5b3ca6SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 1139566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSided(comm, 0 /*empty msg*/, MPI_INT /*fake*/, nroots, roots, NULL /*todata*/, &nleaves, &leaves, NULL /*fromdata*/)); 114dd5b3ca6SJunchao Zhang 115dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 116dd5b3ca6SJunchao Zhang ndranks = 0; 117dd5b3ca6SJunchao Zhang for (i = 0; i < nleaves; i++) { 1189371c9d4SSatish Balay if (leaves[i] == rank) { 1199371c9d4SSatish Balay leaves[i] = -rank; 1209371c9d4SSatish Balay ndranks = 1; 1219371c9d4SSatish Balay break; 1229371c9d4SSatish Balay } 123dd5b3ca6SJunchao Zhang } 1249566063dSJacob Faibussowitsch PetscCall(PetscSortMPIInt(nleaves, leaves)); 125dd5b3ca6SJunchao Zhang if (nleaves && leaves[0] < 0) leaves[0] = rank; 126dd5b3ca6SJunchao Zhang 127dd5b3ca6SJunchao Zhang /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 1289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &ilocal)); 1299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &iremote)); 130dd5b3ca6SJunchao Zhang for (i = 0; i < nleaves; i++) { /* 1:1 map from roots to leaves */ 131dd5b3ca6SJunchao Zhang ilocal[i] = leaves[i]; 132dd5b3ca6SJunchao Zhang iremote[i].rank = leaves[i]; 133dd5b3ca6SJunchao Zhang iremote[i].index = leaves[i]; 134dd5b3ca6SJunchao Zhang } 1359566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &esf)); 1369566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */ 1379566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(esf, sf->nleaves, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 138dd5b3ca6SJunchao Zhang 139dd5b3ca6SJunchao Zhang /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 1409566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(nleaves, &esf->ranks, nleaves + 1, &esf->roffset, nleaves, &esf->rmine, nleaves, &esf->rremote)); 141dd5b3ca6SJunchao Zhang esf->nranks = nleaves; 142dd5b3ca6SJunchao Zhang esf->ndranks = ndranks; 143dd5b3ca6SJunchao Zhang esf->roffset[0] = 0; 144dd5b3ca6SJunchao Zhang for (i = 0; i < nleaves; i++) { 145dd5b3ca6SJunchao Zhang esf->ranks[i] = leaves[i]; 146dd5b3ca6SJunchao Zhang esf->roffset[i + 1] = i + 1; 147dd5b3ca6SJunchao Zhang esf->rmine[i] = leaves[i]; 148dd5b3ca6SJunchao Zhang esf->rremote[i] = leaves[i]; 149dd5b3ca6SJunchao Zhang } 150dd5b3ca6SJunchao Zhang 151dd5b3ca6SJunchao Zhang /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 152dd5b3ca6SJunchao Zhang bas = (PetscSF_Basic *)esf->data; 1539566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nroots, &bas->iranks, nroots + 1, &bas->ioffset)); 1549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nroots, &bas->irootloc)); 155dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 156dd5b3ca6SJunchao Zhang ndiranks = 0; 157dd5b3ca6SJunchao Zhang for (i = 0; i < nroots; i++) { 1589371c9d4SSatish Balay if (roots[i] == rank) { 1599371c9d4SSatish Balay roots[i] = -rank; 1609371c9d4SSatish Balay ndiranks = 1; 1619371c9d4SSatish Balay break; 1629371c9d4SSatish Balay } 163dd5b3ca6SJunchao Zhang } 1649566063dSJacob Faibussowitsch PetscCall(PetscSortMPIInt(nroots, roots)); 165dd5b3ca6SJunchao Zhang if (nroots && roots[0] < 0) roots[0] = rank; 166dd5b3ca6SJunchao Zhang 167dd5b3ca6SJunchao Zhang bas->niranks = nroots; 168dd5b3ca6SJunchao Zhang bas->ndiranks = ndiranks; 169dd5b3ca6SJunchao Zhang bas->ioffset[0] = 0; 170dd5b3ca6SJunchao Zhang bas->itotal = nroots; 171dd5b3ca6SJunchao Zhang for (i = 0; i < nroots; i++) { 172dd5b3ca6SJunchao Zhang bas->iranks[i] = roots[i]; 173dd5b3ca6SJunchao Zhang bas->ioffset[i + 1] = i + 1; 174dd5b3ca6SJunchao Zhang bas->irootloc[i] = roots[i]; 175dd5b3ca6SJunchao Zhang } 176dd5b3ca6SJunchao Zhang 17772502a1fSJunchao Zhang /* See PetscSFCreateEmbeddedRootSF_Basic */ 178cd620004SJunchao Zhang esf->nleafreqs = esf->nranks - esf->ndranks; 179cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 180cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 181cd620004SJunchao Zhang /* Setup packing related fields */ 1829566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpPackFields(esf)); 183cd620004SJunchao Zhang 184cd620004SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 185dd5b3ca6SJunchao Zhang *newsf = esf; 1863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 187dd5b3ca6SJunchao Zhang } 188dd5b3ca6SJunchao Zhang 189d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) 190d71ae5a4SJacob Faibussowitsch { 191dd5b3ca6SJunchao Zhang PetscSF_Alltoall *dat = (PetscSF_Alltoall *)sf->data; 192dd5b3ca6SJunchao Zhang 193dd5b3ca6SJunchao Zhang PetscFunctionBegin; 194f5d27ee7SJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 195ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 196f5d27ee7SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 197cd620004SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 198cd620004SJunchao Zhang 199dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 200dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 201dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 202dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 203dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 204dd5b3ca6SJunchao Zhang 205dd5b3ca6SJunchao Zhang /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 206dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 207cd620004SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgather; 208dd5b3ca6SJunchao Zhang 209dd5b3ca6SJunchao Zhang /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 210dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 211dd5b3ca6SJunchao Zhang 212dd5b3ca6SJunchao Zhang /* Alltoall stuff */ 213dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 214dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 21572502a1fSJunchao Zhang sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall; 216dd5b3ca6SJunchao Zhang 217f5d27ee7SJunchao Zhang sf->ops->SetCommunicationOps = PetscSFSetCommunicationOps_Alltoall; 218f5d27ee7SJunchao Zhang 2196677b1c1SJunchao Zhang sf->collective = PETSC_TRUE; 2206677b1c1SJunchao Zhang 2214dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&dat)); 222dd5b3ca6SJunchao Zhang sf->data = (void *)dat; 2233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 224dd5b3ca6SJunchao Zhang } 225