1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4dd5b3ca6SJunchao Zhang 5dd5b3ca6SJunchao Zhang typedef PetscSFPack_Allgatherv PetscSFPack_Alltoall; 6dd5b3ca6SJunchao Zhang #define PetscSFPackGet_Alltoall PetscSFPackGet_Allgatherv 7dd5b3ca6SJunchao Zhang 8dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 9dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Alltoall; 10dd5b3ca6SJunchao Zhang 11dd5b3ca6SJunchao Zhang /*===================================================================================*/ 12dd5b3ca6SJunchao Zhang /* Implementations of SF public APIs */ 13dd5b3ca6SJunchao Zhang /*===================================================================================*/ 14dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 15dd5b3ca6SJunchao Zhang { 16dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 17dd5b3ca6SJunchao Zhang PetscInt i; 18dd5b3ca6SJunchao Zhang 19dd5b3ca6SJunchao Zhang PetscFunctionBegin; 20dd5b3ca6SJunchao Zhang if (nroots) *nroots = sf->nroots; 21dd5b3ca6SJunchao Zhang if (nleaves) *nleaves = sf->nleaves; 22dd5b3ca6SJunchao Zhang if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 23dd5b3ca6SJunchao Zhang if (iremote) { 24dd5b3ca6SJunchao Zhang if (!sf->remote) { 25dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr); 26dd5b3ca6SJunchao Zhang sf->remote_alloc = sf->remote; 27dd5b3ca6SJunchao Zhang for (i=0; i<sf->nleaves; i++) { 28dd5b3ca6SJunchao Zhang sf->remote[i].rank = i; 29dd5b3ca6SJunchao Zhang sf->remote[i].index = i; 30dd5b3ca6SJunchao Zhang } 31dd5b3ca6SJunchao Zhang } 32dd5b3ca6SJunchao Zhang *iremote = sf->remote; 33dd5b3ca6SJunchao Zhang } 34dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 35dd5b3ca6SJunchao Zhang } 36dd5b3ca6SJunchao Zhang 37dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Alltoall(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 38dd5b3ca6SJunchao Zhang { 39dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 40dd5b3ca6SJunchao Zhang PetscSFPack_Alltoall link; 41dd5b3ca6SJunchao Zhang MPI_Comm comm; 42dd5b3ca6SJunchao Zhang void *recvbuf; 43dd5b3ca6SJunchao Zhang 44dd5b3ca6SJunchao Zhang PetscFunctionBegin; 45dd5b3ca6SJunchao Zhang ierr = PetscSFPackGet_Alltoall(sf,unit,rootdata,&link);CHKERRQ(ierr); 46dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 47dd5b3ca6SJunchao Zhang 48dd5b3ca6SJunchao Zhang if (op != MPIU_REPLACE) { 49dd5b3ca6SJunchao Zhang if (!link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);} 50dd5b3ca6SJunchao Zhang recvbuf = link->leaf; 51dd5b3ca6SJunchao Zhang } else { 52dd5b3ca6SJunchao Zhang recvbuf = leafdata; 53dd5b3ca6SJunchao Zhang } 54dd5b3ca6SJunchao Zhang ierr = MPIU_Ialltoall(rootdata,1,unit,recvbuf,1,unit,comm,&link->request);CHKERRQ(ierr); 55dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 56dd5b3ca6SJunchao Zhang } 57dd5b3ca6SJunchao Zhang 58dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 59dd5b3ca6SJunchao Zhang { 60dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 61dd5b3ca6SJunchao Zhang PetscSFPack_Alltoall link; 62dd5b3ca6SJunchao Zhang MPI_Comm comm; 63dd5b3ca6SJunchao Zhang void *recvbuf; 64dd5b3ca6SJunchao Zhang 65dd5b3ca6SJunchao Zhang PetscFunctionBegin; 66dd5b3ca6SJunchao Zhang ierr = PetscSFPackGet_Alltoall(sf,unit,leafdata,&link);CHKERRQ(ierr); 67dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 68dd5b3ca6SJunchao Zhang 69dd5b3ca6SJunchao Zhang if (op != MPIU_REPLACE) { 70dd5b3ca6SJunchao Zhang if (!link->root) {ierr = PetscMalloc(sf->nroots*link->unitbytes,&link->root);CHKERRQ(ierr);} 71dd5b3ca6SJunchao Zhang recvbuf = link->root; 72dd5b3ca6SJunchao Zhang } else { 73dd5b3ca6SJunchao Zhang recvbuf = rootdata; 74dd5b3ca6SJunchao Zhang } 75dd5b3ca6SJunchao Zhang ierr = MPIU_Ialltoall(leafdata,1,unit,recvbuf,1,unit,comm,&link->request);CHKERRQ(ierr); 76dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 77dd5b3ca6SJunchao Zhang } 78dd5b3ca6SJunchao Zhang 79dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out) 80dd5b3ca6SJunchao Zhang { 81dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 82dd5b3ca6SJunchao Zhang PetscInt nroots=1,nleaves=1,*ilocal; 83dd5b3ca6SJunchao Zhang PetscSFNode *iremote = NULL; 84dd5b3ca6SJunchao Zhang PetscSF lsf; 85dd5b3ca6SJunchao Zhang PetscMPIInt rank; 86dd5b3ca6SJunchao Zhang 87dd5b3ca6SJunchao Zhang PetscFunctionBegin; 88dd5b3ca6SJunchao Zhang nroots = 1; 89dd5b3ca6SJunchao Zhang nleaves = 1; 90dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 91dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 92dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 93dd5b3ca6SJunchao Zhang ilocal[0] = rank; 94dd5b3ca6SJunchao Zhang iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 95dd5b3ca6SJunchao Zhang iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 96dd5b3ca6SJunchao Zhang 97dd5b3ca6SJunchao Zhang ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 98dd5b3ca6SJunchao Zhang ierr = PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 99dd5b3ca6SJunchao Zhang ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 100dd5b3ca6SJunchao Zhang *out = lsf; 101dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 102dd5b3ca6SJunchao Zhang } 103dd5b3ca6SJunchao Zhang 104dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFCreateEmbeddedSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 105dd5b3ca6SJunchao Zhang { 106dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 107dd5b3ca6SJunchao Zhang PetscInt i,*tmproots,*ilocal,ndranks,ndiranks; 108dd5b3ca6SJunchao Zhang PetscSFNode *iremote; 109dd5b3ca6SJunchao Zhang PetscMPIInt nroots,*roots,nleaves,*leaves,rank; 110dd5b3ca6SJunchao Zhang MPI_Comm comm; 111dd5b3ca6SJunchao Zhang PetscSF_Basic *bas; 112dd5b3ca6SJunchao Zhang PetscSF esf; 113dd5b3ca6SJunchao Zhang 114dd5b3ca6SJunchao Zhang PetscFunctionBegin; 115dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 116dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 117dd5b3ca6SJunchao Zhang 118dd5b3ca6SJunchao Zhang /* Uniq selected[] and store the result in roots[] */ 119dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nselected,&tmproots);CHKERRQ(ierr); 120*da2e4c71SJunchao Zhang ierr = PetscArraycpy(tmproots,selected,nselected);CHKERRQ(ierr); 121dd5b3ca6SJunchao Zhang ierr = PetscSortRemoveDupsInt(&nselected,tmproots);CHKERRQ(ierr); /* nselected might be changed */ 122dd5b3ca6SJunchao Zhang if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots); 123dd5b3ca6SJunchao Zhang nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 124dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 125dd5b3ca6SJunchao Zhang for (i=0; i<nselected; i++) roots[i] = tmproots[i]; 126dd5b3ca6SJunchao Zhang ierr = PetscFree(tmproots);CHKERRQ(ierr); 127dd5b3ca6SJunchao Zhang 128dd5b3ca6SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 129dd5b3ca6SJunchao Zhang ierr = PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);CHKERRQ(ierr); 130dd5b3ca6SJunchao Zhang 131dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 132dd5b3ca6SJunchao Zhang ndranks = 0; 133dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { 134dd5b3ca6SJunchao Zhang if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;} 135dd5b3ca6SJunchao Zhang } 136dd5b3ca6SJunchao Zhang ierr = PetscSortMPIInt(nleaves,leaves);CHKERRQ(ierr); 137dd5b3ca6SJunchao Zhang if (nleaves && leaves[0] < 0) leaves[0] = rank; 138dd5b3ca6SJunchao Zhang 139dd5b3ca6SJunchao Zhang /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 140dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 141dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 142dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */ 143dd5b3ca6SJunchao Zhang ilocal[i] = leaves[i]; 144dd5b3ca6SJunchao Zhang iremote[i].rank = leaves[i]; 145dd5b3ca6SJunchao Zhang iremote[i].index = leaves[i]; 146dd5b3ca6SJunchao Zhang } 147dd5b3ca6SJunchao Zhang ierr = PetscSFCreate(comm,&esf);CHKERRQ(ierr); 148dd5b3ca6SJunchao Zhang ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 149dd5b3ca6SJunchao Zhang ierr = PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 150dd5b3ca6SJunchao Zhang 151dd5b3ca6SJunchao Zhang /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 152dd5b3ca6SJunchao Zhang ierr = PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);CHKERRQ(ierr); 153dd5b3ca6SJunchao Zhang esf->nranks = nleaves; 154dd5b3ca6SJunchao Zhang esf->ndranks = ndranks; 155dd5b3ca6SJunchao Zhang esf->roffset[0] = 0; 156dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { 157dd5b3ca6SJunchao Zhang esf->ranks[i] = leaves[i]; 158dd5b3ca6SJunchao Zhang esf->roffset[i+1] = i+1; 159dd5b3ca6SJunchao Zhang esf->rmine[i] = leaves[i]; 160dd5b3ca6SJunchao Zhang esf->rremote[i] = leaves[i]; 161dd5b3ca6SJunchao Zhang } 162dd5b3ca6SJunchao Zhang 163dd5b3ca6SJunchao Zhang /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 164dd5b3ca6SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 165dd5b3ca6SJunchao Zhang ierr = PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);CHKERRQ(ierr); 166dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nroots,&bas->irootloc);CHKERRQ(ierr); 167dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 168dd5b3ca6SJunchao Zhang ndiranks = 0; 169dd5b3ca6SJunchao Zhang for (i=0; i<nroots; i++) { 170dd5b3ca6SJunchao Zhang if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;} 171dd5b3ca6SJunchao Zhang } 172dd5b3ca6SJunchao Zhang ierr = PetscSortMPIInt(nroots,roots);CHKERRQ(ierr); 173dd5b3ca6SJunchao Zhang if (nroots && roots[0] < 0) roots[0] = rank; 174dd5b3ca6SJunchao Zhang 175dd5b3ca6SJunchao Zhang bas->niranks = nroots; 176dd5b3ca6SJunchao Zhang bas->ndiranks = ndiranks; 177dd5b3ca6SJunchao Zhang bas->ioffset[0] = 0; 178dd5b3ca6SJunchao Zhang bas->itotal = nroots; 179dd5b3ca6SJunchao Zhang for (i=0; i<nroots; i++) { 180dd5b3ca6SJunchao Zhang bas->iranks[i] = roots[i]; 181dd5b3ca6SJunchao Zhang bas->ioffset[i+1] = i+1; 182dd5b3ca6SJunchao Zhang bas->irootloc[i] = roots[i]; 183dd5b3ca6SJunchao Zhang } 184dd5b3ca6SJunchao Zhang 185dd5b3ca6SJunchao Zhang *newsf = esf; 186dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 187dd5b3ca6SJunchao Zhang } 188dd5b3ca6SJunchao Zhang 189dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) 190dd5b3ca6SJunchao Zhang { 191dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 192dd5b3ca6SJunchao Zhang PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data; 193dd5b3ca6SJunchao Zhang 194dd5b3ca6SJunchao Zhang PetscFunctionBegin; 195dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 196dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 197dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 198dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv; 199dd5b3ca6SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv; 200dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 201dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 202dd5b3ca6SJunchao Zhang 203dd5b3ca6SJunchao Zhang /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 204dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 205dd5b3ca6SJunchao Zhang 206dd5b3ca6SJunchao Zhang /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 207dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 208dd5b3ca6SJunchao Zhang 209dd5b3ca6SJunchao Zhang /* Alltoall stuff */ 210dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 211dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Alltoall; 212dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 213dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 214dd5b3ca6SJunchao Zhang sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Alltoall; 215dd5b3ca6SJunchao Zhang 216dd5b3ca6SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 217dd5b3ca6SJunchao Zhang sf->data = (void*)dat; 218dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 219dd5b3ca6SJunchao Zhang } 220dd5b3ca6SJunchao Zhang 221dd5b3ca6SJunchao Zhang 222