1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4dd5b3ca6SJunchao Zhang 5dd5b3ca6SJunchao Zhang #define PetscSFPackGet_Alltoall PetscSFPackGet_Allgatherv 6dd5b3ca6SJunchao Zhang 7dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 8dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Alltoall; 9dd5b3ca6SJunchao Zhang 10dd5b3ca6SJunchao Zhang /*===================================================================================*/ 11dd5b3ca6SJunchao Zhang /* Implementations of SF public APIs */ 12dd5b3ca6SJunchao Zhang /*===================================================================================*/ 13dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 14dd5b3ca6SJunchao Zhang { 15dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 16dd5b3ca6SJunchao Zhang PetscInt i; 17dd5b3ca6SJunchao Zhang 18dd5b3ca6SJunchao Zhang PetscFunctionBegin; 19dd5b3ca6SJunchao Zhang if (nroots) *nroots = sf->nroots; 20dd5b3ca6SJunchao Zhang if (nleaves) *nleaves = sf->nleaves; 21dd5b3ca6SJunchao Zhang if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 22dd5b3ca6SJunchao Zhang if (iremote) { 23dd5b3ca6SJunchao Zhang if (!sf->remote) { 24dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr); 25dd5b3ca6SJunchao Zhang sf->remote_alloc = sf->remote; 26dd5b3ca6SJunchao Zhang for (i=0; i<sf->nleaves; i++) { 27dd5b3ca6SJunchao Zhang sf->remote[i].rank = i; 28dd5b3ca6SJunchao Zhang sf->remote[i].index = i; 29dd5b3ca6SJunchao Zhang } 30dd5b3ca6SJunchao Zhang } 31dd5b3ca6SJunchao Zhang *iremote = sf->remote; 32dd5b3ca6SJunchao Zhang } 33dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 34dd5b3ca6SJunchao Zhang } 35dd5b3ca6SJunchao Zhang 36*eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 37dd5b3ca6SJunchao Zhang { 38dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 39*eb02082bSJunchao Zhang PetscSFPack link; 40dd5b3ca6SJunchao Zhang MPI_Comm comm; 41*eb02082bSJunchao Zhang char *recvbuf; 42dd5b3ca6SJunchao Zhang 43dd5b3ca6SJunchao Zhang PetscFunctionBegin; 44*eb02082bSJunchao Zhang ierr = PetscSFPackGet_Alltoall(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 45dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 46dd5b3ca6SJunchao Zhang 47dd5b3ca6SJunchao Zhang if (op != MPIU_REPLACE) { 48*eb02082bSJunchao Zhang if (!link->leafbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,sf->nleaves*link->unitbytes,(void**)&link->leafbuf[leafmtype]);CHKERRQ(ierr);} 49*eb02082bSJunchao Zhang recvbuf = link->leafbuf[leafmtype]; 50dd5b3ca6SJunchao Zhang } else { 51*eb02082bSJunchao Zhang recvbuf = (char*)leafdata; 52dd5b3ca6SJunchao Zhang } 53*eb02082bSJunchao Zhang ierr = MPIU_Ialltoall(rootdata,1,unit,recvbuf,1,unit,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype]);CHKERRQ(ierr); 54dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 55dd5b3ca6SJunchao Zhang } 56dd5b3ca6SJunchao Zhang 57*eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 58dd5b3ca6SJunchao Zhang { 59dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 60*eb02082bSJunchao Zhang PetscSFPack link; 61dd5b3ca6SJunchao Zhang MPI_Comm comm; 62*eb02082bSJunchao Zhang char *recvbuf; 63dd5b3ca6SJunchao Zhang 64dd5b3ca6SJunchao Zhang PetscFunctionBegin; 65*eb02082bSJunchao Zhang ierr = PetscSFPackGet_Alltoall(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 66dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 67dd5b3ca6SJunchao Zhang 68dd5b3ca6SJunchao Zhang if (op != MPIU_REPLACE) { 69*eb02082bSJunchao Zhang if (!link->rootbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,sf->nroots*link->unitbytes,(void**)&link->rootbuf[rootmtype]);CHKERRQ(ierr);} 70*eb02082bSJunchao Zhang recvbuf = link->rootbuf[rootmtype]; 71dd5b3ca6SJunchao Zhang } else { 72*eb02082bSJunchao Zhang recvbuf = (char*)rootdata; 73dd5b3ca6SJunchao Zhang } 74*eb02082bSJunchao Zhang ierr = MPIU_Ialltoall(leafdata,1,unit,recvbuf,1,unit,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype]);CHKERRQ(ierr); 75dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 76dd5b3ca6SJunchao Zhang } 77dd5b3ca6SJunchao Zhang 78dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out) 79dd5b3ca6SJunchao Zhang { 80dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 81dd5b3ca6SJunchao Zhang PetscInt nroots=1,nleaves=1,*ilocal; 82dd5b3ca6SJunchao Zhang PetscSFNode *iremote = NULL; 83dd5b3ca6SJunchao Zhang PetscSF lsf; 84dd5b3ca6SJunchao Zhang PetscMPIInt rank; 85dd5b3ca6SJunchao Zhang 86dd5b3ca6SJunchao Zhang PetscFunctionBegin; 87dd5b3ca6SJunchao Zhang nroots = 1; 88dd5b3ca6SJunchao Zhang nleaves = 1; 89dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 90dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 91dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 92dd5b3ca6SJunchao Zhang ilocal[0] = rank; 93dd5b3ca6SJunchao Zhang iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 94dd5b3ca6SJunchao Zhang iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 95dd5b3ca6SJunchao Zhang 96dd5b3ca6SJunchao Zhang ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 97dd5b3ca6SJunchao Zhang ierr = PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 98dd5b3ca6SJunchao Zhang ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 99dd5b3ca6SJunchao Zhang *out = lsf; 100dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 101dd5b3ca6SJunchao Zhang } 102dd5b3ca6SJunchao Zhang 103dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFCreateEmbeddedSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 104dd5b3ca6SJunchao Zhang { 105dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 106dd5b3ca6SJunchao Zhang PetscInt i,*tmproots,*ilocal,ndranks,ndiranks; 107dd5b3ca6SJunchao Zhang PetscSFNode *iremote; 108dd5b3ca6SJunchao Zhang PetscMPIInt nroots,*roots,nleaves,*leaves,rank; 109dd5b3ca6SJunchao Zhang MPI_Comm comm; 110dd5b3ca6SJunchao Zhang PetscSF_Basic *bas; 111dd5b3ca6SJunchao Zhang PetscSF esf; 112dd5b3ca6SJunchao Zhang 113dd5b3ca6SJunchao Zhang PetscFunctionBegin; 114dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 115dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 116dd5b3ca6SJunchao Zhang 117dd5b3ca6SJunchao Zhang /* Uniq selected[] and store the result in roots[] */ 118dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nselected,&tmproots);CHKERRQ(ierr); 119da2e4c71SJunchao Zhang ierr = PetscArraycpy(tmproots,selected,nselected);CHKERRQ(ierr); 120dd5b3ca6SJunchao Zhang ierr = PetscSortRemoveDupsInt(&nselected,tmproots);CHKERRQ(ierr); /* nselected might be changed */ 121dd5b3ca6SJunchao Zhang if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots); 122dd5b3ca6SJunchao Zhang nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 123dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 124dd5b3ca6SJunchao Zhang for (i=0; i<nselected; i++) roots[i] = tmproots[i]; 125dd5b3ca6SJunchao Zhang ierr = PetscFree(tmproots);CHKERRQ(ierr); 126dd5b3ca6SJunchao Zhang 127dd5b3ca6SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 128dd5b3ca6SJunchao Zhang ierr = PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);CHKERRQ(ierr); 129dd5b3ca6SJunchao Zhang 130dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 131dd5b3ca6SJunchao Zhang ndranks = 0; 132dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { 133dd5b3ca6SJunchao Zhang if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;} 134dd5b3ca6SJunchao Zhang } 135dd5b3ca6SJunchao Zhang ierr = PetscSortMPIInt(nleaves,leaves);CHKERRQ(ierr); 136dd5b3ca6SJunchao Zhang if (nleaves && leaves[0] < 0) leaves[0] = rank; 137dd5b3ca6SJunchao Zhang 138dd5b3ca6SJunchao Zhang /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 139dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 140dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 141dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */ 142dd5b3ca6SJunchao Zhang ilocal[i] = leaves[i]; 143dd5b3ca6SJunchao Zhang iremote[i].rank = leaves[i]; 144dd5b3ca6SJunchao Zhang iremote[i].index = leaves[i]; 145dd5b3ca6SJunchao Zhang } 146dd5b3ca6SJunchao Zhang ierr = PetscSFCreate(comm,&esf);CHKERRQ(ierr); 147dd5b3ca6SJunchao Zhang ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 148dd5b3ca6SJunchao Zhang ierr = PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 149dd5b3ca6SJunchao Zhang 150dd5b3ca6SJunchao Zhang /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 151dd5b3ca6SJunchao Zhang ierr = PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);CHKERRQ(ierr); 152dd5b3ca6SJunchao Zhang esf->nranks = nleaves; 153dd5b3ca6SJunchao Zhang esf->ndranks = ndranks; 154dd5b3ca6SJunchao Zhang esf->roffset[0] = 0; 155dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { 156dd5b3ca6SJunchao Zhang esf->ranks[i] = leaves[i]; 157dd5b3ca6SJunchao Zhang esf->roffset[i+1] = i+1; 158dd5b3ca6SJunchao Zhang esf->rmine[i] = leaves[i]; 159dd5b3ca6SJunchao Zhang esf->rremote[i] = leaves[i]; 160dd5b3ca6SJunchao Zhang } 161dd5b3ca6SJunchao Zhang 162dd5b3ca6SJunchao Zhang /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 163dd5b3ca6SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 164dd5b3ca6SJunchao Zhang ierr = PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);CHKERRQ(ierr); 165dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nroots,&bas->irootloc);CHKERRQ(ierr); 166dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 167dd5b3ca6SJunchao Zhang ndiranks = 0; 168dd5b3ca6SJunchao Zhang for (i=0; i<nroots; i++) { 169dd5b3ca6SJunchao Zhang if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;} 170dd5b3ca6SJunchao Zhang } 171dd5b3ca6SJunchao Zhang ierr = PetscSortMPIInt(nroots,roots);CHKERRQ(ierr); 172dd5b3ca6SJunchao Zhang if (nroots && roots[0] < 0) roots[0] = rank; 173dd5b3ca6SJunchao Zhang 174dd5b3ca6SJunchao Zhang bas->niranks = nroots; 175dd5b3ca6SJunchao Zhang bas->ndiranks = ndiranks; 176dd5b3ca6SJunchao Zhang bas->ioffset[0] = 0; 177dd5b3ca6SJunchao Zhang bas->itotal = nroots; 178dd5b3ca6SJunchao Zhang for (i=0; i<nroots; i++) { 179dd5b3ca6SJunchao Zhang bas->iranks[i] = roots[i]; 180dd5b3ca6SJunchao Zhang bas->ioffset[i+1] = i+1; 181dd5b3ca6SJunchao Zhang bas->irootloc[i] = roots[i]; 182dd5b3ca6SJunchao Zhang } 183dd5b3ca6SJunchao Zhang 184dd5b3ca6SJunchao Zhang *newsf = esf; 185dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 186dd5b3ca6SJunchao Zhang } 187dd5b3ca6SJunchao Zhang 188dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) 189dd5b3ca6SJunchao Zhang { 190dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 191dd5b3ca6SJunchao Zhang PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data; 192dd5b3ca6SJunchao Zhang 193dd5b3ca6SJunchao Zhang PetscFunctionBegin; 194dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 195dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 196dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 197dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv; 198dd5b3ca6SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv; 199dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 200dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 201dd5b3ca6SJunchao Zhang 202dd5b3ca6SJunchao Zhang /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 203dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 204dd5b3ca6SJunchao Zhang 205dd5b3ca6SJunchao Zhang /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 206dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 207dd5b3ca6SJunchao Zhang 208dd5b3ca6SJunchao Zhang /* Alltoall stuff */ 209dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 210dd5b3ca6SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Alltoall; 211dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 212dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 213dd5b3ca6SJunchao Zhang sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Alltoall; 214dd5b3ca6SJunchao Zhang 215dd5b3ca6SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 216dd5b3ca6SJunchao Zhang sf->data = (void*)dat; 217dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 218dd5b3ca6SJunchao Zhang } 219dd5b3ca6SJunchao Zhang 220dd5b3ca6SJunchao Zhang 221