1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4dd5b3ca6SJunchao Zhang 5dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 6dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Alltoall; 7dd5b3ca6SJunchao Zhang 8dd5b3ca6SJunchao Zhang /*===================================================================================*/ 9dd5b3ca6SJunchao Zhang /* Implementations of SF public APIs */ 10dd5b3ca6SJunchao Zhang /*===================================================================================*/ 11dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 12dd5b3ca6SJunchao Zhang { 13dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 14dd5b3ca6SJunchao Zhang PetscInt i; 15dd5b3ca6SJunchao Zhang 16dd5b3ca6SJunchao Zhang PetscFunctionBegin; 17dd5b3ca6SJunchao Zhang if (nroots) *nroots = sf->nroots; 18dd5b3ca6SJunchao Zhang if (nleaves) *nleaves = sf->nleaves; 19dd5b3ca6SJunchao Zhang if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 20dd5b3ca6SJunchao Zhang if (iremote) { 21dd5b3ca6SJunchao Zhang if (!sf->remote) { 22dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr); 23dd5b3ca6SJunchao Zhang sf->remote_alloc = sf->remote; 24dd5b3ca6SJunchao Zhang for (i=0; i<sf->nleaves; i++) { 25dd5b3ca6SJunchao Zhang sf->remote[i].rank = i; 26dd5b3ca6SJunchao Zhang sf->remote[i].index = i; 27dd5b3ca6SJunchao Zhang } 28dd5b3ca6SJunchao Zhang } 29dd5b3ca6SJunchao Zhang *iremote = sf->remote; 30dd5b3ca6SJunchao Zhang } 31dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 32dd5b3ca6SJunchao Zhang } 33dd5b3ca6SJunchao Zhang 34ad227feaSJunchao Zhang static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 35dd5b3ca6SJunchao Zhang { 36dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 37cd620004SJunchao Zhang PetscSFLink link; 38dd5b3ca6SJunchao Zhang MPI_Comm comm; 39cd620004SJunchao Zhang void *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */ 40cd620004SJunchao Zhang MPI_Request *req; 41dd5b3ca6SJunchao Zhang 42dd5b3ca6SJunchao Zhang PetscFunctionBegin; 43cd620004SJunchao Zhang ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr); 44cd620004SJunchao Zhang ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr); 4571438e86SJunchao Zhang ierr = PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr); 46dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 47cd620004SJunchao Zhang ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr); 4871438e86SJunchao Zhang ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 49820f2d46SBarry Smith ierr = MPIU_Ialltoall(rootbuf,1,unit,leafbuf,1,unit,comm,req);CHKERRMPI(ierr); 50dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 51dd5b3ca6SJunchao Zhang } 52dd5b3ca6SJunchao Zhang 53eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 54dd5b3ca6SJunchao Zhang { 55dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 56cd620004SJunchao Zhang PetscSFLink link; 57dd5b3ca6SJunchao Zhang MPI_Comm comm; 58cd620004SJunchao Zhang void *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */ 59cd620004SJunchao Zhang MPI_Request *req; 60dd5b3ca6SJunchao Zhang 61dd5b3ca6SJunchao Zhang PetscFunctionBegin; 62cd620004SJunchao Zhang ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr); 63cd620004SJunchao Zhang ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr); 6471438e86SJunchao Zhang ierr = PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr); 65dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 66cd620004SJunchao Zhang ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr); 6771438e86SJunchao Zhang ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 68820f2d46SBarry Smith ierr = MPIU_Ialltoall(leafbuf,1,unit,rootbuf,1,unit,comm,req);CHKERRMPI(ierr); 69dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 70dd5b3ca6SJunchao Zhang } 71dd5b3ca6SJunchao Zhang 72dd5b3ca6SJunchao Zhang static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out) 73dd5b3ca6SJunchao Zhang { 74dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 75dd5b3ca6SJunchao Zhang PetscInt nroots = 1,nleaves = 1,*ilocal; 76dd5b3ca6SJunchao Zhang PetscSFNode *iremote = NULL; 77dd5b3ca6SJunchao Zhang PetscSF lsf; 78dd5b3ca6SJunchao Zhang PetscMPIInt rank; 79dd5b3ca6SJunchao Zhang 80dd5b3ca6SJunchao Zhang PetscFunctionBegin; 81dd5b3ca6SJunchao Zhang nroots = 1; 82dd5b3ca6SJunchao Zhang nleaves = 1; 83ffc4695bSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 84dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 85dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 86dd5b3ca6SJunchao Zhang ilocal[0] = rank; 87dd5b3ca6SJunchao Zhang iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 88dd5b3ca6SJunchao Zhang iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 89dd5b3ca6SJunchao Zhang 90dd5b3ca6SJunchao Zhang ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 91dd5b3ca6SJunchao Zhang ierr = PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 92dd5b3ca6SJunchao Zhang ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 93dd5b3ca6SJunchao Zhang *out = lsf; 94dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 95dd5b3ca6SJunchao Zhang } 96dd5b3ca6SJunchao Zhang 9772502a1fSJunchao Zhang static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 98dd5b3ca6SJunchao Zhang { 99dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 100dd5b3ca6SJunchao Zhang PetscInt i,*tmproots,*ilocal,ndranks,ndiranks; 101dd5b3ca6SJunchao Zhang PetscSFNode *iremote; 102dd5b3ca6SJunchao Zhang PetscMPIInt nroots,*roots,nleaves,*leaves,rank; 103dd5b3ca6SJunchao Zhang MPI_Comm comm; 104dd5b3ca6SJunchao Zhang PetscSF_Basic *bas; 105dd5b3ca6SJunchao Zhang PetscSF esf; 106dd5b3ca6SJunchao Zhang 107dd5b3ca6SJunchao Zhang PetscFunctionBegin; 108dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 109ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 110dd5b3ca6SJunchao Zhang 111dd5b3ca6SJunchao Zhang /* Uniq selected[] and store the result in roots[] */ 112dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nselected,&tmproots);CHKERRQ(ierr); 113da2e4c71SJunchao Zhang ierr = PetscArraycpy(tmproots,selected,nselected);CHKERRQ(ierr); 114dd5b3ca6SJunchao Zhang ierr = PetscSortRemoveDupsInt(&nselected,tmproots);CHKERRQ(ierr); /* nselected might be changed */ 115*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",tmproots[0],tmproots[nselected-1],sf->nroots); 116dd5b3ca6SJunchao Zhang nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 117dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 118dd5b3ca6SJunchao Zhang for (i=0; i<nselected; i++) roots[i] = tmproots[i]; 119dd5b3ca6SJunchao Zhang ierr = PetscFree(tmproots);CHKERRQ(ierr); 120dd5b3ca6SJunchao Zhang 121dd5b3ca6SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 122dd5b3ca6SJunchao Zhang ierr = PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);CHKERRQ(ierr); 123dd5b3ca6SJunchao Zhang 124dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 125dd5b3ca6SJunchao Zhang ndranks = 0; 126dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { 127dd5b3ca6SJunchao Zhang if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;} 128dd5b3ca6SJunchao Zhang } 129dd5b3ca6SJunchao Zhang ierr = PetscSortMPIInt(nleaves,leaves);CHKERRQ(ierr); 130dd5b3ca6SJunchao Zhang if (nleaves && leaves[0] < 0) leaves[0] = rank; 131dd5b3ca6SJunchao Zhang 132dd5b3ca6SJunchao Zhang /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 133dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 134dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 135dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */ 136dd5b3ca6SJunchao Zhang ilocal[i] = leaves[i]; 137dd5b3ca6SJunchao Zhang iremote[i].rank = leaves[i]; 138dd5b3ca6SJunchao Zhang iremote[i].index = leaves[i]; 139dd5b3ca6SJunchao Zhang } 140dd5b3ca6SJunchao Zhang ierr = PetscSFCreate(comm,&esf);CHKERRQ(ierr); 141dd5b3ca6SJunchao Zhang ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 142dd5b3ca6SJunchao Zhang ierr = PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 143dd5b3ca6SJunchao Zhang 144dd5b3ca6SJunchao Zhang /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 145dd5b3ca6SJunchao Zhang ierr = PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);CHKERRQ(ierr); 146dd5b3ca6SJunchao Zhang esf->nranks = nleaves; 147dd5b3ca6SJunchao Zhang esf->ndranks = ndranks; 148dd5b3ca6SJunchao Zhang esf->roffset[0] = 0; 149dd5b3ca6SJunchao Zhang for (i=0; i<nleaves; i++) { 150dd5b3ca6SJunchao Zhang esf->ranks[i] = leaves[i]; 151dd5b3ca6SJunchao Zhang esf->roffset[i+1] = i+1; 152dd5b3ca6SJunchao Zhang esf->rmine[i] = leaves[i]; 153dd5b3ca6SJunchao Zhang esf->rremote[i] = leaves[i]; 154dd5b3ca6SJunchao Zhang } 155dd5b3ca6SJunchao Zhang 156dd5b3ca6SJunchao Zhang /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 157dd5b3ca6SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 158dd5b3ca6SJunchao Zhang ierr = PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);CHKERRQ(ierr); 159dd5b3ca6SJunchao Zhang ierr = PetscMalloc1(nroots,&bas->irootloc);CHKERRQ(ierr); 160dd5b3ca6SJunchao Zhang /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 161dd5b3ca6SJunchao Zhang ndiranks = 0; 162dd5b3ca6SJunchao Zhang for (i=0; i<nroots; i++) { 163dd5b3ca6SJunchao Zhang if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;} 164dd5b3ca6SJunchao Zhang } 165dd5b3ca6SJunchao Zhang ierr = PetscSortMPIInt(nroots,roots);CHKERRQ(ierr); 166dd5b3ca6SJunchao Zhang if (nroots && roots[0] < 0) roots[0] = rank; 167dd5b3ca6SJunchao Zhang 168dd5b3ca6SJunchao Zhang bas->niranks = nroots; 169dd5b3ca6SJunchao Zhang bas->ndiranks = ndiranks; 170dd5b3ca6SJunchao Zhang bas->ioffset[0] = 0; 171dd5b3ca6SJunchao Zhang bas->itotal = nroots; 172dd5b3ca6SJunchao Zhang for (i=0; i<nroots; i++) { 173dd5b3ca6SJunchao Zhang bas->iranks[i] = roots[i]; 174dd5b3ca6SJunchao Zhang bas->ioffset[i+1] = i+1; 175dd5b3ca6SJunchao Zhang bas->irootloc[i] = roots[i]; 176dd5b3ca6SJunchao Zhang } 177dd5b3ca6SJunchao Zhang 17872502a1fSJunchao Zhang /* See PetscSFCreateEmbeddedRootSF_Basic */ 179cd620004SJunchao Zhang esf->nleafreqs = esf->nranks - esf->ndranks; 180cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 181cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 182cd620004SJunchao Zhang /* Setup packing related fields */ 183cd620004SJunchao Zhang ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr); 184cd620004SJunchao Zhang 185cd620004SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 186dd5b3ca6SJunchao Zhang *newsf = esf; 187dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 188dd5b3ca6SJunchao Zhang } 189dd5b3ca6SJunchao Zhang 190dd5b3ca6SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) 191dd5b3ca6SJunchao Zhang { 192dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 193dd5b3ca6SJunchao Zhang PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data; 194dd5b3ca6SJunchao Zhang 195dd5b3ca6SJunchao Zhang PetscFunctionBegin; 196ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 197cd620004SJunchao Zhang sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 198cd620004SJunchao Zhang 199dd5b3ca6SJunchao Zhang /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 200dd5b3ca6SJunchao Zhang sf->ops->Destroy = PetscSFDestroy_Allgatherv; 201dd5b3ca6SJunchao Zhang sf->ops->Reset = PetscSFReset_Allgatherv; 202dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 203dd5b3ca6SJunchao Zhang sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 204dd5b3ca6SJunchao Zhang 205dd5b3ca6SJunchao Zhang /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 206dd5b3ca6SJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 207cd620004SJunchao Zhang sf->ops->SetUp = PetscSFSetUp_Allgather; 208dd5b3ca6SJunchao Zhang 209dd5b3ca6SJunchao Zhang /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 210dd5b3ca6SJunchao Zhang sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 211dd5b3ca6SJunchao Zhang 212dd5b3ca6SJunchao Zhang /* Alltoall stuff */ 213dd5b3ca6SJunchao Zhang sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 214ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Alltoall; 215dd5b3ca6SJunchao Zhang sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 216dd5b3ca6SJunchao Zhang sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 21772502a1fSJunchao Zhang sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall; 218dd5b3ca6SJunchao Zhang 219dd5b3ca6SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 220dd5b3ca6SJunchao Zhang sf->data = (void*)dat; 221dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 222dd5b3ca6SJunchao Zhang } 223dd5b3ca6SJunchao Zhang 224