1 #include <../src/vec/is/sf/impls/basic/sfpack.h> 2 #include <../src/vec/is/sf/impls/basic/sfbasic.h> 3 4 #if defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) 5 6 typedef struct { 7 SFBASICHEADER; 8 MPI_Comm comms[2]; /* Communicators with distributed topology in both directions */ 9 PetscBool initialized[2]; /* Are the two communicators initialized? */ 10 PetscMPIInt *rootdispls,*rootcounts,*leafdispls,*leafcounts; /* displs/counts for non-distinguished ranks */ 11 PetscInt rootdegree,leafdegree; 12 } PetscSF_Neighbor; 13 14 /*===================================================================================*/ 15 /* Internal utility routines */ 16 /*===================================================================================*/ 17 18 /* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */ 19 static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm *distcomm) 20 { 21 PetscErrorCode ierr; 22 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 23 PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks; 24 const PetscMPIInt *rootranks,*leafranks; 25 MPI_Comm comm; 26 27 PetscFunctionBegin; 28 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,NULL,NULL);CHKERRQ(ierr); /* Which ranks will access my roots (I am a destination) */ 29 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,NULL,NULL,NULL);CHKERRQ(ierr); /* My leaves will access whose roots (I am a source) */ 30 31 if (!dat->initialized[direction]) { 32 const PetscMPIInt indegree = nrootranks-ndrootranks,*sources = rootranks+ndrootranks; 33 const PetscMPIInt outdegree = nleafranks-ndleafranks,*destinations = leafranks+ndleafranks; 34 MPI_Comm *mycomm = &dat->comms[direction]; 35 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 36 if (direction == PETSCSF_LEAF2ROOT) { 37 ierr = MPI_Dist_graph_create_adjacent(comm,indegree,sources,dat->rootcounts/*src weights*/,outdegree,destinations,dat->leafcounts/*dest weights*/,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRMPI(ierr); 38 } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */ 39 ierr = MPI_Dist_graph_create_adjacent(comm,outdegree,destinations,dat->leafcounts/*src weights*/,indegree,sources,dat->rootcounts/*dest weights*/,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRMPI(ierr); 40 } 41 dat->initialized[direction] = PETSC_TRUE; 42 } 43 *distcomm = dat->comms[direction]; 44 PetscFunctionReturn(0); 45 } 46 47 /*===================================================================================*/ 48 /* Implementations of SF public APIs */ 49 /*===================================================================================*/ 50 static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf) 51 { 52 PetscErrorCode ierr; 53 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 54 PetscInt i,j,nrootranks,ndrootranks,nleafranks,ndleafranks; 55 const PetscInt *rootoffset,*leafoffset; 56 PetscMPIInt m,n; 57 58 PetscFunctionBegin; 59 /* SFNeighbor inherits from Basic */ 60 ierr = PetscSFSetUp_Basic(sf);CHKERRQ(ierr); 61 /* SFNeighbor specific */ 62 sf->persistent = PETSC_FALSE; 63 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 64 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 65 dat->rootdegree = nrootranks-ndrootranks; 66 dat->leafdegree = nleafranks-ndleafranks; 67 sf->nleafreqs = 0; 68 dat->nrootreqs = 1; 69 70 /* Only setup MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */ 71 ierr = PetscMalloc4(dat->rootdegree,&dat->rootdispls,dat->rootdegree,&dat->rootcounts,dat->leafdegree,&dat->leafdispls,dat->leafdegree,&dat->leafcounts);CHKERRQ(ierr); 72 for (i=ndrootranks,j=0; i<nrootranks; i++,j++) { 73 ierr = PetscMPIIntCast(rootoffset[i]-rootoffset[ndrootranks],&m);CHKERRQ(ierr); dat->rootdispls[j] = m; 74 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i], &n);CHKERRQ(ierr); dat->rootcounts[j] = n; 75 } 76 77 for (i=ndleafranks,j=0; i<nleafranks; i++,j++) { 78 ierr = PetscMPIIntCast(leafoffset[i]-leafoffset[ndleafranks],&m);CHKERRQ(ierr); dat->leafdispls[j] = m; 79 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i], &n);CHKERRQ(ierr); dat->leafcounts[j] = n; 80 } 81 PetscFunctionReturn(0); 82 } 83 84 static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf) 85 { 86 PetscErrorCode ierr; 87 PetscInt i; 88 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 89 90 PetscFunctionBegin; 91 if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 92 ierr = PetscFree4(dat->rootdispls,dat->rootcounts,dat->leafdispls,dat->leafcounts);CHKERRQ(ierr); 93 for (i=0; i<2; i++) { 94 if (dat->initialized[i]) { 95 ierr = MPI_Comm_free(&dat->comms[i]);CHKERRMPI(ierr); 96 dat->initialized[i] = PETSC_FALSE; 97 } 98 } 99 ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr); /* Common part */ 100 PetscFunctionReturn(0); 101 } 102 103 static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf) 104 { 105 PetscErrorCode ierr; 106 107 PetscFunctionBegin; 108 ierr = PetscSFReset_Neighbor(sf);CHKERRQ(ierr); 109 ierr = PetscFree(sf->data);CHKERRQ(ierr); 110 PetscFunctionReturn(0); 111 } 112 113 static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 114 { 115 PetscErrorCode ierr; 116 PetscSFLink link; 117 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 118 MPI_Comm distcomm = MPI_COMM_NULL; 119 void *rootbuf = NULL,*leafbuf = NULL; 120 MPI_Request *req; 121 122 PetscFunctionBegin; 123 ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr); 124 ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr); 125 /* Do neighborhood alltoallv for remote ranks */ 126 ierr = PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr); 127 ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&distcomm);CHKERRQ(ierr); 128 ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr); 129 ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 130 ierr = MPI_Start_ineighbor_alltoallv(dat->rootdegree,dat->leafdegree,rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,distcomm,req);CHKERRMPI(ierr); 131 ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out) 136 { 137 PetscErrorCode ierr; 138 PetscSFLink link; 139 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 140 MPI_Comm distcomm = MPI_COMM_NULL; 141 void *rootbuf = NULL,*leafbuf = NULL; 142 MPI_Request *req = NULL; 143 144 PetscFunctionBegin; 145 ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr); 146 ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr); 147 /* Do neighborhood alltoallv for remote ranks */ 148 ierr = PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr); 149 ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_LEAF2ROOT,&distcomm);CHKERRQ(ierr); 150 ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr); 151 ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 152 ierr = MPI_Start_ineighbor_alltoallv(dat->leafdegree,dat->rootdegree,leafbuf,dat->leafcounts,dat->leafdispls,unit,rootbuf,dat->rootcounts,dat->rootdispls,unit,distcomm,req);CHKERRMPI(ierr); 153 *out = link; 154 PetscFunctionReturn(0); 155 } 156 157 static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 158 { 159 PetscErrorCode ierr; 160 PetscSFLink link = NULL; 161 162 PetscFunctionBegin; 163 ierr = PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr); 164 ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr); 165 PetscFunctionReturn(0); 166 } 167 168 static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 169 { 170 PetscErrorCode ierr; 171 PetscSFLink link = NULL; 172 173 PetscFunctionBegin; 174 ierr = PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr); 175 ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 180 { 181 PetscErrorCode ierr; 182 PetscSFLink link = NULL; 183 MPI_Comm comm = MPI_COMM_NULL; 184 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 185 void *rootbuf = NULL,*leafbuf = NULL; 186 187 PetscFunctionBegin; 188 ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 189 ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 190 /* Process remote fetch-and-op */ 191 ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr); 192 /* Bcast the updated rootbuf back to leaves */ 193 ierr = PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr); 194 ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&comm);CHKERRQ(ierr); 195 ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,NULL,NULL);CHKERRQ(ierr); 196 ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 197 ierr = MPI_Start_neighbor_alltoallv(dat->rootdegree,dat->leafdegree,rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,comm);CHKERRMPI(ierr); 198 ierr = PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_FALSE/* host2device after recving */);CHKERRQ(ierr); 199 ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr); 200 ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 201 PetscFunctionReturn(0); 202 } 203 204 PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf) 205 { 206 PetscErrorCode ierr; 207 PetscSF_Neighbor *dat; 208 209 PetscFunctionBegin; 210 sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic; 211 sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 212 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 213 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 214 sf->ops->View = PetscSFView_Basic; 215 216 sf->ops->SetUp = PetscSFSetUp_Neighbor; 217 sf->ops->Reset = PetscSFReset_Neighbor; 218 sf->ops->Destroy = PetscSFDestroy_Neighbor; 219 sf->ops->BcastBegin = PetscSFBcastBegin_Neighbor; 220 sf->ops->ReduceBegin = PetscSFReduceBegin_Neighbor; 221 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Neighbor; 222 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Neighbor; 223 224 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 225 sf->data = (void*)dat; 226 PetscFunctionReturn(0); 227 } 228 #endif 229