#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/* Convenience local types */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
typedef MPI_Count PetscSFCount;
typedef MPI_Aint  PetscSFAint;
#else
typedef PetscMPIInt PetscSFCount;
typedef PetscMPIInt PetscSFAint;
#endif

typedef struct {
  SFBASICHEADER;
  MPI_Comm     comms[2];                 /* Communicators with distributed topology in both directions */
  PetscBool    initialized[2];           /* Are the two communicators initialized? */
  PetscSFCount *rootcounts,*leafcounts;  /* counts for non-distinguished ranks */
  PetscSFAint  *rootdispls,*leafdispls;  /* displs for non-distinguished ranks */
  PetscMPIInt  *rootweights,*leafweights;
  PetscInt     rootdegree,leafdegree;
} PetscSF_Neighbor;

/*===================================================================================*/
/*              Internal utility routines                                            */
/*===================================================================================*/

/* Update the MPI message counters and byte counts used by PETSc's performance logging */
static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend,PetscSFCount *sendcnts,MPI_Datatype sendtype,PetscInt nrecv,PetscSFCount *recvcnts,MPI_Datatype recvtype)
{
  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  petsc_isend_ct += (PetscLogDouble)nsend;
  petsc_irecv_ct += (PetscLogDouble)nrecv;

  if (sendtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i,typesize;
    PetscCallMPI(MPI_Type_size(sendtype,&typesize));
    for (i=0; i<nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i]*typesize);
  }

  if (recvtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i,typesize;
    PetscCallMPI(MPI_Type_size(recvtype,&typesize));
    for (i=0; i<nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i]*typesize);
  }
#endif
  PetscFunctionReturn(0);
}

/* Get the communicator with distributed graph topology, which is not cheap to build, so we do it on demand (instead of at PetscSFSetUp time) */
static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm *distcomm)
{
  PetscSF_Neighbor  *dat = (PetscSF_Neighbor*)sf->data;
  PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscMPIInt *rootranks,*leafranks;
  MPI_Comm          comm;

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,NULL,NULL));      /* Which ranks will access my roots (I am a destination) */
  PetscCall(PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,NULL,NULL,NULL)); /* Which ranks' roots my leaves will access (I am a source) */

  if (!dat->initialized[direction]) {
    const PetscMPIInt indegree  = nrootranks-ndrootranks,*sources      = rootranks+ndrootranks;
    const PetscMPIInt outdegree = nleafranks-ndleafranks,*destinations = leafranks+ndleafranks;
    MPI_Comm          *mycomm   = &dat->comms[direction];
    PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
    if (direction == PETSCSF_LEAF2ROOT) {
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm,indegree,sources,dat->rootweights,outdegree,destinations,dat->leafweights,MPI_INFO_NULL,1/*reorder*/,mycomm));
    } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm,outdegree,destinations,dat->leafweights,indegree,sources,dat->rootweights,MPI_INFO_NULL,1/*reorder*/,mycomm));
    }
    dat->initialized[direction] = PETSC_TRUE;
  }
  *distcomm = dat->comms[direction];
  PetscFunctionReturn(0);
}
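/*
  Illustrative sketch (hypothetical ranks, not part of the build): the call above registers each
  rank's exact neighbor lists with MPI. For example, if in the leaf-to-root direction a rank
  receives contributions from ranks 3 and 7 and sends its own leaf data to rank 5, the graph
  communicator would be created roughly as

    const int sources[2]      = {3,7};   // ranks whose leaves access my roots
    const int destinations[1] = {5};     // ranks owning the roots my leaves access
    MPI_Comm  distcomm;
    MPI_Dist_graph_create_adjacent(comm,2,sources,MPI_UNWEIGHTED,1,destinations,MPI_UNWEIGHTED,
                                   MPI_INFO_NULL,1,&distcomm);

  The actual code passes per-neighbor message-size weights instead of MPI_UNWEIGHTED, so a
  reordering MPI implementation may place heavily communicating ranks close together.
*/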
/*===================================================================================*/
/*              Implementations of SF public APIs                                    */
/*===================================================================================*/
static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  PetscInt         i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt   *rootoffset,*leafoffset;
  PetscMPIInt      m,n;

  PetscFunctionBegin;
  /* SFNeighbor inherits from Basic */
  PetscCall(PetscSFSetUp_Basic(sf));
  /* SFNeighbor specific */
  sf->persistent  = PETSC_FALSE;
  PetscCall(PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL));
  PetscCall(PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL));
  dat->rootdegree = m = (PetscMPIInt)(nrootranks-ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks-ndleafranks);
  sf->nleafreqs   = 0;
  dat->nrootreqs  = 1;

  /* Only set up MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
  PetscCall(PetscMalloc6(m,&dat->rootdispls,m,&dat->rootcounts,m,&dat->rootweights,n,&dat->leafdispls,n,&dat->leafcounts,n,&dat->leafweights));

#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
    dat->rootdispls[j]  = rootoffset[i]-rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i+1]-rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647); /* Scale to range of PetscMPIInt */
  }

  for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
    dat->leafdispls[j]  = leafoffset[i]-leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i+1]-leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647);
  }
#else
  for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
    PetscCall(PetscMPIIntCast(rootoffset[i]-rootoffset[ndrootranks],&m)); dat->rootdispls[j] = m;
    PetscCall(PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n));         dat->rootcounts[j] = n;
    dat->rootweights[j] = n;
  }

  for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
    PetscCall(PetscMPIIntCast(leafoffset[i]-leafoffset[ndleafranks],&m)); dat->leafdispls[j] = m;
    PetscCall(PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n));         dat->leafcounts[j] = n;
    dat->leafweights[j] = n;
  }
#endif
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
{
  PetscInt         i;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;

  PetscFunctionBegin;
  PetscCheck(!dat->inuse,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
  PetscCall(PetscFree6(dat->rootdispls,dat->rootcounts,dat->rootweights,dat->leafdispls,dat->leafcounts,dat->leafweights));
  for (i=0; i<2; i++) {
    if (dat->initialized[i]) {
      PetscCallMPI(MPI_Comm_free(&dat->comms[i]));
      dat->initialized[i] = PETSC_FALSE;
    }
  }
  PetscCall(PetscSFReset_Basic(sf)); /* Common part */
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Neighbor(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(0);
}
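/*
  Conceptual sketch (assumed buffer names, blocking form for brevity): the counts/displs built in
  PetscSFSetUp_Neighbor() describe, per non-distinguished neighbor, one contiguous block of the
  packed root or leaf buffer. A root-to-leaf broadcast then collapses into a single neighborhood
  collective on the graph communicator:

    MPI_Neighbor_alltoallv(rootbuf,rootcounts,rootdispls,unit,
                           leafbuf,leafcounts,leafdispls,unit,distcomm);

  The routines below use the nonblocking MPIU_Ineighbor_alltoallv wrapper instead, so the local
  scatter can overlap with the remote communication.
*/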
static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscSFLink      link;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  MPI_Comm         distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL,*leafbuf = NULL;
  MPI_Request      *req;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link));
  PetscCall(PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata));
  /* Do neighborhood alltoallv for remote ranks */
  PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&distcomm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF));
  /* Open MPI 3.0 errored when rootdegree = leafdegree = 0, so we skip the call in that case */
  if (dat->rootdegree || dat->leafdegree) {
    PetscCallMPI(MPIU_Ineighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,distcomm,req));
  }
  PetscCall(PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit));
  PetscCall(PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op));
  PetscFunctionReturn(0);
}

/* Shared by ReduceBegin and FetchAndOpBegin: pack leaf data and start the leaf-to-root neighborhood alltoallv */
static inline PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
{
  PetscSFLink      link;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  MPI_Comm         distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL,*leafbuf = NULL;
  MPI_Request      *req = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link));
  PetscCall(PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata));
  /* Do neighborhood alltoallv for remote ranks */
  PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf,PETSCSF_LEAF2ROOT,&distcomm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT));
  if (dat->rootdegree || dat->leafdegree) {
    PetscCallMPI(MPIU_Ineighbor_alltoallv(leafbuf,dat->leafcounts,dat->leafdispls,unit,rootbuf,dat->rootcounts,dat->rootdispls,unit,distcomm,req));
  }
  PetscCall(PetscLogMPIMessages(dat->leafdegree,dat->leafcounts,unit,dat->rootdegree,dat->rootcounts,unit));
  *out = link;
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link));
  PetscCall(PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op));
  PetscFunctionReturn(0);
}
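/*
  Usage sketch (hypothetical caller, assuming integer data): fetch-and-op is a reduce followed by
  a return of the pre-update root values, e.g. to hand out globally consistent offsets:

    PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM));

  On return, leafupdate[i] holds the value of the corresponding root just before leafdata[i] was
  added. The Begin/End pair below implements this with a leaf-to-root alltoallv followed by a
  root-to-leaf one.
*/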
static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link));
  PetscCall(PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op));
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscSFLink      link = NULL;
  MPI_Comm         comm = MPI_COMM_NULL;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  void             *rootbuf = NULL,*leafbuf = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link));
  PetscCall(PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT));
  /* Process remote fetch-and-op */
  PetscCall(PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op));
  /* Bcast the updated rootbuf back to leaves */
  PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&comm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,NULL,NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF));
  if (dat->rootdegree || dat->leafdegree) {
    PetscCallMPI(MPIU_Neighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,comm));
  }
  PetscCall(PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit));
  PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_FALSE/* host2device after receiving */));
  PetscCall(PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE));
  PetscCall(PetscSFLinkReclaim(sf,&link));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat;

  PetscFunctionBegin;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->View                 = PetscSFView_Basic;

  sf->ops->SetUp                = PetscSFSetUp_Neighbor;
  sf->ops->Reset                = PetscSFReset_Neighbor;
  sf->ops->Destroy              = PetscSFDestroy_Neighbor;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Neighbor;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Neighbor;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Neighbor;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Neighbor;

  PetscCall(PetscNewLog(sf,&dat));
  sf->data = (void*)dat;
  PetscFunctionReturn(0);
}
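/*
  Usage sketch (hypothetical user code): this implementation is selected through the usual SF
  type mechanism, e.g.

    PetscSF sf;
    PetscCall(PetscSFCreate(PETSC_COMM_WORLD,&sf));
    PetscCall(PetscSFSetType(sf,PETSCSFNEIGHBOR));  // or -sf_type neighbor with PetscSFSetFromOptions()
    PetscCall(PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE));

  PetscSFSetUp() is invoked implicitly by the first communication call and dispatches to
  PetscSFSetUp_Neighbor() above through sf->ops->SetUp.
*/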