1 #include <../src/vec/is/sf/impls/basic/sfpack.h> 2 #include <../src/vec/is/sf/impls/basic/sfbasic.h> 3 4 /* A convenience temporary type */ 5 #if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES) 6 typedef PetscInt PetscSFCount; 7 #else 8 typedef PetscMPIInt PetscSFCount; 9 #endif 10 11 typedef struct { 12 SFBASICHEADER; 13 MPI_Comm comms[2]; /* Communicators with distributed topology in both directions */ 14 PetscBool initialized[2]; /* Are the two communicators initialized? */ 15 PetscSFCount *rootdispls,*rootcounts,*leafdispls,*leafcounts; /* displs/counts for non-distinguished ranks */ 16 PetscMPIInt *rootweights,*leafweights; 17 PetscInt rootdegree,leafdegree; 18 } PetscSF_Neighbor; 19 20 /*===================================================================================*/ 21 /* Internal utility routines */ 22 /*===================================================================================*/ 23 24 static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend,PetscSFCount *sendcnts,MPI_Datatype sendtype,PetscInt nrecv,PetscSFCount* recvcnts,MPI_Datatype recvtype) 25 { 26 PetscFunctionBegin; 27 #if defined(PETSC_USE_LOG) 28 petsc_isend_ct += (PetscLogDouble)nsend; 29 petsc_irecv_ct += (PetscLogDouble)nrecv; 30 31 if (sendtype != MPI_DATATYPE_NULL) { 32 PetscMPIInt i,typesize; 33 PetscCallMPI(MPI_Type_size(sendtype,&typesize)); 34 for (i=0; i<nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i]*typesize); 35 } 36 37 if (recvtype != MPI_DATATYPE_NULL) { 38 PetscMPIInt i,typesize; 39 PetscCallMPI(MPI_Type_size(recvtype,&typesize)); 40 for (i=0; i<nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i]*typesize); 41 } 42 #endif 43 PetscFunctionReturn(0); 44 } 45 46 /* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */ 47 static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm *distcomm) 48 { 49 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 50 PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks; 51 const PetscMPIInt *rootranks,*leafranks; 52 MPI_Comm comm; 53 54 PetscFunctionBegin; 55 PetscCall(PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,NULL,NULL)); /* Which ranks will access my roots (I am a destination) */ 56 PetscCall(PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,NULL,NULL,NULL)); /* My leaves will access whose roots (I am a source) */ 57 58 if (!dat->initialized[direction]) { 59 const PetscMPIInt indegree = nrootranks-ndrootranks,*sources = rootranks+ndrootranks; 60 const PetscMPIInt outdegree = nleafranks-ndleafranks,*destinations = leafranks+ndleafranks; 61 MPI_Comm *mycomm = &dat->comms[direction]; 62 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 63 if (direction == PETSCSF_LEAF2ROOT) { 64 PetscCallMPI(MPI_Dist_graph_create_adjacent(comm,indegree,sources,dat->rootweights,outdegree,destinations,dat->leafweights,MPI_INFO_NULL,1/*reorder*/,mycomm)); 65 } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */ 66 PetscCallMPI(MPI_Dist_graph_create_adjacent(comm,outdegree,destinations,dat->leafweights,indegree,sources,dat->rootweights,MPI_INFO_NULL,1/*reorder*/,mycomm)); 67 } 68 dat->initialized[direction] = PETSC_TRUE; 69 } 70 *distcomm = dat->comms[direction]; 71 PetscFunctionReturn(0); 72 } 73 74 /*===================================================================================*/ 75 /* Implementations of SF public APIs */ 76 /*===================================================================================*/ 77 static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf) 78 { 79 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 80 PetscInt i,j,nrootranks,ndrootranks,nleafranks,ndleafranks; 81 const PetscInt *rootoffset,*leafoffset; 82 PetscMPIInt m,n; 83 84 PetscFunctionBegin; 85 /* SFNeighbor inherits from Basic */ 86 PetscCall(PetscSFSetUp_Basic(sf)); 87 /* SFNeighbor specific */ 88 sf->persistent = PETSC_FALSE; 89 PetscCall(PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL)); 90 PetscCall(PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL)); 91 dat->rootdegree = m = (PetscMPIInt)(nrootranks-ndrootranks); 92 dat->leafdegree = n = (PetscMPIInt)(nleafranks-ndleafranks); 93 sf->nleafreqs = 0; 94 dat->nrootreqs = 1; 95 96 /* Only setup MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */ 97 PetscCall(PetscMalloc6(m,&dat->rootdispls,m,&dat->rootcounts,m,&dat->rootweights,n,&dat->leafdispls,n,&dat->leafcounts,n,&dat->leafweights)); 98 99 #if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES) 100 for (i=ndrootranks,j=0; i<nrootranks; i++,j++) { 101 dat->rootdispls[j] = rootoffset[i]-rootoffset[ndrootranks]; 102 dat->rootcounts[j] = rootoffset[i+1]-rootoffset[i]; 103 dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647); /* Scale to range of PetscMPIInt */ 104 } 105 106 for (i=ndleafranks,j=0; i<nleafranks; i++,j++) { 107 dat->leafdispls[j] = leafoffset[i]-leafoffset[ndleafranks]; 108 dat->leafcounts[j] = leafoffset[i+1]-leafoffset[i]; 109 dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647); 110 } 111 #else 112 for (i=ndrootranks,j=0; i<nrootranks; i++,j++) { 113 PetscCall(PetscMPIIntCast(rootoffset[i]-rootoffset[ndrootranks],&m)); dat->rootdispls[j] = m; 114 PetscCall(PetscMPIIntCast(rootoffset[i+1]-rootoffset[i], &n)); dat->rootcounts[j] = n; 115 dat->rootweights[j] = n; 116 } 117 118 for (i=ndleafranks,j=0; i<nleafranks; i++,j++) { 119 PetscCall(PetscMPIIntCast(leafoffset[i]-leafoffset[ndleafranks],&m)); dat->leafdispls[j] = m; 120 PetscCall(PetscMPIIntCast(leafoffset[i+1]-leafoffset[i], &n)); dat->leafcounts[j] = n; 121 dat->leafweights[j] = n; 122 } 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf) 128 { 129 PetscInt i; 130 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 131 132 PetscFunctionBegin; 133 PetscCheck(!dat->inuse,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 134 PetscCall(PetscFree6(dat->rootdispls,dat->rootcounts,dat->rootweights,dat->leafdispls,dat->leafcounts,dat->leafweights)); 135 for (i=0; i<2; i++) { 136 if (dat->initialized[i]) { 137 PetscCallMPI(MPI_Comm_free(&dat->comms[i])); 138 dat->initialized[i] = PETSC_FALSE; 139 } 140 } 141 PetscCall(PetscSFReset_Basic(sf)); /* Common part */ 142 PetscFunctionReturn(0); 143 } 144 145 static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf) 146 { 147 PetscFunctionBegin; 148 PetscCall(PetscSFReset_Neighbor(sf)); 149 PetscCall(PetscFree(sf->data)); 150 PetscFunctionReturn(0); 151 } 152 153 static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 154 { 155 PetscSFLink link; 156 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 157 MPI_Comm distcomm = MPI_COMM_NULL; 158 void *rootbuf = NULL,*leafbuf = NULL; 159 MPI_Request *req; 160 161 PetscFunctionBegin; 162 PetscCall(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link)); 163 PetscCall(PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata)); 164 /* Do neighborhood alltoallv for remote ranks */ 165 PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */)); 166 PetscCall(PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&distcomm)); 167 PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL)); 168 PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF)); 169 /* OpenMPI-3.0 ran into error with rootdegree = leafdegree = 0, so we skip the call in this case */ 170 if (dat->rootdegree || dat->leafdegree) { 171 PetscCallMPI(MPIU_Ineighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,distcomm,req)); 172 } 173 PetscCall(PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit)); 174 PetscCall(PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op)); 175 PetscFunctionReturn(0); 176 } 177 178 static inline PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out) 179 { 180 PetscSFLink link; 181 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 182 MPI_Comm distcomm = MPI_COMM_NULL; 183 void *rootbuf = NULL,*leafbuf = NULL; 184 MPI_Request *req = NULL; 185 186 PetscFunctionBegin; 187 PetscCall(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link)); 188 PetscCall(PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata)); 189 /* Do neighborhood alltoallv for remote ranks */ 190 PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */)); 191 PetscCall(PetscSFGetDistComm_Neighbor(sf,PETSCSF_LEAF2ROOT,&distcomm)); 192 PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL)); 193 PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT)); 194 if (dat->rootdegree || dat->leafdegree) { 195 PetscCallMPI(MPIU_Ineighbor_alltoallv(leafbuf,dat->leafcounts,dat->leafdispls,unit,rootbuf,dat->rootcounts,dat->rootdispls,unit,distcomm,req)); 196 } 197 PetscCall(PetscLogMPIMessages(dat->leafdegree,dat->leafcounts,unit,dat->rootdegree,dat->rootcounts,unit)); 198 *out = link; 199 PetscFunctionReturn(0); 200 } 201 202 static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 203 { 204 PetscSFLink link = NULL; 205 206 PetscFunctionBegin; 207 PetscCall(PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link)); 208 PetscCall(PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op)); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 213 { 214 PetscSFLink link = NULL; 215 216 PetscFunctionBegin; 217 PetscCall(PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link)); 218 PetscCall(PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op)); 219 PetscFunctionReturn(0); 220 } 221 222 static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 223 { 224 PetscSFLink link = NULL; 225 MPI_Comm comm = MPI_COMM_NULL; 226 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 227 void *rootbuf = NULL,*leafbuf = NULL; 228 229 PetscFunctionBegin; 230 PetscCall(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link)); 231 PetscCall(PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT)); 232 /* Process remote fetch-and-op */ 233 PetscCall(PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op)); 234 /* Bcast the updated rootbuf back to leaves */ 235 PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */)); 236 PetscCall(PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&comm)); 237 PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,NULL,NULL)); 238 PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF)); 239 if (dat->rootdegree || dat->leafdegree) { 240 PetscCallMPI(MPIU_Neighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,comm)); 241 } 242 PetscCall(PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit)); 243 PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_FALSE/* host2device after recving */)); 244 PetscCall(PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE)); 245 PetscCall(PetscSFLinkReclaim(sf,&link)); 246 PetscFunctionReturn(0); 247 } 248 249 PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf) 250 { 251 PetscSF_Neighbor *dat; 252 253 PetscFunctionBegin; 254 sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic; 255 sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 256 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 257 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 258 sf->ops->View = PetscSFView_Basic; 259 260 sf->ops->SetUp = PetscSFSetUp_Neighbor; 261 sf->ops->Reset = PetscSFReset_Neighbor; 262 sf->ops->Destroy = PetscSFDestroy_Neighbor; 263 sf->ops->BcastBegin = PetscSFBcastBegin_Neighbor; 264 sf->ops->ReduceBegin = PetscSFReduceBegin_Neighbor; 265 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Neighbor; 266 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Neighbor; 267 268 PetscCall(PetscNewLog(sf,&dat)); 269 sf->data = (void*)dat; 270 PetscFunctionReturn(0); 271 } 272