#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/* Convenience local types */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
typedef MPI_Count PetscSFCount;
typedef MPI_Aint  PetscSFAint;
#else
typedef PetscMPIInt PetscSFCount;
typedef PetscMPIInt PetscSFAint;
#endif

typedef struct {
  SFBASICHEADER;
  MPI_Comm      comms[2];                /* Communicators with distributed topology in both directions */
  PetscBool     initialized[2];          /* Are the two communicators initialized? */
  PetscSFCount *rootcounts, *leafcounts; /* counts for non-distinguished ranks */
  PetscSFAint  *rootdispls, *leafdispls; /* displs for non-distinguished ranks */
  PetscMPIInt  *rootweights, *leafweights;
  PetscInt      rootdegree, leafdegree;
} PetscSF_Neighbor;

/*===================================================================================*/
/*                         Internal utility routines                                  */
/*===================================================================================*/

static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend, PetscSFCount *sendcnts, MPI_Datatype sendtype, PetscInt nrecv, PetscSFCount *recvcnts, MPI_Datatype recvtype)
{
  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  petsc_isend_ct += (PetscLogDouble)nsend;
  petsc_irecv_ct += (PetscLogDouble)nrecv;

  if (sendtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i, typesize;
    PetscCallMPI(MPI_Type_size(sendtype, &typesize));
    for (i = 0; i < nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i] * typesize);
  }

  if (recvtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i, typesize;
    PetscCallMPI(MPI_Type_size(recvtype, &typesize));
    for (i = 0; i < nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i] * typesize);
  }
#endif
  PetscFunctionReturn(0);
}

/* Get the communicator with distributed graph topology, which is not cheap to build, so we do it on demand (instead of at PetscSFSetUp time) */
static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf, PetscSFDirection direction, MPI_Comm *distcomm)
{
  PetscSF_Neighbor  *dat = (PetscSF_Neighbor *)sf->data;
  PetscInt           nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscMPIInt *rootranks, *leafranks;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, NULL, NULL));       /* Which ranks will access my roots (I am a destination) */
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, &leafranks, NULL, NULL, NULL)); /* Whose roots my leaves will access (I am a source) */

  if (!dat->initialized[direction]) {
    const PetscMPIInt indegree  = nrootranks - ndrootranks, *sources = rootranks + ndrootranks;
    const PetscMPIInt outdegree = nleafranks - ndleafranks, *destinations = leafranks + ndleafranks;
    MPI_Comm         *mycomm    = &dat->comms[direction];
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    if (direction == PETSCSF_LEAF2ROOT) {
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, indegree, sources, dat->rootweights, outdegree, destinations, dat->leafweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
    } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, outdegree, destinations, dat->leafweights, indegree, sources, dat->rootweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
    }
    dat->initialized[direction] = PETSC_TRUE;
  }
  *distcomm = dat->comms[direction];
  PetscFunctionReturn(0);
}
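/*
  For orientation, a minimal sketch of the MPI call used above (illustrative only; the 2-rank setup and
  variable names below are hypothetical, not part of PETSc). Each rank declares the ranks it receives
  from (sources) and the ranks it sends to (destinations); MPI returns a communicator carrying this
  graph topology so that MPI_Neighbor_* collectives move data only between declared neighbors:

    PetscMPIInt rank, other, sources[1], srcweights[1] = {1}, destinations[1], dstweights[1] = {1};
    MPI_Comm    distcomm;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    other           = rank ? 0 : 1; // assume exactly two ranks that exchange data with each other
    sources[0]      = other;
    destinations[0] = other;
    MPI_Dist_graph_create_adjacent(MPI_COMM_WORLD, 1, sources, srcweights, 1, destinations, dstweights, MPI_INFO_NULL, 1, &distcomm); // reorder = 1
    // ... use distcomm with MPI_Neighbor_* / MPI_Ineighbor_* collectives ...
    MPI_Comm_free(&distcomm);
*/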
/*===================================================================================*/
/*                      Implementations of SF public APIs                             */
/*===================================================================================*/
static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
  PetscInt          i, j, nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt   *rootoffset, *leafoffset;
  PetscMPIInt       m, n;

  PetscFunctionBegin;
  /* SFNeighbor inherits from Basic */
  PetscCall(PetscSFSetUp_Basic(sf));
  /* SFNeighbor specific */
  sf->persistent = PETSC_FALSE;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
  dat->rootdegree = m = (PetscMPIInt)(nrootranks - ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks - ndleafranks);
  sf->nleafreqs   = 0;
  dat->nrootreqs  = 1;

  /* Only set up MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
  PetscCall(PetscMalloc6(m, &dat->rootdispls, m, &dat->rootcounts, m, &dat->rootweights, n, &dat->leafdispls, n, &dat->leafcounts, n, &dat->leafweights));

#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    dat->rootdispls[j]  = rootoffset[i] - rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i + 1] - rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647); /* Scale to range of PetscMPIInt */
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    dat->leafdispls[j]  = leafoffset[i] - leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i + 1] - leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647);
  }
#else
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    PetscCall(PetscMPIIntCast(rootoffset[i] - rootoffset[ndrootranks], &m));
    dat->rootdispls[j] = m;
    PetscCall(PetscMPIIntCast(rootoffset[i + 1] - rootoffset[i], &n));
    dat->rootcounts[j]  = n;
    dat->rootweights[j] = n;
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    PetscCall(PetscMPIIntCast(leafoffset[i] - leafoffset[ndleafranks], &m));
    dat->leafdispls[j] = m;
    PetscCall(PetscMPIIntCast(leafoffset[i + 1] - leafoffset[i], &n));
    dat->leafcounts[j]  = n;
    dat->leafweights[j] = n;
  }
#endif
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
{
  PetscInt          i;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;

  PetscFunctionBegin;
  PetscCheck(!dat->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree6(dat->rootdispls, dat->rootcounts, dat->rootweights, dat->leafdispls, dat->leafcounts, dat->leafweights));
  for (i = 0; i < 2; i++) {
    if (dat->initialized[i]) {
      PetscCallMPI(MPI_Comm_free(&dat->comms[i]));
      dat->initialized[i] = PETSC_FALSE;
    }
  }
  PetscCall(PetscSFReset_Basic(sf)); /* Common part */
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Neighbor(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(0);
}
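/*
  How the per-neighbor arrays built in PetscSFSetUp_Neighbor are consumed (a sketch, not the exact call
  sites below; buffer names are placeholders): in the root-to-leaf direction the packed root buffer is
  the send side and the leaf buffer is the receive side, with one (count, displacement) pair per
  non-distinguished neighbor rank:

    MPIU_Ineighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, unit,
                             leafbuf, dat->leafcounts, dat->leafdispls, unit, distcomm, req);

  The leaf-to-root direction swaps the two (buffer, counts, displs) triples, which matches the swapped
  sources/destinations used when the corresponding distributed-graph communicator was created in
  PetscSFGetDistComm_Neighbor().
*/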
static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink       link;
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
  MPI_Comm          distcomm = MPI_COMM_NULL;
  void             *rootbuf  = NULL, *leafbuf = NULL;
  MPI_Request      *req;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  /* Do neighborhood alltoallv for remote ranks */
  PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf, PETSCSF_ROOT2LEAF, &distcomm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF));
  /* OpenMPI-3.0 ran into error with rootdegree = leafdegree = 0, so we skip the call in this case */
  if (dat->rootdegree || dat->leafdegree) PetscCallMPI(MPIU_Ineighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, unit, leafbuf, dat->leafcounts, dat->leafdispls, unit, distcomm, req));
  PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, unit, dat->leafdegree, dat->leafcounts, unit));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
  PetscFunctionReturn(0);
}

static inline PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
{
  PetscSFLink       link;
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
  MPI_Comm          distcomm = MPI_COMM_NULL;
  void             *rootbuf  = NULL, *leafbuf = NULL;
  MPI_Request      *req      = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  /* Do neighborhood alltoallv for remote ranks */
  PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf, PETSCSF_LEAF2ROOT, &distcomm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT));
  if (dat->rootdegree || dat->leafdegree) PetscCallMPI(MPIU_Ineighbor_alltoallv(leafbuf, dat->leafcounts, dat->leafdispls, unit, rootbuf, dat->rootcounts, dat->rootdispls, unit, distcomm, req));
  PetscCall(PetscLogMPIMessages(dat->leafdegree, dat->leafcounts, unit, dat->rootdegree, dat->rootcounts, unit));
  *out = link;
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Neighbor(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
  PetscFunctionReturn(0);
}
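/*
  The *Begin routines above only post the nonblocking neighborhood collective; the matching *End routines
  (inherited from the Basic implementation, see PetscSFCreate_Neighbor below) wait on the request and
  unpack the receive buffer. The underlying MPI pattern is roughly as follows (a sketch with placeholder
  buffer and count names, not the exact PETSc code):

    MPI_Request req;
    MPI_Ineighbor_alltoallv(sendbuf, sendcounts, sdispls, unit, recvbuf, recvcounts, rdispls, unit, distcomm, &req);
    // ... overlap with work on data for distinguished (e.g. self) ranks ...
    MPI_Wait(&req, MPI_STATUS_IGNORE);
*/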
static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Neighbor(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
  PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink       link = NULL;
  MPI_Comm          comm = MPI_COMM_NULL;
  PetscSF_Neighbor *dat  = (PetscSF_Neighbor *)sf->data;
  void             *rootbuf = NULL, *leafbuf = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  /* Process remote fetch-and-op */
  PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
  /* Bcast the updated rootbuf back to leaves */
  PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf, PETSCSF_ROOT2LEAF, &comm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, NULL, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF));
  if (dat->rootdegree || dat->leafdegree) PetscCallMPI(MPIU_Neighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, unit, leafbuf, dat->leafcounts, dat->leafdispls, unit, comm));
  PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, unit, dat->leafdegree, dat->leafcounts, unit));
  PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_FALSE /* host2device after recving */));
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat;

  PetscFunctionBegin;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->View                 = PetscSFView_Basic;

  sf->ops->SetUp           = PetscSFSetUp_Neighbor;
  sf->ops->Reset           = PetscSFReset_Neighbor;
  sf->ops->Destroy         = PetscSFDestroy_Neighbor;
  sf->ops->BcastBegin      = PetscSFBcastBegin_Neighbor;
  sf->ops->ReduceBegin     = PetscSFReduceBegin_Neighbor;
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Neighbor;
  sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Neighbor;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(0);
}
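/*
  A hedged usage sketch from the user's side (not part of this file; the one-root, one-leaf graph below
  is hypothetical): select this implementation by type name, set the star-forest graph, then run the
  split-phase broadcast that ends up in PetscSFBcastBegin_Neighbor above.

    PetscSF     sf;
    PetscSFNode remote      = {.rank = 0, .index = 0};       // every rank's single leaf reads root 0 on rank 0
    PetscScalar rootdata[1] = {42.0}, leafdata[1] = {0.0};

    PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
    PetscCall(PetscSFSetType(sf, PETSCSFNEIGHBOR));
    PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE)); // leafdata[0] == 42.0 on every rank
    PetscCall(PetscSFDestroy(&sf));
*/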