#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/* Convenience local types */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
typedef MPI_Count PetscSFCount;
typedef MPI_Aint  PetscSFAint;
#else
typedef PetscMPIInt PetscSFCount;
typedef PetscMPIInt PetscSFAint;
#endif

typedef struct {
  SFBASICHEADER;
  MPI_Comm      comms[2];                /* Communicators with distributed topology in both directions */
  PetscBool     initialized[2];          /* Are the two communicators initialized? */
  PetscSFCount *rootcounts, *leafcounts; /* counts for non-distinguished ranks */
  PetscSFAint  *rootdispls, *leafdispls; /* displs for non-distinguished ranks */
  PetscMPIInt  *rootweights, *leafweights;
  PetscInt      rootdegree, leafdegree;
} PetscSF_Neighbor;

/*===================================================================================*/
/* Internal utility routines */
/*===================================================================================*/

static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend, PetscSFCount *sendcnts, MPI_Datatype sendtype, PetscInt nrecv, PetscSFCount *recvcnts, MPI_Datatype recvtype) {
  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  petsc_isend_ct += (PetscLogDouble)nsend;
  petsc_irecv_ct += (PetscLogDouble)nrecv;

  if (sendtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i, typesize;
    PetscCallMPI(MPI_Type_size(sendtype, &typesize));
    for (i = 0; i < nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i] * typesize);
  }

  if (recvtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i, typesize;
    PetscCallMPI(MPI_Type_size(recvtype, &typesize));
    for (i = 0; i < nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i] * typesize);
  }
#endif
  PetscFunctionReturn(0);
}

/* Get the communicator with distributed graph topology, which is not cheap to build, so we do it on demand (instead of at PetscSFSetUp time) */
static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf, PetscSFDirection direction, MPI_Comm *distcomm) {
  PetscSF_Neighbor  *dat = (PetscSF_Neighbor *)sf->data;
  PetscInt           nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscMPIInt *rootranks, *leafranks;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, NULL, NULL));       /* Which ranks will access my roots (I am a destination) */
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, &leafranks, NULL, NULL, NULL)); /* Whose roots my leaves will access (I am a source) */

  if (!dat->initialized[direction]) {
    const PetscMPIInt indegree  = nrootranks - ndrootranks, *sources = rootranks + ndrootranks;
    const PetscMPIInt outdegree = nleafranks - ndleafranks, *destinations = leafranks + ndleafranks;
    MPI_Comm         *mycomm    = &dat->comms[direction];
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    if (direction == PETSCSF_LEAF2ROOT) {
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, indegree, sources, dat->rootweights, outdegree, destinations, dat->leafweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
    } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, outdegree, destinations, dat->leafweights, indegree, sources, dat->rootweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
    }
    dat->initialized[direction] = PETSC_TRUE;
  }
  *distcomm = dat->comms[direction];
  PetscFunctionReturn(0);
}

/*===================================================================================*/
/* Implementations of SF public APIs */
/*===================================================================================*/
static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf) {
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
  PetscInt          i, j, nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt   *rootoffset, *leafoffset;
  PetscMPIInt       m, n;

  PetscFunctionBegin;
  /* SFNeighbor inherits from Basic */
  PetscCall(PetscSFSetUp_Basic(sf));
  /* SFNeighbor specific */
  sf->persistent = PETSC_FALSE;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
  dat->rootdegree = m = (PetscMPIInt)(nrootranks - ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks - ndleafranks);
  sf->nleafreqs  = 0;
  dat->nrootreqs = 1;

  /* Only set up MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
  PetscCall(PetscMalloc6(m, &dat->rootdispls, m, &dat->rootcounts, m, &dat->rootweights, n, &dat->leafdispls, n, &dat->leafcounts, n, &dat->leafweights));

#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    dat->rootdispls[j]  = rootoffset[i] - rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i + 1] - rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647); /* Scale to range of PetscMPIInt */
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    dat->leafdispls[j]  = leafoffset[i] - leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i + 1] - leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647);
  }
#else
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    PetscCall(PetscMPIIntCast(rootoffset[i] - rootoffset[ndrootranks], &m));
    dat->rootdispls[j] = m;
    PetscCall(PetscMPIIntCast(rootoffset[i + 1] - rootoffset[i], &n));
    dat->rootcounts[j]  = n;
    dat->rootweights[j] = n;
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    PetscCall(PetscMPIIntCast(leafoffset[i] - leafoffset[ndleafranks], &m));
    dat->leafdispls[j] = m;
    PetscCall(PetscMPIIntCast(leafoffset[i + 1] - leafoffset[i], &n));
    dat->leafcounts[j]  = n;
    dat->leafweights[j] = n;
  }
#endif
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf) {
  PetscInt          i;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;

  PetscFunctionBegin;
  PetscCheck(!dat->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree6(dat->rootdispls, dat->rootcounts, dat->rootweights, dat->leafdispls, dat->leafcounts, dat->leafweights));
  for (i = 0; i < 2; i++) {
    if (dat->initialized[i]) {
      PetscCallMPI(MPI_Comm_free(&dat->comms[i]));
      dat->initialized[i] = PETSC_FALSE;
    }
  }
  PetscCall(PetscSFReset_Basic(sf)); /* Common part */
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf) {
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Neighbor(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) {
  PetscSFLink       link;
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
  MPI_Comm          distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL, *leafbuf = NULL;
  MPI_Request      *req;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  /* Do neighborhood alltoallv for remote ranks */
  PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf, PETSCSF_ROOT2LEAF, &distcomm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF));
  /* Open MPI 3.0 ran into an error when rootdegree = leafdegree = 0, so we skip the call in that case */
  if (dat->rootdegree || dat->leafdegree) { PetscCallMPI(MPIU_Ineighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, unit, leafbuf, dat->leafcounts, dat->leafdispls, unit, distcomm, req)); }
  PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, unit, dat->leafdegree, dat->leafcounts, unit));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
  PetscFunctionReturn(0);
}

static inline PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out) {
  PetscSFLink       link;
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
  MPI_Comm          distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL, *leafbuf = NULL;
  MPI_Request      *req = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  /* Do neighborhood alltoallv for remote ranks */
  PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf, PETSCSF_LEAF2ROOT, &distcomm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT));
  if (dat->rootdegree || dat->leafdegree) { PetscCallMPI(MPIU_Ineighbor_alltoallv(leafbuf, dat->leafcounts, dat->leafdispls, unit, rootbuf, dat->rootcounts, dat->rootdispls, unit, distcomm, req)); }
  PetscCall(PetscLogMPIMessages(dat->leafdegree, dat->leafcounts, unit, dat->rootdegree, dat->rootcounts, unit));
  *out = link;
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) {
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Neighbor(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) {
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Neighbor(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
  PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) {
  PetscSFLink       link = NULL;
  MPI_Comm          comm = MPI_COMM_NULL;
  PetscSF_Neighbor *dat  = (PetscSF_Neighbor *)sf->data;
  void             *rootbuf = NULL, *leafbuf = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  /* Process remote fetch-and-op */
  PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
  /* Bcast the updated rootbuf back to leaves */
  PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf, PETSCSF_ROOT2LEAF, &comm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, NULL, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF));
  if (dat->rootdegree || dat->leafdegree) { PetscCallMPI(MPIU_Neighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, unit, leafbuf, dat->leafcounts, dat->leafdispls, unit, comm)); }
  PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, unit, dat->leafdegree, dat->leafcounts, unit));
  PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_FALSE /* host2device after recving */));
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf) {
  PetscSF_Neighbor *dat;

  PetscFunctionBegin;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->View                 = PetscSFView_Basic;

  sf->ops->SetUp           = PetscSFSetUp_Neighbor;
  sf->ops->Reset           = PetscSFReset_Neighbor;
  sf->ops->Destroy         = PetscSFDestroy_Neighbor;
  sf->ops->BcastBegin      = PetscSFBcastBegin_Neighbor;
  sf->ops->ReduceBegin     = PetscSFReduceBegin_Neighbor;
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Neighbor;
  sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Neighbor;

  PetscCall(PetscNewLog(sf, &dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(0);
}
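
/*
  Usage sketch (added for illustration; not part of this implementation): the type
  registered by PetscSFCreate_Neighbor() is normally selected at runtime with
  -sf_type neighbor, or programmatically through the public PETSc API as below.
  The communicator and graph/data arrays (comm, nroots, nleaves, ilocal, iremote,
  rootdata, leafdata) are assumed to be supplied by the application.

    PetscSF      sf;
    PetscSFNode *iremote;   // per-leaf (rank, root index) pairs describing the star forest
    PetscInt     nroots, nleaves, *ilocal;

    PetscCall(PetscSFCreate(comm, &sf));
    PetscCall(PetscSFSetType(sf, PETSCSFNEIGHBOR)); // use MPI-3 neighborhood collectives
    PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(sf));                    // invokes PetscSFSetUp_Neighbor() above
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&sf));
*/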