#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/* A convenience temporary type */
/* Counts/displacements handed to the MPI neighborhood collectives below: 64-bit when the
   MPI implementation supports large counts and PETSc uses 64-bit indices, else PetscMPIInt */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
typedef PetscInt     PetscSFCount;
#else
typedef PetscMPIInt  PetscSFCount;
#endif

/* Private data of the SFNEIGHBOR implementation, which carries out PetscSF communication
   with MPI-3 neighborhood collectives (MPI_[I]neighbor_alltoallv on dist-graph comms).
   It inherits the common fields of SFBASIC via SFBASICHEADER. */
typedef struct {
  SFBASICHEADER;
  MPI_Comm      comms[2];                                        /* Communicators with distributed topology in both directions */
  PetscBool     initialized[2];                                  /* Are the two communicators initialized? */
  PetscSFCount  *rootdispls,*rootcounts,*leafdispls,*leafcounts; /* displs/counts for non-distinguished ranks */
  PetscMPIInt   *rootweights,*leafweights;                       /* edge weights passed to MPI_Dist_graph_create_adjacent */
  PetscInt      rootdegree,leafdegree;                           /* number of non-distinguished root/leaf neighbor ranks */
} PetscSF_Neighbor;

/*===================================================================================*/
/* Internal utility routines                                                         */
/*===================================================================================*/

/* Account (with PETSc logging, -log_view) for the point-to-point messages implied by one
   neighborhood collective: nsend sends with lengths sendcnts[] of sendtype, and nrecv
   receives with lengths recvcnts[] of recvtype. Pass MPI_DATATYPE_NULL on a side to skip
   accumulating its byte lengths. No-op unless PETSC_USE_LOG is defined. */
PETSC_STATIC_INLINE PetscErrorCode PetscLogMPIMessages(PetscInt nsend,PetscSFCount *sendcnts,MPI_Datatype sendtype,PetscInt nrecv,PetscSFCount *recvcnts,MPI_Datatype recvtype)
{
  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  petsc_isend_ct += (PetscLogDouble)nsend;
  petsc_irecv_ct += (PetscLogDouble)nrecv;

  if (sendtype != MPI_DATATYPE_NULL) {
    PetscErrorCode ierr;
    PetscMPIInt    i,typesize;
    ierr = MPI_Type_size(sendtype,&typesize);CHKERRMPI(ierr);
    for (i=0; i<nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i]*typesize);
  }

  if (recvtype != MPI_DATATYPE_NULL) {
    PetscErrorCode ierr;
    PetscMPIInt    i,typesize;
    ierr = MPI_Type_size(recvtype,&typesize);CHKERRMPI(ierr);
    for (i=0; i<nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i]*typesize);
  }
#endif
  PetscFunctionReturn(0);
}

/* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */
static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm *distcomm)
{
  PetscErrorCode    ierr;
  PetscSF_Neighbor  *dat = (PetscSF_Neighbor*)sf->data;
  PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscMPIInt *rootranks,*leafranks;
  MPI_Comm          comm;

  PetscFunctionBegin;
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,NULL,NULL);CHKERRQ(ierr);      /* Which ranks will access my roots (I am a destination) */
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,NULL,NULL,NULL);CHKERRQ(ierr); /* My leaves will access whose roots (I am a source) */

  if (!dat->initialized[direction]) {
    /* Skip the first ndrootranks/ndleafranks entries: distinguished ranks are handled
       outside the neighborhood collective (see note in PetscSFSetUp_Neighbor) */
    const PetscMPIInt indegree  = nrootranks-ndrootranks,*sources      = rootranks+ndrootranks;
    const PetscMPIInt outdegree = nleafranks-ndleafranks,*destinations = leafranks+ndleafranks;
    MPI_Comm          *mycomm   = &dat->comms[direction];
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    if (direction == PETSCSF_LEAF2ROOT) {
      ierr = MPI_Dist_graph_create_adjacent(comm,indegree,sources,dat->rootweights,outdegree,destinations,dat->leafweights,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRMPI(ierr);
    } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
      ierr = MPI_Dist_graph_create_adjacent(comm,outdegree,destinations,dat->leafweights,indegree,sources,dat->rootweights,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRMPI(ierr);
    }
    dat->initialized[direction] = PETSC_TRUE;
  }
  *distcomm = dat->comms[direction];
  PetscFunctionReturn(0);
}

/*===================================================================================*/
/* Implementations of SF public APIs                                                 */
/*===================================================================================*/

/* Build the per-neighbor displs/counts/weights arrays used by the neighborhood alltoallv
   calls, on top of the setup done by the Basic implementation. */
static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
{
  PetscErrorCode   ierr;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  PetscInt         i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt   *rootoffset,*leafoffset;
  PetscMPIInt      m,n;

  PetscFunctionBegin;
  /* SFNeighbor inherits from Basic */
  ierr = PetscSFSetUp_Basic(sf);CHKERRQ(ierr);
  /* SFNeighbor specific */
  sf->persistent = PETSC_FALSE;
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
  dat->rootdegree = m = (PetscMPIInt)(nrootranks-ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks-ndleafranks);
  sf->nleafreqs   = 0; /* a single neighborhood collective covers all remote ranks, so... */
  dat->nrootreqs  = 1; /* ...one request on the root side and none on the leaf side */

  /* Only setup MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
  ierr = PetscMalloc6(m,&dat->rootdispls,m,&dat->rootcounts,m,&dat->rootweights,n,&dat->leafdispls,n,&dat->leafcounts,n,&dat->leafweights);CHKERRQ(ierr);

#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  /* Large-count path: displs/counts are stored as 64-bit PetscSFCount directly; the offsets
     are relative to the start of the non-distinguished region (rootoffset[ndrootranks]) */
  for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
    dat->rootdispls[j]  = rootoffset[i]-rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i+1]-rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647); /* Scale to range of PetscMPIInt */
  }

  for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
    dat->leafdispls[j]  = leafoffset[i]-leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i+1]-leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647);
  }
#else
  /* 32-bit path: each offset difference must fit in PetscMPIInt; PetscMPIIntCast errors out
     on overflow. m and n are reused here as scratch temporaries. */
  for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
    ierr = PetscMPIIntCast(rootoffset[i]-rootoffset[ndrootranks],&m);CHKERRQ(ierr); dat->rootdispls[j] = m;
    ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);         dat->rootcounts[j] = n;
    dat->rootweights[j] = n;
  }

  for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
    ierr = PetscMPIIntCast(leafoffset[i]-leafoffset[ndleafranks],&m);CHKERRQ(ierr); dat->leafdispls[j] = m;
    ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);         dat->leafcounts[j] = n;
    dat->leafweights[j] = n;
  }
#endif
  PetscFunctionReturn(0);
}

/* Release the SFNeighbor-specific arrays and cached dist-graph communicators, then reset
   the inherited Basic state. Errors if a split-phase operation is still outstanding. */
static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
{
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;

  PetscFunctionBegin;
  if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
  ierr = PetscFree6(dat->rootdispls,dat->rootcounts,dat->rootweights,dat->leafdispls,dat->leafcounts,dat->leafweights);CHKERRQ(ierr);
  for (i=0; i<2; i++) {
    if (dat->initialized[i]) {
      ierr = MPI_Comm_free(&dat->comms[i]);CHKERRMPI(ierr);
      dat->initialized[i] = PETSC_FALSE;
    }
  }
  ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr); /* Common part */
  PetscFunctionReturn(0);
}

/* Destroy = reset plus freeing the implementation data itself */
static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReset_Neighbor(sf);CHKERRQ(ierr);
  ierr = PetscFree(sf->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Begin broadcasting rootdata to leafdata: pack remote root values, start a nonblocking
   neighborhood alltoallv in the ROOT2LEAF direction, then handle the local (distinguished)
   part. The matching End is PetscSFBcastEnd_Basic (set in PetscSFCreate_Neighbor). */
static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode   ierr;
  PetscSFLink      link;
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor*)sf->data;
  MPI_Comm         distcomm  = MPI_COMM_NULL;
  void             *rootbuf  = NULL,*leafbuf = NULL;
  MPI_Request      *req;

  PetscFunctionBegin;
  ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
  /* Do neighborhood alltoallv for remote ranks */
  ierr = PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr);
  ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&distcomm);CHKERRQ(ierr);
  ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr);
  ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  /* OpenMPI-3.0 ran into error with rootdegree = leafdegree = 0, so we skip the call in this case */
  if (dat->rootdegree || dat->leafdegree) {
    ierr = MPIU_Ineighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,distcomm,req);CHKERRMPI(ierr);
  }
  ierr = PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit);CHKERRQ(ierr);
  ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Common begin-phase for leaf-to-root communication (shared by Reduce and FetchAndOp):
   pack remote leaf values, start a nonblocking neighborhood alltoallv in the LEAF2ROOT
   direction, and return the in-flight link through *out so the caller can continue with
   operation-specific local work. */
PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
{
  PetscErrorCode   ierr;
  PetscSFLink      link;
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor*)sf->data;
  MPI_Comm         distcomm  = MPI_COMM_NULL;
  void             *rootbuf  = NULL,*leafbuf = NULL;
  MPI_Request      *req      = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
  /* Do neighborhood alltoallv for remote ranks */
  ierr = PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr);
  ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_LEAF2ROOT,&distcomm);CHKERRQ(ierr);
  ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr);
  ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
  /* Same OpenMPI-3.0 zero-degree workaround as in PetscSFBcastBegin_Neighbor */
  if (dat->rootdegree || dat->leafdegree) {
    ierr = MPIU_Ineighbor_alltoallv(leafbuf,dat->leafcounts,dat->leafdispls,unit,rootbuf,dat->rootcounts,dat->rootdispls,unit,distcomm,req);CHKERRMPI(ierr);
  }
  ierr = PetscLogMPIMessages(dat->leafdegree,dat->leafcounts,unit,dat->rootdegree,dat->rootcounts,unit);CHKERRQ(ierr);
  *out = link;
  PetscFunctionReturn(0);
}

/* Begin reducing leafdata into rootdata with op; the local part is scattered here while the
   remote part is in flight. The matching End is PetscSFReduceEnd_Basic. */
static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFLink    link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Begin fetch-and-op: like ReduceBegin, but the local part atomically fetches old root
   values into leafupdate while applying op. Completed by PetscSFFetchAndOpEnd_Neighbor. */
static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFLink    link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Finish fetch-and-op: complete the LEAF2ROOT transfer started in Begin, apply op to the
   remote contributions at the roots (capturing old values), then send the pre-op root
   values back to the leaves with a BLOCKING neighborhood alltoallv and unpack them into
   leafupdate. The second exchange is blocking because this End must deliver the result. */
static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode   ierr;
  PetscSFLink      link     = NULL;
  MPI_Comm         comm     = MPI_COMM_NULL;
  PetscSF_Neighbor *dat     = (PetscSF_Neighbor*)sf->data;
  void             *rootbuf = NULL,*leafbuf = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
  /* Process remote fetch-and-op */
  ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr);
  /* Bcast the updated rootbuf back to leaves */
  ierr = PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr);
  ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&comm);CHKERRQ(ierr);
  ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  if (dat->rootdegree || dat->leafdegree) {
    ierr = MPIU_Neighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,comm);CHKERRMPI(ierr);
  }
  ierr = PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit);CHKERRQ(ierr);
  ierr = PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_FALSE/* host2device after recving */);CHKERRQ(ierr);
  ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Constructor for SFNEIGHBOR: install the function table (End phases and a few queries are
   inherited from Basic; Begin phases and lifecycle routines are the Neighbor versions
   above) and allocate the implementation data. */
PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
{
  PetscErrorCode   ierr;
  PetscSF_Neighbor *dat;

  PetscFunctionBegin;
  /* Inherited from Basic */
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->View                 = PetscSFView_Basic;

  /* Neighbor-specific */
  sf->ops->SetUp                = PetscSFSetUp_Neighbor;
  sf->ops->Reset                = PetscSFReset_Neighbor;
  sf->ops->Destroy              = PetscSFDestroy_Neighbor;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Neighbor;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Neighbor;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Neighbor;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Neighbor;

  ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
  sf->data = (void*)dat;
  PetscFunctionReturn(0);
}