1 #include <../src/vec/is/sf/impls/basic/sfpack.h> 2 #include <../src/vec/is/sf/impls/basic/sfbasic.h> 3 4 #if defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) 5 6 /* SF Neighbor completely relies on the two sided info built by SF Basic. Therefore we build Neighbor as a subclass of Basic */ 7 8 typedef struct { 9 SPPACKBASICHEADER; 10 char *rootbuf; /* contiguous buffer for all root ranks */ 11 char *leafbuf; /* contiguous buffer for all non-distinguished leaf ranks. Distiguished ones share root buffers. */ 12 } *PetscSFPack_Neighbor; 13 14 typedef struct { 15 SFBASICHEADER; 16 MPI_Comm comms[2]; /* Communicators with distributed topology in both directions */ 17 PetscBool initialized[2]; /* Are the two communicators initialized? */ 18 PetscMPIInt *rootdispls,*rootcounts,*leafdispls,*leafcounts; /* displs/counts for non-distinguished ranks */ 19 } PetscSF_Neighbor; 20 21 /*===================================================================================*/ 22 /* Internal utility routines */ 23 /*===================================================================================*/ 24 25 /* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */ 26 static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm *distcomm) 27 { 28 PetscErrorCode ierr; 29 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 30 PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks; 31 const PetscMPIInt *rootranks,*leafranks; 32 MPI_Comm comm; 33 34 PetscFunctionBegin; 35 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,NULL,NULL);CHKERRQ(ierr); /* Which ranks will access my roots (I am a destination) */ 36 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,NULL,NULL,NULL);CHKERRQ(ierr); /* My leaves will access whose roots (I am a source) */ 37 38 if (!dat->initialized[direction]) { 39 const PetscMPIInt indegree = nrootranks-ndrootranks,*sources = rootranks+ndrootranks; 40 const PetscMPIInt outdegree = nleafranks-ndleafranks,*destinations = leafranks+ndleafranks; 41 MPI_Comm *mycomm = &dat->comms[direction]; 42 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 43 if (direction == PETSCSF_LEAF2ROOT_REDUCE) { 44 ierr = MPI_Dist_graph_create_adjacent(comm,indegree,sources,dat->rootcounts/*src weights*/,outdegree,destinations,dat->leafcounts/*dest weights*/,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRQ(ierr); 45 } else { /* PETSCSF_ROOT2LEAF_BCAST, reverse src & dest */ 46 ierr = MPI_Dist_graph_create_adjacent(comm,outdegree,destinations,dat->leafcounts/*src weights*/,indegree,sources,dat->rootcounts/*dest weights*/,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRQ(ierr); 47 } 48 dat->initialized[direction] = PETSC_TRUE; 49 } 50 *distcomm = dat->comms[direction]; 51 PetscFunctionReturn(0); 52 } 53 54 static PetscErrorCode PetscSFPackGet_Neighbor(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFDirection direction,PetscSFPack_Neighbor *mylink) 55 { 56 PetscErrorCode ierr; 57 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 58 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 59 const PetscInt *rootoffset,*leafoffset; 60 PetscSFPack *p; 61 PetscSFPack_Neighbor link; 62 63 PetscFunctionBegin; 64 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 65 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 66 67 /* Look for types in cache */ 68 for (p=&dat->avail; (link=(PetscSFPack_Neighbor)*p); p=&link->next) { 69 PetscBool match; 70 ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 71 if (match) { 72 *p = link->next; /* Remove from available list */ 73 goto found; 74 } 75 } 76 77 ierr = PetscNew(&link);CHKERRQ(ierr); 78 ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr); 79 ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr); 80 /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */ 81 link->half = 1; 82 ierr = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr); 83 ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); /* Actually, tag is not need for neighborhood collectives */ 84 85 /* Allocate root and leaf buffers */ 86 ierr = PetscMalloc2(rootoffset[nrootranks]*link->unitbytes,&link->rootbuf,(leafoffset[nleafranks]-leafoffset[ndleafranks])*link->unitbytes,&link->leafbuf);CHKERRQ(ierr); 87 for (i=0; i<nrootranks; i++) link->root[i] = link->rootbuf + rootoffset[i]*link->unitbytes; 88 for (i=0; i<nleafranks; i++) { 89 if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */ 90 if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks"); 91 link->leaf[i] = link->root[0]; 92 continue; 93 } 94 link->leaf[i] = link->leafbuf + (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes; 95 } 96 97 found: 98 link->key = key; 99 link->next = dat->inuse; 100 dat->inuse = (PetscSFPack)link; 101 *mylink = link; 102 PetscFunctionReturn(0); 103 } 104 105 /*===================================================================================*/ 106 /* Implementations of SF public APIs */ 107 /*===================================================================================*/ 108 static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf) 109 { 110 PetscErrorCode ierr; 111 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 112 PetscInt i,j,nrootranks,ndrootranks,nleafranks,ndleafranks; 113 const PetscInt *rootoffset,*leafoffset; 114 PetscMPIInt m,n; 115 116 PetscFunctionBegin; 117 ierr = PetscSFSetUp_Basic(sf);CHKERRQ(ierr); 118 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 119 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 120 121 /* Only setup MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */ 122 ierr = PetscMalloc4(nrootranks-ndrootranks,&dat->rootdispls,nrootranks-ndrootranks,&dat->rootcounts,nleafranks-ndleafranks,&dat->leafdispls,nleafranks-ndleafranks,&dat->leafcounts);CHKERRQ(ierr); 123 for (i=ndrootranks,j=0; i<nrootranks; i++,j++) { 124 ierr = PetscMPIIntCast(rootoffset[i]-rootoffset[ndrootranks],&m);CHKERRQ(ierr); dat->rootdispls[j] = m; 125 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i], &n);CHKERRQ(ierr); dat->rootcounts[j] = n; 126 } 127 128 for (i=ndleafranks,j=0; i<nleafranks; i++,j++) { 129 ierr = PetscMPIIntCast(leafoffset[i]-leafoffset[ndleafranks],&m);CHKERRQ(ierr); dat->leafdispls[j] = m; 130 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i], &n);CHKERRQ(ierr); dat->leafcounts[j] = n; 131 } 132 PetscFunctionReturn(0); 133 } 134 135 static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf) 136 { 137 PetscErrorCode ierr; 138 PetscInt i; 139 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 140 PetscSFPack_Neighbor link,next; 141 142 PetscFunctionBegin; 143 if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 144 ierr = PetscFree4(dat->rootdispls,dat->rootcounts,dat->leafdispls,dat->leafcounts);CHKERRQ(ierr); 145 for (i=0; i<2; i++) { 146 if (dat->initialized[i]) { 147 ierr = MPI_Comm_free(&dat->comms[i]);CHKERRQ(ierr); 148 dat->initialized[i] = PETSC_FALSE; 149 } 150 } 151 152 ierr = PetscFree2(dat->iranks,dat->ioffset);CHKERRQ(ierr); 153 ierr = PetscFree(dat->irootloc);CHKERRQ(ierr); 154 for (link=(PetscSFPack_Neighbor)dat->avail; link; link=next) { 155 next = (PetscSFPack_Neighbor)link->next; 156 if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);} 157 ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr); 158 ierr = PetscFree2(link->rootbuf,link->leafbuf);CHKERRQ(ierr); 159 ierr = PetscFree(link->requests);CHKERRQ(ierr); 160 ierr = PetscFree(link);CHKERRQ(ierr); 161 } 162 dat->avail = NULL; 163 ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr); 164 ierr = PetscSFPackDestoryOptimization(&dat->rootpackopt);CHKERRQ(ierr); 165 PetscFunctionReturn(0); 166 } 167 168 static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf) 169 { 170 PetscErrorCode ierr; 171 172 PetscFunctionBegin; 173 ierr = PetscSFReset_Neighbor(sf);CHKERRQ(ierr); 174 ierr = PetscFree(sf->data);CHKERRQ(ierr); 175 PetscFunctionReturn(0); 176 } 177 178 static PetscErrorCode PetscSFBcastAndOpBegin_Neighbor(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 179 { 180 PetscErrorCode ierr; 181 PetscSFPack_Neighbor link; 182 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 183 const PetscInt *rootoffset,*rootloc; 184 PetscMPIInt n,ind,outd; 185 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 186 MPI_Comm distcomm; 187 MPI_Request *req; 188 void *sbuf,*rbuf; 189 190 PetscFunctionBegin; 191 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,&rootloc);CHKERRQ(ierr); 192 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 193 ierr = PetscSFPackGet_Neighbor(sf,unit,rootdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr); 194 195 /* Pack root data */ 196 for (i=0; i<nrootranks; i++) { 197 void *packstart = link->root[i]; 198 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 199 (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,dat->rootpackopt,rootdata,packstart); 200 } 201 202 /* Do neighborhood alltoallv for non-distinguished ranks */ 203 req = &link->requests[PETSCSF_ROOT2LEAF_BCAST]; 204 ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF_BCAST,&distcomm);CHKERRQ(ierr); 205 outd = nrootranks - ndrootranks; 206 ind = nleafranks - ndleafranks; 207 sbuf = link->root ? link->root[ndrootranks] : NULL; 208 rbuf = link->leaf ? link->leaf[ndleafranks] : NULL; 209 ierr = MPI_Start_ineighbor_alltoallv(outd,ind,sbuf,dat->rootcounts,dat->rootdispls,unit,rbuf,dat->leafcounts,dat->leafdispls,unit,distcomm,req);CHKERRQ(ierr); 210 PetscFunctionReturn(0); 211 } 212 213 static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 214 { 215 PetscErrorCode ierr; 216 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 217 const PetscInt *leafoffset,*leafloc; 218 PetscMPIInt n,ind,outd; 219 PetscSFPack_Neighbor link; 220 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 221 MPI_Comm distcomm; 222 MPI_Request *req; 223 void *sbuf,*rbuf; 224 225 PetscFunctionBegin; 226 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);CHKERRQ(ierr); 227 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 228 ierr = PetscSFPackGet_Neighbor(sf,unit,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr); 229 230 /* Pack leaf data */ 231 for (i=0; i<nleafranks; i++) { 232 void *packstart = link->leaf[i]; 233 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 234 (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart); 235 } 236 237 /* Do neighborhood alltoallv for non-distinguished ranks */ 238 req = &link->requests[PETSCSF_LEAF2ROOT_REDUCE]; 239 ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_LEAF2ROOT_REDUCE,&distcomm);CHKERRQ(ierr); 240 ind = nrootranks - ndrootranks; 241 outd = nleafranks - ndleafranks; 242 sbuf = link->leaf ? link->leaf[ndleafranks] : NULL; 243 rbuf = link->root ? link->root[ndrootranks] : NULL; 244 ierr = MPI_Start_ineighbor_alltoallv(outd,ind,sbuf,dat->leafcounts,dat->leafdispls,unit,rbuf,dat->rootcounts,dat->rootdispls,unit,distcomm,req);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 249 { 250 PetscErrorCode ierr; 251 PetscErrorCode (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); 252 PetscSFPack_Basic link; 253 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 254 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 255 const PetscMPIInt *rootranks,*leafranks; 256 MPI_Comm comm; 257 PetscMPIInt n,ind,outd; 258 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 259 void *sbuf,*rbuf; 260 261 PetscFunctionBegin; 262 ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 263 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 264 265 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 266 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 267 268 /* Process local fetch-and-op */ 269 ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr); 270 for (i=0; i<nrootranks; i++) { 271 void *packstart = link->root[i]; 272 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 273 (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,dat->rootpackopt,rootdata,packstart); 274 } 275 276 /* Bcast the updated root buffer back to leaves */ 277 ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF_BCAST,&comm);CHKERRQ(ierr); 278 outd = nrootranks - ndrootranks; 279 ind = nleafranks - ndleafranks; 280 sbuf = link->root ? link->root[ndrootranks] : NULL; 281 rbuf = link->leaf ? link->leaf[ndleafranks] : NULL; 282 ierr = MPI_Start_neighbor_alltoallv(outd,ind,sbuf,dat->rootcounts,dat->rootdispls,unit,rbuf,dat->leafcounts,dat->leafdispls,unit,comm);CHKERRQ(ierr); 283 284 for (i=0; i<nleafranks; i++) { 285 const void *packstart = link->leaf[i]; 286 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 287 (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart); 288 } 289 290 ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 291 PetscFunctionReturn(0); 292 } 293 294 PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf) 295 { 296 PetscErrorCode ierr; 297 PetscSF_Neighbor *dat; 298 299 PetscFunctionBegin; 300 sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Basic; 301 sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic; 302 sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Basic; 303 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 304 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 305 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 306 sf->ops->View = PetscSFView_Basic; 307 308 sf->ops->SetUp = PetscSFSetUp_Neighbor; 309 sf->ops->Reset = PetscSFReset_Neighbor; 310 sf->ops->Destroy = PetscSFDestroy_Neighbor; 311 sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Neighbor; 312 sf->ops->ReduceBegin = PetscSFReduceBegin_Neighbor; 313 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Neighbor; 314 315 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 316 sf->data = (void*)dat; 317 PetscFunctionReturn(0); 318 } 319 #endif 320