1 #include <../src/vec/is/sf/impls/basic/sfpack.h> 2 #include <../src/vec/is/sf/impls/basic/sfbasic.h> 3 4 #if defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) 5 6 /* SF Neighbor completely relies on the two sided info built by SF Basic. Therefore we build Neighbor as a subclass of Basic */ 7 8 typedef struct { 9 SPPACKBASICHEADER; 10 char *rootbuf; /* contiguous buffer for all root ranks */ 11 char *leafbuf; /* contiguous buffer for all non-distinguished leaf ranks. Distiguished ones share root buffers. */ 12 } *PetscSFPack_Neighbor; 13 14 typedef struct { 15 SFBASICHEADER; 16 MPI_Comm comms[2]; /* Communicators with distributed topology in both directions */ 17 PetscBool initialized[2]; /* Are the two communicators initialized? */ 18 PetscMPIInt *rootdispls,*rootcounts,*leafdispls,*leafcounts; /* displs/counts for non-distinguished ranks */ 19 } PetscSF_Neighbor; 20 21 /*===================================================================================*/ 22 /* Internal utility routines */ 23 /*===================================================================================*/ 24 25 /* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */ 26 static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm *distcomm) 27 { 28 PetscErrorCode ierr; 29 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 30 PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks; 31 const PetscMPIInt *rootranks,*leafranks; 32 MPI_Comm comm; 33 34 PetscFunctionBegin; 35 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,NULL,NULL);CHKERRQ(ierr); /* Which ranks will access my roots (I am a destination) */ 36 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,NULL,NULL,NULL);CHKERRQ(ierr); /* My leaves will access whose roots (I am a source) */ 37 38 if (!dat->initialized[direction]) { 39 const PetscMPIInt indegree = nrootranks-ndrootranks,*sources = rootranks+ndrootranks; 40 const PetscMPIInt outdegree = nleafranks-ndleafranks,*destinations = leafranks+ndleafranks; 41 MPI_Comm *mycomm = &dat->comms[direction]; 42 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 43 if (direction == PETSCSF_LEAF2ROOT_REDUCE) { 44 ierr = MPI_Dist_graph_create_adjacent(comm,indegree,sources,dat->rootcounts/*src weights*/,outdegree,destinations,dat->leafcounts/*dest weights*/,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRQ(ierr); 45 } else { /* PETSCSF_ROOT2LEAF_BCAST, reverse src & dest */ 46 ierr = MPI_Dist_graph_create_adjacent(comm,outdegree,destinations,dat->leafcounts/*src weights*/,indegree,sources,dat->rootcounts/*dest weights*/,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRQ(ierr); 47 } 48 dat->initialized[direction] = PETSC_TRUE; 49 } 50 *distcomm = dat->comms[direction]; 51 PetscFunctionReturn(0); 52 } 53 54 static PetscErrorCode PetscSFPackGet_Neighbor(PetscSF sf,MPI_Datatype unit,const void *rkey,const void *lkey,PetscSFDirection direction,PetscSFPack_Neighbor *mylink) 55 { 56 PetscErrorCode ierr; 57 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 58 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 59 const PetscInt *rootoffset,*leafoffset; 60 PetscSFPack *p; 61 PetscSFPack_Neighbor link; 62 63 PetscFunctionBegin; 64 ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rkey,lkey);CHKERRQ(ierr); 65 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 66 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 67 68 /* Look for types in cache */ 69 for (p=&dat->avail; (link=(PetscSFPack_Neighbor)*p); p=&link->next) { 70 PetscBool match; 71 ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 72 if (match) { 73 *p = link->next; /* Remove from available list */ 74 goto found; 75 } 76 } 77 78 ierr = PetscNew(&link);CHKERRQ(ierr); 79 ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr); 80 ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr); 81 /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */ 82 link->half = 1; 83 ierr = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr); 84 ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); /* Actually, tag is not need for neighborhood collectives */ 85 86 /* Allocate root and leaf buffers */ 87 ierr = PetscMalloc2(rootoffset[nrootranks]*link->unitbytes,&link->rootbuf,(leafoffset[nleafranks]-leafoffset[ndleafranks])*link->unitbytes,&link->leafbuf);CHKERRQ(ierr); 88 for (i=0; i<nrootranks; i++) link->root[i] = link->rootbuf + rootoffset[i]*link->unitbytes; 89 for (i=0; i<nleafranks; i++) { 90 if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */ 91 if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks"); 92 link->leaf[i] = link->root[0]; 93 continue; 94 } 95 link->leaf[i] = link->leafbuf + (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes; 96 } 97 98 found: 99 link->rkey = rkey; 100 link->lkey = lkey; 101 link->next = dat->inuse; 102 dat->inuse = (PetscSFPack)link; 103 *mylink = link; 104 PetscFunctionReturn(0); 105 } 106 107 /*===================================================================================*/ 108 /* Implementations of SF public APIs */ 109 /*===================================================================================*/ 110 static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf) 111 { 112 PetscErrorCode ierr; 113 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 114 PetscInt i,j,nrootranks,ndrootranks,nleafranks,ndleafranks; 115 const PetscInt *rootoffset,*leafoffset; 116 PetscMPIInt m,n; 117 118 PetscFunctionBegin; 119 ierr = PetscSFSetUp_Basic(sf);CHKERRQ(ierr); 120 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 121 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 122 123 /* Only setup MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */ 124 ierr = PetscMalloc4(nrootranks-ndrootranks,&dat->rootdispls,nrootranks-ndrootranks,&dat->rootcounts,nleafranks-ndleafranks,&dat->leafdispls,nleafranks-ndleafranks,&dat->leafcounts);CHKERRQ(ierr); 125 for (i=ndrootranks,j=0; i<nrootranks; i++,j++) { 126 ierr = PetscMPIIntCast(rootoffset[i]-rootoffset[ndrootranks],&m);CHKERRQ(ierr); dat->rootdispls[j] = m; 127 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i], &n);CHKERRQ(ierr); dat->rootcounts[j] = n; 128 } 129 130 for (i=ndleafranks,j=0; i<nleafranks; i++,j++) { 131 ierr = PetscMPIIntCast(leafoffset[i]-leafoffset[ndleafranks],&m);CHKERRQ(ierr); dat->leafdispls[j] = m; 132 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i], &n);CHKERRQ(ierr); dat->leafcounts[j] = n; 133 } 134 PetscFunctionReturn(0); 135 } 136 137 static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf) 138 { 139 PetscErrorCode ierr; 140 PetscInt i; 141 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 142 PetscSFPack_Neighbor link,next; 143 144 PetscFunctionBegin; 145 if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 146 ierr = PetscFree4(dat->rootdispls,dat->rootcounts,dat->leafdispls,dat->leafcounts);CHKERRQ(ierr); 147 for (i=0; i<2; i++) { 148 if (dat->initialized[i]) { 149 ierr = MPI_Comm_free(&dat->comms[i]);CHKERRQ(ierr); 150 dat->initialized[i] = PETSC_FALSE; 151 } 152 } 153 154 ierr = PetscFree2(dat->iranks,dat->ioffset);CHKERRQ(ierr); 155 ierr = PetscFree(dat->irootloc);CHKERRQ(ierr); 156 for (link=(PetscSFPack_Neighbor)dat->avail; link; link=next) { 157 next = (PetscSFPack_Neighbor)link->next; 158 if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);} 159 ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr); 160 ierr = PetscFree2(link->rootbuf,link->leafbuf);CHKERRQ(ierr); 161 ierr = PetscFree(link->requests);CHKERRQ(ierr); 162 ierr = PetscFree(link);CHKERRQ(ierr); 163 } 164 dat->avail = NULL; 165 ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr); 166 ierr = PetscSFPackDestoryOptimization(&dat->rootpackopt);CHKERRQ(ierr); 167 PetscFunctionReturn(0); 168 } 169 170 static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf) 171 { 172 PetscErrorCode ierr; 173 174 PetscFunctionBegin; 175 ierr = PetscSFReset_Neighbor(sf);CHKERRQ(ierr); 176 ierr = PetscFree(sf->data);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 static PetscErrorCode PetscSFBcastAndOpBegin_Neighbor(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 181 { 182 PetscErrorCode ierr; 183 PetscSFPack_Neighbor link; 184 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 185 const PetscInt *rootoffset,*rootloc; 186 PetscMPIInt n,ind,outd; 187 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 188 MPI_Comm distcomm; 189 MPI_Request *req; 190 void *sbuf,*rbuf; 191 192 PetscFunctionBegin; 193 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,&rootloc);CHKERRQ(ierr); 194 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 195 ierr = PetscSFPackGet_Neighbor(sf,unit,rootdata,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr); 196 197 /* Pack root data */ 198 for (i=0; i<nrootranks; i++) { 199 void *packstart = link->root[i]; 200 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 201 (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,dat->rootpackopt,rootdata,packstart); 202 } 203 204 /* Do neighborhood alltoallv for non-distinguished ranks */ 205 req = &link->requests[PETSCSF_ROOT2LEAF_BCAST]; 206 ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF_BCAST,&distcomm);CHKERRQ(ierr); 207 outd = nrootranks - ndrootranks; 208 ind = nleafranks - ndleafranks; 209 sbuf = link->root ? link->root[ndrootranks] : NULL; 210 rbuf = link->leaf ? link->leaf[ndleafranks] : NULL; 211 ierr = MPI_Start_ineighbor_alltoallv(outd,ind,sbuf,dat->rootcounts,dat->rootdispls,unit,rbuf,dat->leafcounts,dat->leafdispls,unit,distcomm,req);CHKERRQ(ierr); 212 PetscFunctionReturn(0); 213 } 214 215 static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 216 { 217 PetscErrorCode ierr; 218 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 219 const PetscInt *leafoffset,*leafloc; 220 PetscMPIInt n,ind,outd; 221 PetscSFPack_Neighbor link; 222 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 223 MPI_Comm distcomm; 224 MPI_Request *req; 225 void *sbuf,*rbuf; 226 227 PetscFunctionBegin; 228 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);CHKERRQ(ierr); 229 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 230 ierr = PetscSFPackGet_Neighbor(sf,unit,rootdata,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr); 231 232 /* Pack leaf data */ 233 for (i=0; i<nleafranks; i++) { 234 void *packstart = link->leaf[i]; 235 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 236 (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart); 237 } 238 239 /* Do neighborhood alltoallv for non-distinguished ranks */ 240 req = &link->requests[PETSCSF_LEAF2ROOT_REDUCE]; 241 ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_LEAF2ROOT_REDUCE,&distcomm);CHKERRQ(ierr); 242 ind = nrootranks - ndrootranks; 243 outd = nleafranks - ndleafranks; 244 sbuf = link->leaf ? link->leaf[ndleafranks] : NULL; 245 rbuf = link->root ? link->root[ndrootranks] : NULL; 246 ierr = MPI_Start_ineighbor_alltoallv(outd,ind,sbuf,dat->leafcounts,dat->leafdispls,unit,rbuf,dat->rootcounts,dat->rootdispls,unit,distcomm,req);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 251 { 252 PetscErrorCode ierr; 253 PetscErrorCode (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); 254 PetscSFPack_Basic link; 255 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 256 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 257 const PetscMPIInt *rootranks,*leafranks; 258 MPI_Comm comm; 259 PetscMPIInt n,ind,outd; 260 PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data; 261 void *sbuf,*rbuf; 262 263 PetscFunctionBegin; 264 ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 265 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 266 267 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 268 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 269 270 /* Process local fetch-and-op */ 271 ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr); 272 for (i=0; i<nrootranks; i++) { 273 void *packstart = link->root[i]; 274 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 275 (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,dat->rootpackopt,rootdata,packstart); 276 } 277 278 /* Bcast the updated root buffer back to leaves */ 279 ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF_BCAST,&comm);CHKERRQ(ierr); 280 outd = nrootranks - ndrootranks; 281 ind = nleafranks - ndleafranks; 282 sbuf = link->root ? link->root[ndrootranks] : NULL; 283 rbuf = link->leaf ? link->leaf[ndleafranks] : NULL; 284 ierr = MPI_Start_neighbor_alltoallv(outd,ind,sbuf,dat->rootcounts,dat->rootdispls,unit,rbuf,dat->leafcounts,dat->leafdispls,unit,comm);CHKERRQ(ierr); 285 286 for (i=0; i<nleafranks; i++) { 287 const void *packstart = link->leaf[i]; 288 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 289 (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart); 290 } 291 292 ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 293 PetscFunctionReturn(0); 294 } 295 296 PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf) 297 { 298 PetscErrorCode ierr; 299 PetscSF_Neighbor *dat; 300 301 PetscFunctionBegin; 302 sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Basic; 303 sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic; 304 sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Basic; 305 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 306 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 307 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 308 sf->ops->View = PetscSFView_Basic; 309 310 sf->ops->SetUp = PetscSFSetUp_Neighbor; 311 sf->ops->Reset = PetscSFReset_Neighbor; 312 sf->ops->Destroy = PetscSFDestroy_Neighbor; 313 sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Neighbor; 314 sf->ops->ReduceBegin = PetscSFReduceBegin_Neighbor; 315 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Neighbor; 316 317 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 318 sf->data = (void*)dat; 319 PetscFunctionReturn(0); 320 } 321 #endif 322