xref: /petsc/src/vec/is/sf/impls/basic/neighbor/sfneighbor.c (revision 58c0e5077dcf40d6a880c19c87f1075ae1d22c8e)
1 #include <../src/vec/is/sf/impls/basic/sfpack.h>
2 #include <../src/vec/is/sf/impls/basic/sfbasic.h>
3 
4 #if defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES)
5 
6 /* SF Neighbor completely relies on the two sided info built by SF Basic. Therefore we build Neighbor as a subclass of Basic */
7 
8 typedef struct {
9   SPPACKBASICHEADER;
10   char *rootbuf; /* contiguous buffer for all root ranks */
11   char *leafbuf; /* contiguous buffer for all non-distinguished leaf ranks. Distiguished ones share root buffers. */
12 } *PetscSFPack_Neighbor;
13 
14 typedef struct {
15   SFBASICHEADER;
16   MPI_Comm      comms[2];       /* Communicators with distributed topology in both directions */
17   PetscBool     initialized[2]; /* Are the two communicators initialized? */
18   PetscMPIInt   *rootdispls,*rootcounts,*leafdispls,*leafcounts; /* displs/counts for non-distinguished ranks */
19 } PetscSF_Neighbor;
20 
21 /*===================================================================================*/
22 /*              Internal utility routines                                            */
23 /*===================================================================================*/
24 
25 /* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */
26 static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm *distcomm)
27 {
28   PetscErrorCode    ierr;
29   PetscSF_Neighbor  *dat = (PetscSF_Neighbor*)sf->data;
30   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;
31   const PetscMPIInt *rootranks,*leafranks;
32   MPI_Comm          comm;
33 
34   PetscFunctionBegin;
35   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,NULL,NULL);CHKERRQ(ierr);      /* Which ranks will access my roots (I am a destination) */
36   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,NULL,NULL,NULL);CHKERRQ(ierr); /* My leaves will access whose roots (I am a source) */
37 
38   if (!dat->initialized[direction]) {
39     const PetscMPIInt indegree  = nrootranks-ndrootranks,*sources      = rootranks+ndrootranks;
40     const PetscMPIInt outdegree = nleafranks-ndleafranks,*destinations = leafranks+ndleafranks;
41     MPI_Comm          *mycomm   = &dat->comms[direction];
42     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
43     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
44       ierr = MPI_Dist_graph_create_adjacent(comm,indegree,sources,dat->rootcounts/*src weights*/,outdegree,destinations,dat->leafcounts/*dest weights*/,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRQ(ierr);
45     } else { /* PETSCSF_ROOT2LEAF_BCAST, reverse src & dest */
46       ierr = MPI_Dist_graph_create_adjacent(comm,outdegree,destinations,dat->leafcounts/*src weights*/,indegree,sources,dat->rootcounts/*dest weights*/,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRQ(ierr);
47     }
48     dat->initialized[direction] = PETSC_TRUE;
49   }
50   *distcomm = dat->comms[direction];
51   PetscFunctionReturn(0);
52 }
53 
54 static PetscErrorCode PetscSFPackGet_Neighbor(PetscSF sf,MPI_Datatype unit,const void *rkey,const void *lkey,PetscSFDirection direction,PetscSFPack_Neighbor *mylink)
55 {
56   PetscErrorCode       ierr;
57   PetscSF_Neighbor     *dat = (PetscSF_Neighbor*)sf->data;
58   PetscInt             i,nrootranks,ndrootranks,nleafranks,ndleafranks;
59   const PetscInt       *rootoffset,*leafoffset;
60   PetscSFPack          *p;
61   PetscSFPack_Neighbor link;
62 
63   PetscFunctionBegin;
64   ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rkey,lkey);CHKERRQ(ierr);
65   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
66   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
67 
68   /* Look for types in cache */
69   for (p=&dat->avail; (link=(PetscSFPack_Neighbor)*p); p=&link->next) {
70     PetscBool match;
71     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
72     if (match) {
73       *p = link->next; /* Remove from available list */
74       goto found;
75     }
76   }
77 
78   ierr = PetscNew(&link);CHKERRQ(ierr);
79   ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr);
80   ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr);
81   /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */
82   link->half = 1;
83   ierr       = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr);
84   ierr       = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); /* Actually, tag is not need for neighborhood collectives */
85 
86   /* Allocate root and leaf buffers */
87   ierr = PetscMalloc2(rootoffset[nrootranks]*link->unitbytes,&link->rootbuf,(leafoffset[nleafranks]-leafoffset[ndleafranks])*link->unitbytes,&link->leafbuf);CHKERRQ(ierr);
88   for (i=0; i<nrootranks; i++) link->root[i] = link->rootbuf + rootoffset[i]*link->unitbytes;
89   for (i=0; i<nleafranks; i++) {
90     if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */
91       if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks");
92       link->leaf[i] = link->root[0];
93       continue;
94     }
95     link->leaf[i] = link->leafbuf + (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
96   }
97 
98 found:
99   link->rkey = rkey;
100   link->lkey = lkey;
101   link->next = dat->inuse;
102   dat->inuse = (PetscSFPack)link;
103   *mylink    = link;
104   PetscFunctionReturn(0);
105 }
106 
107 /*===================================================================================*/
108 /*              Implementations of SF public APIs                                    */
109 /*===================================================================================*/
110 static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
111 {
112   PetscErrorCode   ierr;
113   PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
114   PetscInt         i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
115   const PetscInt   *rootoffset,*leafoffset;
116   PetscMPIInt      m,n;
117 
118   PetscFunctionBegin;
119   ierr = PetscSFSetUp_Basic(sf);CHKERRQ(ierr);
120   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
121   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
122 
123   /* Only setup MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
124   ierr = PetscMalloc4(nrootranks-ndrootranks,&dat->rootdispls,nrootranks-ndrootranks,&dat->rootcounts,nleafranks-ndleafranks,&dat->leafdispls,nleafranks-ndleafranks,&dat->leafcounts);CHKERRQ(ierr);
125   for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
126     ierr = PetscMPIIntCast(rootoffset[i]-rootoffset[ndrootranks],&m);CHKERRQ(ierr); dat->rootdispls[j] = m;
127     ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],        &n);CHKERRQ(ierr); dat->rootcounts[j] = n;
128   }
129 
130   for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
131     ierr = PetscMPIIntCast(leafoffset[i]-leafoffset[ndleafranks],&m);CHKERRQ(ierr); dat->leafdispls[j] = m;
132     ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],        &n);CHKERRQ(ierr); dat->leafcounts[j] = n;
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
138 {
139   PetscErrorCode       ierr;
140   PetscInt             i;
141   PetscSF_Neighbor     *dat = (PetscSF_Neighbor*)sf->data;
142   PetscSFPack_Neighbor link,next;
143 
144   PetscFunctionBegin;
145   if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
146   ierr = PetscFree4(dat->rootdispls,dat->rootcounts,dat->leafdispls,dat->leafcounts);CHKERRQ(ierr);
147   for (i=0; i<2; i++) {
148     if (dat->initialized[i]) {
149       ierr = MPI_Comm_free(&dat->comms[i]);CHKERRQ(ierr);
150       dat->initialized[i] = PETSC_FALSE;
151     }
152   }
153 
154   ierr = PetscFree2(dat->iranks,dat->ioffset);CHKERRQ(ierr);
155   ierr = PetscFree(dat->irootloc);CHKERRQ(ierr);
156   for (link=(PetscSFPack_Neighbor)dat->avail; link; link=next) {
157     next = (PetscSFPack_Neighbor)link->next;
158     if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);}
159     ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr);
160     ierr = PetscFree2(link->rootbuf,link->leafbuf);CHKERRQ(ierr);
161     ierr = PetscFree(link->requests);CHKERRQ(ierr);
162     ierr = PetscFree(link);CHKERRQ(ierr);
163   }
164   dat->avail = NULL;
165   ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr);
166   ierr = PetscSFPackDestoryOptimization(&dat->rootpackopt);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
170 static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
171 {
172   PetscErrorCode ierr;
173 
174   PetscFunctionBegin;
175   ierr = PetscSFReset_Neighbor(sf);CHKERRQ(ierr);
176   ierr = PetscFree(sf->data);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 static PetscErrorCode PetscSFBcastAndOpBegin_Neighbor(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
181 {
182   PetscErrorCode       ierr;
183   PetscSFPack_Neighbor link;
184   PetscInt             i,nrootranks,ndrootranks,nleafranks,ndleafranks;
185   const PetscInt       *rootoffset,*rootloc;
186   PetscMPIInt          n,ind,outd;
187   PetscSF_Neighbor     *dat = (PetscSF_Neighbor*)sf->data;
188   MPI_Comm             distcomm;
189   MPI_Request          *req;
190   void                 *sbuf,*rbuf;
191 
192   PetscFunctionBegin;
193   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,&rootloc);CHKERRQ(ierr);
194   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
195   ierr = PetscSFPackGet_Neighbor(sf,unit,rootdata,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);
196 
197   /* Pack root data */
198   for (i=0; i<nrootranks; i++) {
199     void *packstart = link->root[i];
200     ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
201     (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,dat->rootpackopt,rootdata,packstart);
202   }
203 
204   /* Do neighborhood alltoallv for non-distinguished ranks */
205   req  = &link->requests[PETSCSF_ROOT2LEAF_BCAST];
206   ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF_BCAST,&distcomm);CHKERRQ(ierr);
207   outd = nrootranks - ndrootranks;
208   ind  = nleafranks - ndleafranks;
209   sbuf = link->root ? link->root[ndrootranks] : NULL;
210   rbuf = link->leaf ? link->leaf[ndleafranks] : NULL;
211   ierr = MPI_Start_ineighbor_alltoallv(outd,ind,sbuf,dat->rootcounts,dat->rootdispls,unit,rbuf,dat->leafcounts,dat->leafdispls,unit,distcomm,req);CHKERRQ(ierr);
212   PetscFunctionReturn(0);
213 }
214 
215 static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
216 {
217   PetscErrorCode       ierr;
218   PetscInt             i,nrootranks,ndrootranks,nleafranks,ndleafranks;
219   const PetscInt       *leafoffset,*leafloc;
220   PetscMPIInt          n,ind,outd;
221   PetscSFPack_Neighbor link;
222   PetscSF_Neighbor     *dat = (PetscSF_Neighbor*)sf->data;
223   MPI_Comm             distcomm;
224   MPI_Request          *req;
225   void                 *sbuf,*rbuf;
226 
227   PetscFunctionBegin;
228   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);CHKERRQ(ierr);
229   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
230   ierr = PetscSFPackGet_Neighbor(sf,unit,rootdata,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);
231 
232   /* Pack leaf data */
233   for (i=0; i<nleafranks; i++) {
234     void *packstart = link->leaf[i];
235     ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
236     (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart);
237   }
238 
239   /* Do neighborhood alltoallv for non-distinguished ranks */
240   req  = &link->requests[PETSCSF_LEAF2ROOT_REDUCE];
241   ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_LEAF2ROOT_REDUCE,&distcomm);CHKERRQ(ierr);
242   ind  = nrootranks - ndrootranks;
243   outd = nleafranks - ndleafranks;
244   sbuf = link->leaf ? link->leaf[ndleafranks] : NULL;
245   rbuf = link->root ? link->root[ndrootranks] : NULL;
246   ierr = MPI_Start_ineighbor_alltoallv(outd,ind,sbuf,dat->leafcounts,dat->leafdispls,unit,rbuf,dat->rootcounts,dat->rootdispls,unit,distcomm,req);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
251 {
252   PetscErrorCode    ierr;
253   PetscErrorCode    (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);
254   PetscSFPack_Basic link;
255   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
256   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
257   const PetscMPIInt *rootranks,*leafranks;
258   MPI_Comm          comm;
259   PetscMPIInt       n,ind,outd;
260   PetscSF_Neighbor  *dat = (PetscSF_Neighbor*)sf->data;
261   void              *sbuf,*rbuf;
262 
263   PetscFunctionBegin;
264   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
265   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
266 
267   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
268   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
269 
270   /* Process local fetch-and-op */
271   ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr);
272   for (i=0; i<nrootranks; i++) {
273     void *packstart = link->root[i];
274     ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
275     (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,dat->rootpackopt,rootdata,packstart);
276   }
277 
278   /* Bcast the updated root buffer back to leaves */
279   ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF_BCAST,&comm);CHKERRQ(ierr);
280   outd = nrootranks - ndrootranks;
281   ind  = nleafranks - ndleafranks;
282   sbuf = link->root ? link->root[ndrootranks] : NULL;
283   rbuf = link->leaf ? link->leaf[ndleafranks] : NULL;
284   ierr = MPI_Start_neighbor_alltoallv(outd,ind,sbuf,dat->rootcounts,dat->rootdispls,unit,rbuf,dat->leafcounts,dat->leafdispls,unit,comm);CHKERRQ(ierr);
285 
286   for (i=0; i<nleafranks; i++) {
287     const void *packstart = link->leaf[i];
288     ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
289     (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart);
290   }
291 
292   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
293   PetscFunctionReturn(0);
294 }
295 
296 PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
297 {
298   PetscErrorCode   ierr;
299   PetscSF_Neighbor *dat;
300 
301   PetscFunctionBegin;
302   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
303   sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;
304   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
305   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
306   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
307   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
308   sf->ops->View                 = PetscSFView_Basic;
309 
310   sf->ops->SetUp                = PetscSFSetUp_Neighbor;
311   sf->ops->Reset                = PetscSFReset_Neighbor;
312   sf->ops->Destroy              = PetscSFDestroy_Neighbor;
313   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Neighbor;
314   sf->ops->ReduceBegin          = PetscSFReduceBegin_Neighbor;
315   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Neighbor;
316 
317   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
318   sf->data = (void*)dat;
319   PetscFunctionReturn(0);
320 }
321 #endif
322