xref: /petsc/src/vec/is/sf/impls/basic/neighbor/sfneighbor.c (revision 8ebe3e4e9e00d86ece2e9fcd0cc84910b0ad437c)
1 #include <../src/vec/is/sf/impls/basic/sfpack.h>
2 #include <../src/vec/is/sf/impls/basic/sfbasic.h>
3 
/* A convenience temporary type: the integer type used for the counts/displacements passed to the MPI
   neighborhood collectives below. When MPI supports large counts AND PETSc uses 64-bit indices, counts
   may exceed the range of an int, so use PetscInt; otherwise MPI expects plain PetscMPIInt. */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  typedef PetscInt     PetscSFCount;
#else
  typedef PetscMPIInt  PetscSFCount;
#endif
10 
/* Private data of the Neighbor SF implementation, which communicates via MPI-3 neighborhood collectives
   (see the MPIU_(I)neighbor_alltoallv calls below) instead of point-to-point sends/receives */
typedef struct {
  SFBASICHEADER;                /* Fields shared with the Basic SF implementation, which this type inherits from */
  MPI_Comm      comms[2];       /* Communicators with distributed topology in both directions */
  PetscBool     initialized[2]; /* Are the two communicators initialized? */
  PetscSFCount  *rootdispls,*rootcounts,*leafdispls,*leafcounts; /* displs/counts for non-distinguished ranks */
  PetscMPIInt   *rootweights,*leafweights; /* Edge weights passed to MPI_Dist_graph_create_adjacent */
  PetscInt      rootdegree,leafdegree;     /* Numbers of non-distinguished root/leaf neighbor ranks */
} PetscSF_Neighbor;
19 
20 /*===================================================================================*/
21 /*              Internal utility routines                                            */
22 /*===================================================================================*/
23 
24 PETSC_STATIC_INLINE PetscErrorCode PetscLogMPIMessages(PetscInt nsend,PetscSFCount *sendcnts,MPI_Datatype sendtype,PetscInt nrecv,PetscSFCount* recvcnts,MPI_Datatype recvtype)
25 {
26   PetscFunctionBegin;
27 #if defined(PETSC_USE_LOG)
28   petsc_isend_ct += (PetscLogDouble)nsend;
29   petsc_irecv_ct += (PetscLogDouble)nrecv;
30 
31   if (sendtype != MPI_DATATYPE_NULL) {
32     PetscErrorCode    ierr;
33     PetscMPIInt       i,typesize;
34     ierr = MPI_Type_size(sendtype,&typesize);CHKERRMPI(ierr);
35     for (i=0; i<nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i]*typesize);
36   }
37 
38   if (recvtype != MPI_DATATYPE_NULL) {
39     PetscErrorCode    ierr;
40     PetscMPIInt       i,typesize;
41     ierr = MPI_Type_size(recvtype,&typesize);CHKERRMPI(ierr);
42     for (i=0; i<nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i]*typesize);
43   }
44 #endif
45   PetscFunctionReturn(0);
46 }
47 
48 /* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */
49 static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm *distcomm)
50 {
51   PetscErrorCode    ierr;
52   PetscSF_Neighbor  *dat = (PetscSF_Neighbor*)sf->data;
53   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;
54   const PetscMPIInt *rootranks,*leafranks;
55   MPI_Comm          comm;
56 
57   PetscFunctionBegin;
58   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,NULL,NULL);CHKERRQ(ierr);      /* Which ranks will access my roots (I am a destination) */
59   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,NULL,NULL,NULL);CHKERRQ(ierr); /* My leaves will access whose roots (I am a source) */
60 
61   if (!dat->initialized[direction]) {
62     const PetscMPIInt indegree  = nrootranks-ndrootranks,*sources      = rootranks+ndrootranks;
63     const PetscMPIInt outdegree = nleafranks-ndleafranks,*destinations = leafranks+ndleafranks;
64     MPI_Comm          *mycomm   = &dat->comms[direction];
65     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
66     if (direction == PETSCSF_LEAF2ROOT) {
67       ierr = MPI_Dist_graph_create_adjacent(comm,indegree,sources,dat->rootweights,outdegree,destinations,dat->leafweights,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRMPI(ierr);
68     } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
69       ierr = MPI_Dist_graph_create_adjacent(comm,outdegree,destinations,dat->leafweights,indegree,sources,dat->rootweights,MPI_INFO_NULL,1/*reorder*/,mycomm);CHKERRMPI(ierr);
70     }
71     dat->initialized[direction] = PETSC_TRUE;
72   }
73   *distcomm = dat->comms[direction];
74   PetscFunctionReturn(0);
75 }
76 
77 /*===================================================================================*/
78 /*              Implementations of SF public APIs                                    */
79 /*===================================================================================*/
/* Set up the SF: do the common Basic setup, then build the displacement/count/weight arrays needed
   by the MPI neighborhood collectives used in the Bcast/Reduce/FetchAndOp implementations below */
static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
{
  PetscErrorCode   ierr;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  PetscInt         i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt   *rootoffset,*leafoffset;
  PetscMPIInt      m,n;

  PetscFunctionBegin;
  /* SFNeighbor inherits from Basic */
  ierr = PetscSFSetUp_Basic(sf);CHKERRQ(ierr);
  /* SFNeighbor specific */
  sf->persistent  = PETSC_FALSE;
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
  /* m, n = numbers of remote (non-distinguished) root/leaf neighbor ranks */
  dat->rootdegree = m = (PetscMPIInt)(nrootranks-ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks-ndleafranks);
  sf->nleafreqs   = 0; /* A neighborhood collective uses a single request on the root side and none on the leaf side */
  dat->nrootreqs  = 1;

  /* Only setup MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
  ierr = PetscMalloc6(m,&dat->rootdispls,m,&dat->rootcounts,m,&dat->rootweights,n,&dat->leafdispls,n,&dat->leafcounts,n,&dat->leafweights);CHKERRQ(ierr);

 #if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  /* Large-count path: displs/counts are stored as PetscInt directly; graph edge weights must still be
     PetscMPIInt, so counts are scaled down into the int range below */
  for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
    dat->rootdispls[j]  = rootoffset[i]-rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i+1]-rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647); /* Scale to range of PetscMPIInt */
  }

  for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
    dat->leafdispls[j]  = leafoffset[i]-leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i+1]-leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647);
  }
 #else
  /* 32-bit path: offsets are PetscInt, so cast with overflow checking; note m and n are reused here as
     scratch variables (their degree values were already saved in dat->rootdegree/leafdegree above) */
  for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
    ierr = PetscMPIIntCast(rootoffset[i]-rootoffset[ndrootranks],&m);CHKERRQ(ierr); dat->rootdispls[j] = m;
    ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],        &n);CHKERRQ(ierr); dat->rootcounts[j] = n;
    dat->rootweights[j] = n; /* Counts fit in PetscMPIInt, so use them as the edge weights directly */
  }

  for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
    ierr = PetscMPIIntCast(leafoffset[i]-leafoffset[ndleafranks],&m);CHKERRQ(ierr); dat->leafdispls[j] = m;
    ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],        &n);CHKERRQ(ierr); dat->leafcounts[j] = n;
    dat->leafweights[j] = n;
  }
 #endif
  PetscFunctionReturn(0);
}
130 
131 static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
132 {
133   PetscErrorCode       ierr;
134   PetscInt             i;
135   PetscSF_Neighbor     *dat = (PetscSF_Neighbor*)sf->data;
136 
137   PetscFunctionBegin;
138   if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
139   ierr = PetscFree6(dat->rootdispls,dat->rootcounts,dat->rootweights,dat->leafdispls,dat->leafcounts,dat->leafweights);CHKERRQ(ierr);
140   for (i=0; i<2; i++) {
141     if (dat->initialized[i]) {
142       ierr = MPI_Comm_free(&dat->comms[i]);CHKERRMPI(ierr);
143       dat->initialized[i] = PETSC_FALSE;
144     }
145   }
146   ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr); /* Common part */
147   PetscFunctionReturn(0);
148 }
149 
150 static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
151 {
152   PetscErrorCode ierr;
153 
154   PetscFunctionBegin;
155   ierr = PetscSFReset_Neighbor(sf);CHKERRQ(ierr);
156   ierr = PetscFree(sf->data);CHKERRQ(ierr);
157   PetscFunctionReturn(0);
158 }
159 
/* Begin broadcasting root values to leaves (rootdata -> leafdata, combined with op).
   Remote ranks are served by a nonblocking neighborhood alltoallv on the ROOT2LEAF graph communicator;
   distinguished (shared-memory) ranks are handled by PetscSFLinkScatterLocal while the collective is
   in flight. Completed by PetscSFBcastEnd_Basic (see PetscSFCreate_Neighbor). */
static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode       ierr;
  PetscSFLink          link;
  PetscSF_Neighbor     *dat = (PetscSF_Neighbor*)sf->data;
  MPI_Comm             distcomm = MPI_COMM_NULL;
  void                 *rootbuf = NULL,*leafbuf = NULL;
  MPI_Request          *req;

  PetscFunctionBegin;
  ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
  /* Pack root values destined for remote ranks into the link's send buffer */
  ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
  /* Do neighborhood alltoallv for remote ranks */
  ierr = PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr);
  ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&distcomm);CHKERRQ(ierr);
  ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr);
  ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  /* OpenMPI-3.0 ran into error with rootdegree = leafdegree = 0, so we skip the call in this case */
  if (dat->rootdegree || dat->leafdegree) {
    ierr = MPIU_Ineighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,distcomm,req);CHKERRMPI(ierr);
  }
  ierr = PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit);CHKERRQ(ierr);
  /* Overlap communication with the local scatter to distinguished ranks */
  ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
185 
/* Shared begin-phase for leaf-to-root operations (used by both Reduce and FetchAndOp): pack leaf data
   bound for remote ranks and post a nonblocking neighborhood alltoallv on the LEAF2ROOT communicator.
   Returns the in-use link through *out; the caller performs the local part and completes the
   communication later (e.g. in the matching End routine). sfop selects PETSCSF_REDUCE or PETSCSF_FETCH. */
PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
{
  PetscErrorCode       ierr;
  PetscSFLink          link;
  PetscSF_Neighbor     *dat = (PetscSF_Neighbor*)sf->data;
  MPI_Comm             distcomm = MPI_COMM_NULL;
  void                 *rootbuf = NULL,*leafbuf = NULL;
  MPI_Request          *req = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr);
  /* Pack leaf values destined for remote ranks into the link's send buffer */
  ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
  /* Do neighborhood alltoallv for remote ranks */
  ierr = PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr);
  ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_LEAF2ROOT,&distcomm);CHKERRQ(ierr);
  ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr);
  ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
  /* Skip the call when both degrees are zero (see the OpenMPI-3.0 note in PetscSFBcastBegin_Neighbor) */
  if (dat->rootdegree || dat->leafdegree) {
    ierr = MPIU_Ineighbor_alltoallv(leafbuf,dat->leafcounts,dat->leafdispls,unit,rootbuf,dat->rootcounts,dat->rootdispls,unit,distcomm,req);CHKERRMPI(ierr);
  }
  ierr = PetscLogMPIMessages(dat->leafdegree,dat->leafcounts,unit,dat->rootdegree,dat->rootcounts,unit);CHKERRQ(ierr);
  *out = link;
  PetscFunctionReturn(0);
}
210 
211 static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
212 {
213   PetscErrorCode       ierr;
214   PetscSFLink          link = NULL;
215 
216   PetscFunctionBegin;
217   ierr = PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
218   ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr);
219   PetscFunctionReturn(0);
220 }
221 
222 static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
223 {
224   PetscErrorCode       ierr;
225   PetscSFLink          link = NULL;
226 
227   PetscFunctionBegin;
228   ierr = PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr);
229   ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
/* Finish fetch-and-op: wait for the leaf-to-root communication posted in the Begin phase, apply op to the
   remote contributions on the roots while recording the roots' previous values, then return those fetched
   values to the leaves with a blocking neighborhood alltoallv and unpack them into leafupdate */
static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFLink       link = NULL;
  MPI_Comm          comm = MPI_COMM_NULL;
  PetscSF_Neighbor  *dat = (PetscSF_Neighbor*)sf->data;
  void              *rootbuf = NULL,*leafbuf = NULL;

  PetscFunctionBegin;
  /* Retrieve the link created in PetscSFFetchAndOpBegin_Neighbor and wait for its LEAF2ROOT traffic */
  ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
  /* Process remote fetch-and-op */
  ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr);
  /* Bcast the updated rootbuf back to leaves */
  ierr = PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr);
  ierr = PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&comm);CHKERRQ(ierr);
  /* No requests needed: the return trip uses the blocking variant of the collective */
  ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  /* Skip the call when both degrees are zero (see the OpenMPI-3.0 note in PetscSFBcastBegin_Neighbor) */
  if (dat->rootdegree || dat->leafdegree) {
    ierr = MPIU_Neighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,comm);CHKERRMPI(ierr);
  }
  ierr = PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit);CHKERRQ(ierr);
  ierr = PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_FALSE/* host2device after recving */);CHKERRQ(ierr);
  /* The fetched values overwrite (MPI_REPLACE) whatever is in leafupdate */
  ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
260 
261 PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
262 {
263   PetscErrorCode   ierr;
264   PetscSF_Neighbor *dat;
265 
266   PetscFunctionBegin;
267   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
268   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
269   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
270   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
271   sf->ops->View                 = PetscSFView_Basic;
272 
273   sf->ops->SetUp                = PetscSFSetUp_Neighbor;
274   sf->ops->Reset                = PetscSFReset_Neighbor;
275   sf->ops->Destroy              = PetscSFDestroy_Neighbor;
276   sf->ops->BcastBegin           = PetscSFBcastBegin_Neighbor;
277   sf->ops->ReduceBegin          = PetscSFReduceBegin_Neighbor;
278   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Neighbor;
279   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Neighbor;
280 
281   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
282   sf->data = (void*)dat;
283   PetscFunctionReturn(0);
284 }
285