#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/* Convenience local types */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
typedef MPI_Count PetscSFCount;
typedef MPI_Aint  PetscSFAint;
#else
typedef PetscMPIInt PetscSFCount;
typedef PetscMPIInt PetscSFAint;
#endif
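/* With MPI large-count support and 64-bit PetscInt, counts and displacements are typed as
   MPI_Count/MPI_Aint so that a single neighborhood message may carry more than 2^31-1 entries;
   the MPIU_(I)neighbor_alltoallv wrappers used below are then expected to resolve to the
   large-count MPI variants. Otherwise both types fall back to PetscMPIInt. */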

typedef struct {
  SFBASICHEADER;
  MPI_Comm      comms[2];                /* Communicators with distributed topology in both directions */
  PetscBool     initialized[2];          /* Are the two communicators initialized? */
  PetscSFCount *rootcounts, *leafcounts; /* counts for non-distinguished ranks */
  PetscSFAint  *rootdispls, *leafdispls; /* displs for non-distinguished ranks */
  PetscMPIInt  *rootweights, *leafweights;
  PetscInt      rootdegree, leafdegree;
} PetscSF_Neighbor;

/*===================================================================================*/
/*              Internal utility routines                                            */
/*===================================================================================*/

static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend, PetscSFCount *sendcnts, MPI_Datatype sendtype, PetscInt nrecv, PetscSFCount *recvcnts, MPI_Datatype recvtype)
{
  PetscFunctionBegin;
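  /* Each neighbor is logged as one logical isend/irecv; byte counts come from the per-neighbor
     counts times the datatype size. Passing MPI_DATATYPE_NULL skips the logging for that side. */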
#if defined(PETSC_USE_LOG)
  petsc_isend_ct += (PetscLogDouble)nsend;
  petsc_irecv_ct += (PetscLogDouble)nrecv;

  if (sendtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i, typesize;
    PetscCallMPI(MPI_Type_size(sendtype, &typesize));
    for (i = 0; i < nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i] * typesize);
  }

  if (recvtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i, typesize;
    PetscCallMPI(MPI_Type_size(recvtype, &typesize));
    for (i = 0; i < nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i] * typesize);
  }
#endif
  PetscFunctionReturn(0);
}

/* Get the communicator with distributed graph topology. It is not cheap to build, so we create it on demand instead of at PetscSFSetUp() time */
static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf, PetscSFDirection direction, MPI_Comm *distcomm)
{
  PetscSF_Neighbor  *dat = (PetscSF_Neighbor *)sf->data;
  PetscInt           nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscMPIInt *rootranks, *leafranks;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, NULL, NULL));       /* Which ranks will access my roots (I am a destination) */
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, &leafranks, NULL, NULL, NULL)); /* Which ranks own the roots my leaves access (I am a source) */

  if (!dat->initialized[direction]) {
    const PetscMPIInt indegree = nrootranks - ndrootranks, *sources = rootranks + ndrootranks;
    const PetscMPIInt outdegree = nleafranks - ndleafranks, *destinations = leafranks + ndleafranks;
    MPI_Comm         *mycomm = &dat->comms[direction];
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
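    /* Orient the graph by data flow: for LEAF2ROOT this rank receives from the ranks that access
       its roots and sends to the owners of its leaves; ROOT2LEAF swaps the two sides. Distinguished
       ranks are excluded since they communicate through shared memory instead of MPI. */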
    if (direction == PETSCSF_LEAF2ROOT) {
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, indegree, sources, dat->rootweights, outdegree, destinations, dat->leafweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
    } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, outdegree, destinations, dat->leafweights, indegree, sources, dat->rootweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
    }
    dat->initialized[direction] = PETSC_TRUE;
  }
  *distcomm = dat->comms[direction];
  PetscFunctionReturn(0);
}

/*===================================================================================*/
/*              Implementations of SF public APIs                                    */
/*===================================================================================*/
static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
  PetscInt          i, j, nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt   *rootoffset, *leafoffset;
  PetscMPIInt       m, n;

  PetscFunctionBegin;
  /* SFNeighbor inherits from Basic */
  PetscCall(PetscSFSetUp_Basic(sf));
  /* SFNeighbor specific */
  sf->persistent = PETSC_FALSE;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
  dat->rootdegree = m = (PetscMPIInt)(nrootranks - ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks - ndleafranks);
  sf->nleafreqs       = 0;
  dat->nrootreqs      = 1;

  /* Only set up MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
  PetscCall(PetscMalloc6(m, &dat->rootdispls, m, &dat->rootcounts, m, &dat->rootweights, n, &dat->leafdispls, n, &dat->leafcounts, n, &dat->leafweights));

#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
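  /* With large counts the per-neighbor counts may not fit in PetscMPIInt, yet the graph edge
     weights must be PetscMPIInt, so scale each count proportionally into [0, 2147483647] */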
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    dat->rootdispls[j]  = rootoffset[i] - rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i + 1] - rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647); /* Scale to range of PetscMPIInt */
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    dat->leafdispls[j]  = leafoffset[i] - leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i + 1] - leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647);
  }
#else
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    PetscCall(PetscMPIIntCast(rootoffset[i] - rootoffset[ndrootranks], &m));
    dat->rootdispls[j] = m;
    PetscCall(PetscMPIIntCast(rootoffset[i + 1] - rootoffset[i], &n));
    dat->rootcounts[j]  = n;
    dat->rootweights[j] = n;
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    PetscCall(PetscMPIIntCast(leafoffset[i] - leafoffset[ndleafranks], &m));
    dat->leafdispls[j] = m;
    PetscCall(PetscMPIIntCast(leafoffset[i + 1] - leafoffset[i], &n));
    dat->leafcounts[j]  = n;
    dat->leafweights[j] = n;
  }
#endif
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
{
  PetscInt          i;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;

  PetscFunctionBegin;
  PetscCheck(!dat->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree6(dat->rootdispls, dat->rootcounts, dat->rootweights, dat->leafdispls, dat->leafcounts, dat->leafweights));
  for (i = 0; i < 2; i++) {
    if (dat->initialized[i]) {
      PetscCallMPI(MPI_Comm_free(&dat->comms[i]));
      dat->initialized[i] = PETSC_FALSE;
    }
  }
  PetscCall(PetscSFReset_Basic(sf)); /* Common part */
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Neighbor(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(0);
}

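/* BcastBegin packs root data and posts the nonblocking neighborhood alltoallv; the matching
   PetscSFBcastEnd_Basic (installed in PetscSFCreate_Neighbor below) waits on the request and
   unpacks the leaf buffer */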
static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink       link;
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
  MPI_Comm          distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL, *leafbuf = NULL;
  MPI_Request      *req;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  /* Do neighborhood alltoallv for remote ranks */
  PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf, PETSCSF_ROOT2LEAF, &distcomm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF));
  /* Open MPI 3.0 errored when rootdegree = leafdegree = 0, so we skip the call in that case */
  if (dat->rootdegree || dat->leafdegree) PetscCallMPI(MPIU_Ineighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, unit, leafbuf, dat->leafcounts, dat->leafdispls, unit, distcomm, req));
  PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, unit, dat->leafdegree, dat->leafcounts, unit));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
  PetscFunctionReturn(0);
}

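/* Shared begin phase for Reduce and FetchAndOp: pack leaf data, post the leaf-to-root
   neighborhood alltoallv, and return the link so callers can overlap their local work */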
static inline PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
{
  PetscSFLink       link;
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
  MPI_Comm          distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL, *leafbuf = NULL;
  MPI_Request      *req = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  /* Do neighborhood alltoallv for remote ranks */
  PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf, PETSCSF_LEAF2ROOT, &distcomm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT));
  if (dat->rootdegree || dat->leafdegree) PetscCallMPI(MPIU_Ineighbor_alltoallv(leafbuf, dat->leafcounts, dat->leafdispls, unit, rootbuf, dat->rootcounts, dat->rootdispls, unit, distcomm, req));
  PetscCall(PetscLogMPIMessages(dat->leafdegree, dat->leafcounts, unit, dat->rootdegree, dat->rootcounts, unit));
  *out = link;
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Neighbor(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Neighbor(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
  PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
  PetscFunctionReturn(0);
}

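/* Finish the leaf-to-root phase, process the remote fetch-and-op at the roots, then bcast the
   updated rootbuf back to the leaves. The return trip uses the blocking neighborhood alltoallv
   since its result is unpacked into leafupdate immediately afterwards */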
static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink       link    = NULL;
  MPI_Comm          comm    = MPI_COMM_NULL;
  PetscSF_Neighbor *dat     = (PetscSF_Neighbor *)sf->data;
  void             *rootbuf = NULL, *leafbuf = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  /* Process remote fetch-and-op */
  PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
  /* Bcast the updated rootbuf back to leaves */
  PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscSFGetDistComm_Neighbor(sf, PETSCSF_ROOT2LEAF, &comm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, NULL, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF));
  if (dat->rootdegree || dat->leafdegree) PetscCallMPI(MPIU_Neighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, unit, leafbuf, dat->leafcounts, dat->leafdispls, unit, comm));
  PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, unit, dat->leafdegree, dat->leafcounts, unit));
  PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_FALSE /* host2device after receiving */));
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(0);
}

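/* A minimal usage sketch, with hypothetical sizes and arrays (comm, nroots, nleaves, iremote,
   rootdata, leafdata are placeholders; any valid PetscSF graph works the same way):

     PetscSF sf;
     PetscCall(PetscSFCreate(comm, &sf));
     PetscCall(PetscSFSetType(sf, PETSCSFNEIGHBOR)); // or -sf_type neighbor via PetscSFSetFromOptions()
     PetscCall(PetscSFSetGraph(sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_COPY_VALUES));
     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFDestroy(&sf));
*/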
PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat;

  PetscFunctionBegin;
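  /* Ops inherited from SFBASIC; the end phases just wait on the posted requests and unpack,
     which works the same whether the messages came from point-to-point sends or a
     neighborhood collective */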
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->View                 = PetscSFView_Basic;

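  /* Neighbor-specific ops; the begin phases post MPI-3 neighborhood collectives on the
     on-demand distributed-graph communicators instead of point-to-point messages */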
  sf->ops->SetUp           = PetscSFSetUp_Neighbor;
  sf->ops->Reset           = PetscSFReset_Neighbor;
  sf->ops->Destroy         = PetscSFDestroy_Neighbor;
  sf->ops->BcastBegin      = PetscSFBcastBegin_Neighbor;
  sf->ops->ReduceBegin     = PetscSFReduceBegin_Neighbor;
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Neighbor;
  sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Neighbor;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(0);
}
273