xref: /petsc/src/vec/is/sf/impls/basic/neighbor/sfneighbor.c (revision d8e47b638cf8f604a99e9678e1df24f82d959cd7)
1027eff8aSJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h>
2027eff8aSJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
3100ffedbSJunchao Zhang #include <petscpkg_version.h>
4027eff8aSJunchao Zhang 
56497c311SBarry Smith /* Convenience local types and wrappers */
6c87b50c4SJunchao Zhang #if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
715279c80SJunchao Zhang typedef MPI_Count PetscSFCount;
815279c80SJunchao Zhang typedef MPI_Aint  PetscSFAint;
96497c311SBarry Smith   #define MPIU_Neighbor_alltoallv(a, b, c, d, e, f, g, h, i)            MPI_Neighbor_alltoallv_c(a, b, c, d, e, f, g, h, i)
106497c311SBarry Smith   #define MPIU_Neighbor_alltoallv_init(a, b, c, d, e, f, g, h, i, j, k) MPI_Neighbor_alltoallv_init_c(a, b, c, d, e, f, g, h, i, j, k)
116497c311SBarry Smith   #define MPIU_Ineighbor_alltoallv(a, b, c, d, e, f, g, h, i, j)        MPI_Ineighbor_alltoallv_c(a, b, c, d, e, f, g, h, i, j)
12c87b50c4SJunchao Zhang #else
13c87b50c4SJunchao Zhang typedef PetscMPIInt PetscSFCount;
1415279c80SJunchao Zhang typedef PetscMPIInt PetscSFAint;
156497c311SBarry Smith   #define MPIU_Neighbor_alltoallv(a, b, c, d, e, f, g, h, i)            MPI_Neighbor_alltoallv(a, b, c, d, e, f, g, h, i)
166497c311SBarry Smith   #define MPIU_Neighbor_alltoallv_init(a, b, c, d, e, f, g, h, i, j, k) MPI_Neighbor_alltoallv_init(a, b, c, d, e, f, g, h, i, j, k)
176497c311SBarry Smith   #define MPIU_Ineighbor_alltoallv(a, b, c, d, e, f, g, h, i, j)        MPI_Ineighbor_alltoallv(a, b, c, d, e, f, g, h, i, j)
18c87b50c4SJunchao Zhang #endif
19027eff8aSJunchao Zhang 
20027eff8aSJunchao Zhang typedef struct {
21027eff8aSJunchao Zhang   SFBASICHEADER;
22027eff8aSJunchao Zhang   MPI_Comm      comms[2];                /* Communicators with distributed topology in both directions */
23027eff8aSJunchao Zhang   PetscBool     initialized[2];          /* Are the two communicators initialized? */
2415279c80SJunchao Zhang   PetscSFCount *rootcounts, *leafcounts; /* counts for non-distinguished ranks */
2515279c80SJunchao Zhang   PetscSFAint  *rootdispls, *leafdispls; /* displs for non-distinguished ranks */
26c87b50c4SJunchao Zhang   PetscMPIInt  *rootweights, *leafweights;
27cd620004SJunchao Zhang   PetscInt      rootdegree, leafdegree;
28027eff8aSJunchao Zhang } PetscSF_Neighbor;
29027eff8aSJunchao Zhang 
/*===================================================================================*/
/*              Internal utility routines                                            */
/*===================================================================================*/
33027eff8aSJunchao Zhang 
PetscLogMPIMessages(PetscInt nsend,PetscSFCount * sendcnts,MPI_Datatype sendtype,PetscInt nrecv,PetscSFCount * recvcnts,MPI_Datatype recvtype)34d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend, PetscSFCount *sendcnts, MPI_Datatype sendtype, PetscInt nrecv, PetscSFCount *recvcnts, MPI_Datatype recvtype)
35d71ae5a4SJacob Faibussowitsch {
36c87b50c4SJunchao Zhang   PetscFunctionBegin;
372611ad71SToby Isaac   if (PetscDefined(USE_LOG)) {
38c87b50c4SJunchao Zhang     petsc_isend_ct += (PetscLogDouble)nsend;
39c87b50c4SJunchao Zhang     petsc_irecv_ct += (PetscLogDouble)nrecv;
40c87b50c4SJunchao Zhang 
41c87b50c4SJunchao Zhang     if (sendtype != MPI_DATATYPE_NULL) {
42c87b50c4SJunchao Zhang       PetscMPIInt i, typesize;
439566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Type_size(sendtype, &typesize));
44c87b50c4SJunchao Zhang       for (i = 0; i < nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i] * typesize);
45c87b50c4SJunchao Zhang     }
46c87b50c4SJunchao Zhang 
47c87b50c4SJunchao Zhang     if (recvtype != MPI_DATATYPE_NULL) {
48c87b50c4SJunchao Zhang       PetscMPIInt i, typesize;
499566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Type_size(recvtype, &typesize));
50c87b50c4SJunchao Zhang       for (i = 0; i < nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i] * typesize);
51c87b50c4SJunchao Zhang     }
522611ad71SToby Isaac   }
533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
54c87b50c4SJunchao Zhang }
55c87b50c4SJunchao Zhang 
56027eff8aSJunchao Zhang /* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */
PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm * distcomm)57d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf, PetscSFDirection direction, MPI_Comm *distcomm)
58d71ae5a4SJacob Faibussowitsch {
59027eff8aSJunchao Zhang   PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
60027eff8aSJunchao Zhang 
61027eff8aSJunchao Zhang   PetscFunctionBegin;
62f5d27ee7SJunchao Zhang   if (!dat->initialized[direction]) {
636497c311SBarry Smith     PetscMPIInt        nrootranks, ndrootranks, nleafranks, ndleafranks;
64f5d27ee7SJunchao Zhang     PetscMPIInt        indegree, outdegree;
65f5d27ee7SJunchao Zhang     const PetscMPIInt *rootranks, *leafranks, *sources, *destinations;
66f5d27ee7SJunchao Zhang     MPI_Comm           comm, *mycomm = &dat->comms[direction];
67f5d27ee7SJunchao Zhang 
689566063dSJacob Faibussowitsch     PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, NULL, NULL));       /* Which ranks will access my roots (I am a destination) */
699566063dSJacob Faibussowitsch     PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, &leafranks, NULL, NULL, NULL)); /* My leaves will access whose roots (I am a source) */
70f5d27ee7SJunchao Zhang     indegree     = nrootranks - ndrootranks;
71f5d27ee7SJunchao Zhang     outdegree    = nleafranks - ndleafranks;
728e3a54c0SPierre Jolivet     sources      = PetscSafePointerPlusOffset(rootranks, ndrootranks);
738e3a54c0SPierre Jolivet     destinations = PetscSafePointerPlusOffset(leafranks, ndleafranks);
749566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
75cd620004SJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT) {
769566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, indegree, sources, dat->rootweights, outdegree, destinations, dat->leafweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
77cd620004SJunchao Zhang     } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
789566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, outdegree, destinations, dat->leafweights, indegree, sources, dat->rootweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
79027eff8aSJunchao Zhang     }
80027eff8aSJunchao Zhang     dat->initialized[direction] = PETSC_TRUE;
81027eff8aSJunchao Zhang   }
82027eff8aSJunchao Zhang   *distcomm = dat->comms[direction];
833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
84027eff8aSJunchao Zhang }
85027eff8aSJunchao Zhang 
86f5d27ee7SJunchao Zhang // start MPI_Ineighbor_alltoallv (only used for inter-proccess communication)
PetscSFLinkStartCommunication_Neighbor(PetscSF sf,PetscSFLink link,PetscSFDirection direction)87f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkStartCommunication_Neighbor(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
88f5d27ee7SJunchao Zhang {
89f5d27ee7SJunchao Zhang   PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
90f5d27ee7SJunchao Zhang   MPI_Comm          distcomm = MPI_COMM_NULL;
91f5d27ee7SJunchao Zhang   void             *rootbuf = NULL, *leafbuf = NULL;
92f5d27ee7SJunchao Zhang   MPI_Request      *req = NULL;
93f5d27ee7SJunchao Zhang 
94f5d27ee7SJunchao Zhang   PetscFunctionBegin;
95f5d27ee7SJunchao Zhang   if (direction == PETSCSF_ROOT2LEAF) {
96f5d27ee7SJunchao Zhang     PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
97f5d27ee7SJunchao Zhang   } else {
98f5d27ee7SJunchao Zhang     PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host */));
99f5d27ee7SJunchao Zhang   }
100f5d27ee7SJunchao Zhang 
101f5d27ee7SJunchao Zhang   PetscCall(PetscSFGetDistComm_Neighbor(sf, direction, &distcomm));
102f5d27ee7SJunchao Zhang   PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, &rootbuf, &leafbuf, &req, NULL));
103646b835dSJunchao Zhang   PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link));
104f5d27ee7SJunchao Zhang 
105f5d27ee7SJunchao Zhang   if (dat->rootdegree || dat->leafdegree) { // OpenMPI-3.0 ran into error with rootdegree = leafdegree = 0, so we skip the call in this case
106f5d27ee7SJunchao Zhang     if (direction == PETSCSF_ROOT2LEAF) {
107f5d27ee7SJunchao Zhang       PetscCallMPI(MPIU_Ineighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, link->unit, leafbuf, dat->leafcounts, dat->leafdispls, link->unit, distcomm, req));
108f5d27ee7SJunchao Zhang       PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, link->unit, dat->leafdegree, dat->leafcounts, link->unit));
109f5d27ee7SJunchao Zhang     } else {
110f5d27ee7SJunchao Zhang       PetscCallMPI(MPIU_Ineighbor_alltoallv(leafbuf, dat->leafcounts, dat->leafdispls, link->unit, rootbuf, dat->rootcounts, dat->rootdispls, link->unit, distcomm, req));
111f5d27ee7SJunchao Zhang       PetscCall(PetscLogMPIMessages(dat->leafdegree, dat->leafcounts, link->unit, dat->rootdegree, dat->rootcounts, link->unit));
112f5d27ee7SJunchao Zhang     }
113f5d27ee7SJunchao Zhang   }
114f5d27ee7SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
115f5d27ee7SJunchao Zhang }
116f5d27ee7SJunchao Zhang 
1176677b1c1SJunchao Zhang #if defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
PetscSFLinkInitMPIRequests_Persistent_Neighbor(PetscSF sf,PetscSFLink link,PetscSFDirection direction)1186677b1c1SJunchao Zhang static PetscErrorCode PetscSFLinkInitMPIRequests_Persistent_Neighbor(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
1196677b1c1SJunchao Zhang {
1206677b1c1SJunchao Zhang   PetscSF_Neighbor  *dat           = (PetscSF_Neighbor *)sf->data;
1216677b1c1SJunchao Zhang   MPI_Comm           distcomm      = MPI_COMM_NULL;
1226677b1c1SJunchao Zhang   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
1236677b1c1SJunchao Zhang   const PetscInt     rootdirect_mpi = link->rootdirect_mpi;
1246677b1c1SJunchao Zhang   MPI_Request       *req            = link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi];
1256677b1c1SJunchao Zhang   void              *rootbuf = link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi], *leafbuf = link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi];
1266677b1c1SJunchao Zhang   MPI_Info           info;
1276677b1c1SJunchao Zhang 
1286677b1c1SJunchao Zhang   PetscFunctionBegin;
1296677b1c1SJunchao Zhang   PetscCall(PetscSFGetDistComm_Neighbor(sf, direction, &distcomm));
1306677b1c1SJunchao Zhang   if (dat->rootdegree || dat->leafdegree) {
1316677b1c1SJunchao Zhang     if (!link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi]) {
1326677b1c1SJunchao Zhang       PetscCallMPI(MPI_Info_create(&info)); // currently, we don't use info
1336677b1c1SJunchao Zhang       if (direction == PETSCSF_ROOT2LEAF) {
1346677b1c1SJunchao Zhang         PetscCallMPI(MPIU_Neighbor_alltoallv_init(rootbuf, dat->rootcounts, dat->rootdispls, link->unit, leafbuf, dat->leafcounts, dat->leafdispls, link->unit, distcomm, info, req));
1356677b1c1SJunchao Zhang       } else {
1366677b1c1SJunchao Zhang         PetscCallMPI(MPIU_Neighbor_alltoallv_init(leafbuf, dat->leafcounts, dat->leafdispls, link->unit, rootbuf, dat->rootcounts, dat->rootdispls, link->unit, distcomm, info, req));
1376677b1c1SJunchao Zhang       }
1386677b1c1SJunchao Zhang       link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi] = PETSC_TRUE;
1396677b1c1SJunchao Zhang       PetscCallMPI(MPI_Info_free(&info));
1406677b1c1SJunchao Zhang     }
1416677b1c1SJunchao Zhang   }
1426677b1c1SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
1436677b1c1SJunchao Zhang }
1446677b1c1SJunchao Zhang 
1456677b1c1SJunchao Zhang // Start MPI requests. If use non-GPU aware MPI, we might need to copy data from device buf to host buf
PetscSFLinkStartCommunication_Persistent_Neighbor(PetscSF sf,PetscSFLink link,PetscSFDirection direction)1466677b1c1SJunchao Zhang static PetscErrorCode PetscSFLinkStartCommunication_Persistent_Neighbor(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
1476677b1c1SJunchao Zhang {
1486677b1c1SJunchao Zhang   PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
1496677b1c1SJunchao Zhang   MPI_Request      *req = NULL;
1506677b1c1SJunchao Zhang 
1516677b1c1SJunchao Zhang   PetscFunctionBegin;
1526677b1c1SJunchao Zhang   if (direction == PETSCSF_ROOT2LEAF) {
1536677b1c1SJunchao Zhang     PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
1546677b1c1SJunchao Zhang   } else {
1556677b1c1SJunchao Zhang     PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host */));
1566677b1c1SJunchao Zhang   }
1576677b1c1SJunchao Zhang 
1586677b1c1SJunchao Zhang   PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &req, NULL));
159646b835dSJunchao Zhang   PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link));
1606677b1c1SJunchao Zhang   if (dat->rootdegree || dat->leafdegree) {
1616677b1c1SJunchao Zhang     PetscCallMPI(MPI_Start(req));
1626677b1c1SJunchao Zhang     if (direction == PETSCSF_ROOT2LEAF) {
1636677b1c1SJunchao Zhang       PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, link->unit, dat->leafdegree, dat->leafcounts, link->unit));
1646677b1c1SJunchao Zhang     } else {
1656677b1c1SJunchao Zhang       PetscCall(PetscLogMPIMessages(dat->leafdegree, dat->leafcounts, link->unit, dat->rootdegree, dat->rootcounts, link->unit));
1666677b1c1SJunchao Zhang     }
1676677b1c1SJunchao Zhang   }
1686677b1c1SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
1696677b1c1SJunchao Zhang }
1706677b1c1SJunchao Zhang #endif
1716677b1c1SJunchao Zhang 
PetscSFSetCommunicationOps_Neighbor(PetscSF sf,PetscSFLink link)172f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFSetCommunicationOps_Neighbor(PetscSF sf, PetscSFLink link)
173f5d27ee7SJunchao Zhang {
174f5d27ee7SJunchao Zhang   PetscFunctionBegin;
1756677b1c1SJunchao Zhang #if defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
1766677b1c1SJunchao Zhang   if (sf->persistent) {
1776677b1c1SJunchao Zhang     link->InitMPIRequests    = PetscSFLinkInitMPIRequests_Persistent_Neighbor;
1786677b1c1SJunchao Zhang     link->StartCommunication = PetscSFLinkStartCommunication_Persistent_Neighbor;
1796677b1c1SJunchao Zhang   } else
1806677b1c1SJunchao Zhang #endif
1816677b1c1SJunchao Zhang   {
182f5d27ee7SJunchao Zhang     link->StartCommunication = PetscSFLinkStartCommunication_Neighbor;
1836677b1c1SJunchao Zhang   }
184f5d27ee7SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
185f5d27ee7SJunchao Zhang }
186f5d27ee7SJunchao Zhang 
/*===================================================================================*/
/*              Implementations of SF public APIs                                    */
/*===================================================================================*/
PetscSFSetUp_Neighbor(PetscSF sf)190d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
191d71ae5a4SJacob Faibussowitsch {
192027eff8aSJunchao Zhang   PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
1936497c311SBarry Smith   PetscMPIInt       nrootranks, ndrootranks, nleafranks, ndleafranks;
194027eff8aSJunchao Zhang   const PetscInt   *rootoffset, *leafoffset;
195100ffedbSJunchao Zhang   PetscMPIInt       m, n, m2, n2;
196027eff8aSJunchao Zhang 
197027eff8aSJunchao Zhang   PetscFunctionBegin;
198cd620004SJunchao Zhang   /* SFNeighbor inherits from Basic */
1999566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp_Basic(sf));
200cd620004SJunchao Zhang   /* SFNeighbor specific */
2019566063dSJacob Faibussowitsch   PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
2029566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
203c87b50c4SJunchao Zhang   dat->rootdegree = m = (PetscMPIInt)(nrootranks - ndrootranks);
204c87b50c4SJunchao Zhang   dat->leafdegree = n = (PetscMPIInt)(nleafranks - ndleafranks);
205cd620004SJunchao Zhang   sf->nleafreqs       = 0;
206f5d27ee7SJunchao Zhang   dat->nrootreqs      = 1; // collectives only need one MPI_Request. We just put it in rootreqs[]
207eb02082bSJunchao Zhang 
208100ffedbSJunchao Zhang   m2 = m;
209100ffedbSJunchao Zhang   n2 = n;
210100ffedbSJunchao Zhang #if defined(PETSC_HAVE_OPENMPI) // workaround for an OpenMPI 5.0.x bug, https://github.com/open-mpi/ompi/pull/12614
211100ffedbSJunchao Zhang   #if PETSC_PKG_OPENMPI_VERSION_LE(5, 0, 3)
212100ffedbSJunchao Zhang   m2 = m ? m : 1;
213100ffedbSJunchao Zhang   n2 = n ? n : 1;
21441751ff9SJunchao Zhang   #endif
215100ffedbSJunchao Zhang #endif
216100ffedbSJunchao Zhang   // Only setup MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory
217100ffedbSJunchao Zhang   PetscCall(PetscMalloc6(m2, &dat->rootdispls, m2, &dat->rootcounts, m2, &dat->rootweights, n2, &dat->leafdispls, n2, &dat->leafcounts, n2, &dat->leafweights));
218c87b50c4SJunchao Zhang 
219c87b50c4SJunchao Zhang #if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
2206497c311SBarry Smith   for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
221c87b50c4SJunchao Zhang     dat->rootdispls[j]  = rootoffset[i] - rootoffset[ndrootranks];
222c87b50c4SJunchao Zhang     dat->rootcounts[j]  = rootoffset[i + 1] - rootoffset[i];
223*1690c2aeSBarry Smith     dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j] / (PetscReal)PETSC_INT_MAX * 2147483647); /* Scale to range of PetscMPIInt */
224c87b50c4SJunchao Zhang   }
225c87b50c4SJunchao Zhang 
2266497c311SBarry Smith   for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
227c87b50c4SJunchao Zhang     dat->leafdispls[j]  = leafoffset[i] - leafoffset[ndleafranks];
228c87b50c4SJunchao Zhang     dat->leafcounts[j]  = leafoffset[i + 1] - leafoffset[i];
229*1690c2aeSBarry Smith     dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j] / (PetscReal)PETSC_INT_MAX * 2147483647);
230c87b50c4SJunchao Zhang   }
231c87b50c4SJunchao Zhang #else
2326497c311SBarry Smith   for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
2339371c9d4SSatish Balay     PetscCall(PetscMPIIntCast(rootoffset[i] - rootoffset[ndrootranks], &m));
2349371c9d4SSatish Balay     dat->rootdispls[j] = m;
2359371c9d4SSatish Balay     PetscCall(PetscMPIIntCast(rootoffset[i + 1] - rootoffset[i], &n));
2369371c9d4SSatish Balay     dat->rootcounts[j]  = n;
237c87b50c4SJunchao Zhang     dat->rootweights[j] = n;
238027eff8aSJunchao Zhang   }
239027eff8aSJunchao Zhang 
2406497c311SBarry Smith   for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
2419371c9d4SSatish Balay     PetscCall(PetscMPIIntCast(leafoffset[i] - leafoffset[ndleafranks], &m));
2429371c9d4SSatish Balay     dat->leafdispls[j] = m;
2439371c9d4SSatish Balay     PetscCall(PetscMPIIntCast(leafoffset[i + 1] - leafoffset[i], &n));
2449371c9d4SSatish Balay     dat->leafcounts[j]  = n;
245c87b50c4SJunchao Zhang     dat->leafweights[j] = n;
246027eff8aSJunchao Zhang   }
247c87b50c4SJunchao Zhang #endif
2483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
249027eff8aSJunchao Zhang }
250027eff8aSJunchao Zhang 
PetscSFReset_Neighbor(PetscSF sf)251d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
252d71ae5a4SJacob Faibussowitsch {
253027eff8aSJunchao Zhang   PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
254027eff8aSJunchao Zhang 
255027eff8aSJunchao Zhang   PetscFunctionBegin;
25628b400f6SJacob Faibussowitsch   PetscCheck(!dat->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
2579566063dSJacob Faibussowitsch   PetscCall(PetscFree6(dat->rootdispls, dat->rootcounts, dat->rootweights, dat->leafdispls, dat->leafcounts, dat->leafweights));
2586497c311SBarry Smith   for (int i = 0; i < 2; i++) {
259027eff8aSJunchao Zhang     if (dat->initialized[i]) {
2609566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_free(&dat->comms[i]));
261027eff8aSJunchao Zhang       dat->initialized[i] = PETSC_FALSE;
262027eff8aSJunchao Zhang     }
263027eff8aSJunchao Zhang   }
2649566063dSJacob Faibussowitsch   PetscCall(PetscSFReset_Basic(sf)); /* Common part */
2653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
266027eff8aSJunchao Zhang }
267027eff8aSJunchao Zhang 
PetscSFDestroy_Neighbor(PetscSF sf)268d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
269d71ae5a4SJacob Faibussowitsch {
270027eff8aSJunchao Zhang   PetscFunctionBegin;
2719566063dSJacob Faibussowitsch   PetscCall(PetscSFReset_Neighbor(sf));
2729566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->data));
2733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
274027eff8aSJunchao Zhang }
275027eff8aSJunchao Zhang 
PetscSFCreate_Neighbor(PetscSF sf)276d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
277d71ae5a4SJacob Faibussowitsch {
278027eff8aSJunchao Zhang   PetscSF_Neighbor *dat;
279027eff8aSJunchao Zhang 
280027eff8aSJunchao Zhang   PetscFunctionBegin;
28172502a1fSJunchao Zhang   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
282f5d27ee7SJunchao Zhang   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
283ad227feaSJunchao Zhang   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
284f5d27ee7SJunchao Zhang   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
285027eff8aSJunchao Zhang   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
286f5d27ee7SJunchao Zhang   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
287f5d27ee7SJunchao Zhang   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
288027eff8aSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
289027eff8aSJunchao Zhang   sf->ops->View                 = PetscSFView_Basic;
290027eff8aSJunchao Zhang 
291027eff8aSJunchao Zhang   sf->ops->SetUp               = PetscSFSetUp_Neighbor;
292027eff8aSJunchao Zhang   sf->ops->Reset               = PetscSFReset_Neighbor;
293027eff8aSJunchao Zhang   sf->ops->Destroy             = PetscSFDestroy_Neighbor;
294f5d27ee7SJunchao Zhang   sf->ops->SetCommunicationOps = PetscSFSetCommunicationOps_Neighbor;
295027eff8aSJunchao Zhang 
2966677b1c1SJunchao Zhang #if defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
2976677b1c1SJunchao Zhang   PetscObjectOptionsBegin((PetscObject)sf);
2986677b1c1SJunchao Zhang   PetscCall(PetscOptionsBool("-sf_neighbor_persistent", "Use MPI-4 persistent neighborhood collectives; used along with -sf_type neighbor", "PetscSFCreate", sf->persistent, &sf->persistent, NULL));
2996677b1c1SJunchao Zhang   PetscOptionsEnd();
3006677b1c1SJunchao Zhang #endif
3016677b1c1SJunchao Zhang   sf->collective = PETSC_TRUE;
3026677b1c1SJunchao Zhang 
3034dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&dat));
304027eff8aSJunchao Zhang   sf->data = (void *)dat;
3053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
306027eff8aSJunchao Zhang }
307