xref: /petsc/src/sys/utils/mpits.c (revision 3ba1676111f5c958fe6c2729b46ca4d523958bb3)
1f6ced4a3SJed Brown #include <petscsys.h> /*I  "petscsys.h"  I*/
295c0884eSLisandro Dalcin #include <petsc/private/petscimpl.h>
3f6ced4a3SJed Brown 
/* Logging events for the two-sided rendezvous routines defined below; registered by the Sys package */
PetscLogEvent PETSC_BuildTwoSided;
PetscLogEvent PETSC_BuildTwoSidedF;

/* String names for PetscBuildTwoSidedType; the trailing three entries ("PetscBuildTwoSidedType",
   the "PETSC_BUILDTWOSIDED_" prefix, and NULL) follow the PETSc enum-registration convention
   used by PetscOptionsGetEnum() */
const char *const PetscBuildTwoSidedTypes[] = {"ALLREDUCE", "IBARRIER", "REDSCATTER", "PetscBuildTwoSidedType", "PETSC_BUILDTWOSIDED_", NULL};

/* Process-global algorithm selection; stays NOTSET until PetscCommBuildTwoSidedSetType()
   or the first PetscCommBuildTwoSidedGetType() call chooses a default */
static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
10f6ced4a3SJed Brown 
116145cd65SJed Brown /*@
126145cd65SJed Brown    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
136145cd65SJed Brown 
146145cd65SJed Brown    Logically Collective
156145cd65SJed Brown 
164165533cSJose E. Roman    Input Parameters:
17811af0c4SBarry Smith +  comm - `PETSC_COMM_WORLD`
18811af0c4SBarry Smith -  twosided - algorithm to use in subsequent calls to `PetscCommBuildTwoSided()`
196145cd65SJed Brown 
206145cd65SJed Brown    Level: developer
216145cd65SJed Brown 
226145cd65SJed Brown    Note:
236145cd65SJed Brown    This option is currently global, but could be made per-communicator.
246145cd65SJed Brown 
25811af0c4SBarry Smith .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedGetType()`, `PetscBuildTwoSidedType`
266145cd65SJed Brown @*/
27d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm, PetscBuildTwoSidedType twosided)
28d71ae5a4SJacob Faibussowitsch {
296145cd65SJed Brown   PetscFunctionBegin;
3076bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
316145cd65SJed Brown     PetscMPIInt b1[2], b2[2];
326145cd65SJed Brown     b1[0] = -(PetscMPIInt)twosided;
336145cd65SJed Brown     b1[1] = (PetscMPIInt)twosided;
341c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(b1, b2, 2, MPI_INT, MPI_MAX, comm));
3508401ef6SPierre Jolivet     PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Enum value must be same on all processes");
366145cd65SJed Brown   }
376145cd65SJed Brown   _twosided_type = twosided;
38*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
396145cd65SJed Brown }
406145cd65SJed Brown 
416145cd65SJed Brown /*@
42811af0c4SBarry Smith    PetscCommBuildTwoSidedGetType - get algorithm used when building two-sided communication
436145cd65SJed Brown 
446145cd65SJed Brown    Logically Collective
456145cd65SJed Brown 
464165533cSJose E. Roman    Output Parameters:
476145cd65SJed Brown +  comm - communicator on which to query algorithm
48811af0c4SBarry Smith -  twosided - algorithm to use for `PetscCommBuildTwoSided()`
496145cd65SJed Brown 
506145cd65SJed Brown    Level: developer
516145cd65SJed Brown 
52811af0c4SBarry Smith .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedSetType()`, `PetscBuildTwoSidedType`
536145cd65SJed Brown @*/
54d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm, PetscBuildTwoSidedType *twosided)
55d71ae5a4SJacob Faibussowitsch {
5605342407SJunchao Zhang   PetscMPIInt size;
57f6ced4a3SJed Brown 
58f6ced4a3SJed Brown   PetscFunctionBegin;
596145cd65SJed Brown   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
606145cd65SJed Brown   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
619566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(comm, &size));
6205342407SJunchao Zhang     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
638ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
6405342407SJunchao Zhang     if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
656145cd65SJed Brown #endif
669566063dSJacob Faibussowitsch     PetscCall(PetscOptionsGetEnum(NULL, NULL, "-build_twosided", PetscBuildTwoSidedTypes, (PetscEnum *)&_twosided_type, NULL));
67f6ced4a3SJed Brown   }
68f6ced4a3SJed Brown   *twosided = _twosided_type;
69*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
70f6ced4a3SJed Brown }
71f6ced4a3SJed Brown 
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
/* Memory-scalable rendezvous via nonblocking consensus (Hoefler, Siebert & Lumsdaine 2010):
   each rank posts synchronous sends to the ranks it addresses, then loops probing for
   incoming messages while testing its own sends; once all local sends have been matched it
   enters MPI_Ibarrier, and global termination is detected when the barrier completes.
   Receivers discover their sources on the fly and accumulate them in segmented buffers. */
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
{
  PetscMPIInt    nrecvs, tag, done, i;
  MPI_Aint       lb, unitbytes;
  char          *tdata;
  MPI_Request   *sendreqs, barrier;
  PetscSegBuffer segrank, segdata;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  /* Work on PETSc's inner communicator with a fresh tag so user traffic cannot be probed by mistake */
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCall(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  tdata = (char *)todata;
  PetscCall(PetscMalloc1(nto, &sendreqs));
  /* Synchronous sends: completion implies the receiver has matched the message, which is what
     makes entering the barrier after MPI_Testall succeeds correct */
  for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank));
  PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata));

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done = 0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status));
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      PetscCall(PetscSegBufferGet(segrank, 1, &recvrank));
      PetscCall(PetscSegBufferGet(segdata, count, &buf));
      *recvrank = status.MPI_SOURCE;
      PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE));
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent, nsends;
      PetscCall(PetscMPIIntCast(nto, &nsends));
      PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE));
      if (sent) {
        /* All our synchronous sends have been matched; join the consensus barrier */
        PetscCallMPI(MPI_Ibarrier(comm, &barrier));
        barrier_started = PETSC_TRUE;
        PetscCall(PetscFree(sendreqs));
      }
    } else {
      /* Keep probing until every rank has entered the barrier (no more messages can be in flight then) */
      PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE));
    }
  }
  *nfrom = nrecvs;
  /* Extraction hands ownership of the contiguous rank/data arrays to the caller (PetscFree) */
  PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks));
  PetscCall(PetscSegBufferDestroy(&segrank));
  PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata));
  PetscCall(PetscSegBufferDestroy(&segdata));
  PetscCall(PetscCommDestroy(&comm));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
132f6ced4a3SJed Brown 
133d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
134d71ae5a4SJacob Faibussowitsch {
13505342407SJunchao Zhang   PetscMPIInt       size, rank, *iflags, nrecvs, tag, *franks, i, flg;
136c3a59b84SJed Brown   MPI_Aint          lb, unitbytes;
137f6ced4a3SJed Brown   char             *tdata, *fdata;
138f6ced4a3SJed Brown   MPI_Request      *reqs, *sendreqs;
139f6ced4a3SJed Brown   MPI_Status       *statuses;
14005342407SJunchao Zhang   PetscCommCounter *counter;
141f6ced4a3SJed Brown 
142f6ced4a3SJed Brown   PetscFunctionBegin;
1439566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
1449566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1459566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(comm, &comm, &tag));
1469566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
14728b400f6SJacob Faibussowitsch   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
14805342407SJunchao Zhang   if (!counter->iflags) {
1499566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(size, &counter->iflags));
15005342407SJunchao Zhang     iflags = counter->iflags;
15105342407SJunchao Zhang   } else {
15205342407SJunchao Zhang     iflags = counter->iflags;
1539566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(iflags, size));
15405342407SJunchao Zhang   }
15505342407SJunchao Zhang   for (i = 0; i < nto; i++) iflags[toranks[i]] = 1;
1561c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, iflags, size, MPI_INT, MPI_SUM, comm));
15705342407SJunchao Zhang   nrecvs = iflags[rank];
1589566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
1595f80ce2aSJacob Faibussowitsch   PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
1609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata));
161f6ced4a3SJed Brown   tdata = (char *)todata;
1629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses));
16306926cafSJed Brown   sendreqs = reqs + nrecvs;
16448a46eb9SPierre Jolivet   for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i));
16548a46eb9SPierre Jolivet   for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
1669566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses));
1679566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nrecvs, &franks));
168a297a907SKarl Rupp   for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
1699566063dSJacob Faibussowitsch   PetscCall(PetscFree2(reqs, statuses));
1709566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&comm));
171f6ced4a3SJed Brown 
172f6ced4a3SJed Brown   *nfrom             = nrecvs;
173f6ced4a3SJed Brown   *fromranks         = franks;
174f6ced4a3SJed Brown   *(void **)fromdata = fdata;
175*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
176f6ced4a3SJed Brown }
177f6ced4a3SJed Brown 
#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
/* Like the Allreduce variant, but uses MPI_Reduce_scatter_block so each rank receives only
   its own element of the summed flag vector (its incoming-message count) instead of the
   whole array. */
static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
{
  PetscMPIInt       size, *iflags, nrecvs, tag, *franks, i, flg;
  MPI_Aint          lb, unitbytes;
  char             *tdata, *fdata;
  MPI_Request      *reqs, *sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
  /* Lazily create, or zero for reuse, the flag scratch array cached on the inner communicator */
  if (!counter->iflags) PetscCall(PetscCalloc1(size, &counter->iflags));
  else PetscCall(PetscArrayzero(counter->iflags, size));
  iflags = counter->iflags;
  for (i = 0; i < nto; i++) iflags[toranks[i]] = 1;
  /* Each rank gets back only its own summed entry: the number of ranks sending to it */
  PetscCallMPI(MPI_Reduce_scatter_block(iflags, &nrecvs, 1, MPI_INT, MPI_SUM, comm));
  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata));
  tdata = (char *)todata;
  PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses));
  sendreqs = reqs + nrecvs; /* receives occupy reqs[0..nrecvs), sends follow */
  for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i));
  for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses));
  PetscCall(PetscMalloc1(nrecvs, &franks));
  /* The first nrecvs statuses belong to the receives; record who actually sent to us */
  for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  PetscCall(PetscFree2(reqs, statuses));
  PetscCall(PetscCommDestroy(&comm));

  /* Caller owns franks and fdata (PetscFree) */
  *nfrom             = nrecvs;
  *fromranks         = franks;
  *(void **)fromdata = fdata;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
2221654bf6bSJed Brown 
223f6ced4a3SJed Brown /*@C
224f6ced4a3SJed Brown    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
225f6ced4a3SJed Brown 
226d083f849SBarry Smith    Collective
227f6ced4a3SJed Brown 
2284165533cSJose E. Roman    Input Parameters:
229f6ced4a3SJed Brown +  comm - communicator
230f6ced4a3SJed Brown .  count - number of entries to send/receive (must match on all ranks)
231f6ced4a3SJed Brown .  dtype - datatype to send/receive from each rank (must match on all ranks)
232f6ced4a3SJed Brown .  nto - number of ranks to send data to
233f6ced4a3SJed Brown .  toranks - ranks to send to (array of length nto)
234f6ced4a3SJed Brown -  todata - data to send to each rank (packed)
235f6ced4a3SJed Brown 
2364165533cSJose E. Roman    Output Parameters:
237f6ced4a3SJed Brown +  nfrom - number of ranks receiving messages from
238811af0c4SBarry Smith .  fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`)
239811af0c4SBarry Smith -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`)
240f6ced4a3SJed Brown 
241f6ced4a3SJed Brown    Level: developer
242f6ced4a3SJed Brown 
243811af0c4SBarry Smith    Options Database Key:
24405342407SJunchao Zhang .  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier.
2451654bf6bSJed Brown 
246f6ced4a3SJed Brown    Notes:
247811af0c4SBarry Smith    This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
248811af0c4SBarry Smith    `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other constant-size data.
249f6ced4a3SJed Brown 
250f6ced4a3SJed Brown    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
251f6ced4a3SJed Brown 
252f6ced4a3SJed Brown    References:
253606c0280SSatish Balay .  * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
2544f02bc6aSBarry Smith    Scalable communication protocols for dynamic sparse data exchange, 2010.
255f6ced4a3SJed Brown 
256811af0c4SBarry Smith .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`, `PetscCommBuildTwoSidedSetType()`, `PetscCommBuildTwoSidedType`
257f6ced4a3SJed Brown @*/
258d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
259d71ae5a4SJacob Faibussowitsch {
2606145cd65SJed Brown   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
261f6ced4a3SJed Brown 
262f6ced4a3SJed Brown   PetscFunctionBegin;
2639566063dSJacob Faibussowitsch   PetscCall(PetscSysInitializePackage());
2649566063dSJacob Faibussowitsch   PetscCall(PetscLogEventSync(PETSC_BuildTwoSided, comm));
2659566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSC_BuildTwoSided, 0, 0, 0, 0));
2669566063dSJacob Faibussowitsch   PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype));
267f6ced4a3SJed Brown   switch (buildtype) {
2686145cd65SJed Brown   case PETSC_BUILDTWOSIDED_IBARRIER:
2698ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
2709566063dSJacob Faibussowitsch     PetscCall(PetscCommBuildTwoSided_Ibarrier(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
271b458e8f1SJose E. Roman     break;
2726145cd65SJed Brown #else
2736145cd65SJed Brown     SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
274f6ced4a3SJed Brown #endif
275d71ae5a4SJacob Faibussowitsch   case PETSC_BUILDTWOSIDED_ALLREDUCE:
276d71ae5a4SJacob Faibussowitsch     PetscCall(PetscCommBuildTwoSided_Allreduce(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
277d71ae5a4SJacob Faibussowitsch     break;
2781654bf6bSJed Brown   case PETSC_BUILDTWOSIDED_REDSCATTER:
2791654bf6bSJed Brown #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
2809566063dSJacob Faibussowitsch     PetscCall(PetscCommBuildTwoSided_RedScatter(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
281b458e8f1SJose E. Roman     break;
2821654bf6bSJed Brown #else
2831654bf6bSJed Brown     SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
2841654bf6bSJed Brown #endif
285d71ae5a4SJacob Faibussowitsch   default:
286d71ae5a4SJacob Faibussowitsch     SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication");
287f6ced4a3SJed Brown   }
2889566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSC_BuildTwoSided, 0, 0, 0, 0));
289*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
290f6ced4a3SJed Brown }
291d815da10SJed Brown 
/* Reference implementation of the callback-based rendezvous: first runs a complete
   PetscCommBuildTwoSided() exchange, then invokes the user's send/recv callbacks with
   ntags fresh tags so they can post their own (nonblocking) payload transfers.
   Returned request arrays (*toreqs, *fromreqs) are owned by the caller. */
static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
{
  PetscMPIInt  i, *tag;
  MPI_Aint     lb, unitbytes;
  MPI_Request *sendreq, *recvreq;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(ntags, &tag));
  /* The first tag comes from duplicating onto PETSc's inner communicator; the rest are drawn from it */
  if (ntags > 0) PetscCall(PetscCommDuplicate(comm, &comm, &tag[0]));
  for (i = 1; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i]));
  /* NOTE(review): when ntags == 0 the comm is not duplicated yet PetscCommDestroy() below is
     still called on the caller's comm — presumably ntags > 0 always holds in practice; verify */

  /* Perform complete initial rendezvous */
  PetscCall(PetscCommBuildTwoSided(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));

  PetscCall(PetscMalloc1(nto * ntags, &sendreq));
  PetscCall(PetscMalloc1(*nfrom * ntags, &recvreq));

  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  for (i = 0; i < nto; i++) {
    PetscMPIInt k;
    /* Pre-fill with MPI_REQUEST_NULL so callbacks may post fewer than ntags requests */
    for (k = 0; k < ntags; k++) sendreq[i * ntags + k] = MPI_REQUEST_NULL;
    PetscCall((*send)(comm, tag, i, toranks[i], ((char *)todata) + count * unitbytes * i, sendreq + i * ntags, ctx));
  }
  for (i = 0; i < *nfrom; i++) {
    void       *header = (*(char **)fromdata) + count * unitbytes * i; /* rendezvous data for this source */
    PetscMPIInt k;
    for (k = 0; k < ntags; k++) recvreq[i * ntags + k] = MPI_REQUEST_NULL;
    PetscCall((*recv)(comm, tag, (*fromranks)[i], header, recvreq + i * ntags, ctx));
  }
  PetscCall(PetscFree(tag));
  PetscCall(PetscCommDestroy(&comm));
  /* Hand request-array ownership to the caller, who waits on and frees them */
  *toreqs   = sendreq;
  *fromreqs = recvreq;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)

/* Callback-based rendezvous fused with the nonblocking-consensus protocol: the small
   synchronous rendezvous messages and the user payload transfers (posted by the send/recv
   callbacks on separate tags) proceed concurrently, avoiding the separate full exchange
   done by the Reference variant.  Returned request arrays are owned by the caller. */
static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
{
  PetscMPIInt    nrecvs, tag, *tags, done, i;
  MPI_Aint       lb, unitbytes;
  char          *tdata;
  MPI_Request   *sendreqs, *usendreqs, *req, barrier;
  PetscSegBuffer segrank, segdata, segreq;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  /* tag is used for the rendezvous messages; tags[] are handed to the user callbacks */
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCall(PetscMalloc1(ntags, &tags));
  for (i = 0; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tags[i]));
  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  tdata = (char *)todata;
  PetscCall(PetscMalloc1(nto, &sendreqs));
  PetscCall(PetscMalloc1(nto * ntags, &usendreqs));
  /* Post synchronous sends */
  for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  /* Post actual payloads.  These are typically larger messages.  Hopefully sending these later does not slow down the
   * synchronous messages above. */
  for (i = 0; i < nto; i++) {
    PetscMPIInt k;
    /* Pre-fill with MPI_REQUEST_NULL so callbacks may post fewer than ntags requests */
    for (k = 0; k < ntags; k++) usendreqs[i * ntags + k] = MPI_REQUEST_NULL;
    PetscCall((*send)(comm, tags, i, toranks[i], tdata + count * unitbytes * i, usendreqs + i * ntags, ctx));
  }

  PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank));
  PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata));
  PetscCall(PetscSegBufferCreate(sizeof(MPI_Request), 4, &segreq));

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done = 0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status));
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank, k;
      void        *buf;
      PetscCall(PetscSegBufferGet(segrank, 1, &recvrank));
      PetscCall(PetscSegBufferGet(segdata, count, &buf));
      *recvrank = status.MPI_SOURCE;
      PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE));
      /* Immediately let the user post payload receives for this newly-discovered source */
      PetscCall(PetscSegBufferGet(segreq, ntags, &req));
      for (k = 0; k < ntags; k++) req[k] = MPI_REQUEST_NULL;
      PetscCall((*recv)(comm, tags, status.MPI_SOURCE, buf, req, ctx));
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent, nsends;
      PetscCall(PetscMPIIntCast(nto, &nsends));
      PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE));
      if (sent) {
        /* All synchronous sends matched; join the consensus barrier */
        PetscCallMPI(MPI_Ibarrier(comm, &barrier));
        barrier_started = PETSC_TRUE;
      }
    } else {
      PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE));
    }
  }
  *nfrom = nrecvs;
  /* Extraction hands ownership of rank/data/request arrays to the caller (PetscFree) */
  PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks));
  PetscCall(PetscSegBufferDestroy(&segrank));
  PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata));
  PetscCall(PetscSegBufferDestroy(&segdata));
  *toreqs = usendreqs;
  PetscCall(PetscSegBufferExtractAlloc(segreq, fromreqs));
  PetscCall(PetscSegBufferDestroy(&segreq));
  PetscCall(PetscFree(sendreqs));
  PetscCall(PetscFree(tags));
  PetscCall(PetscCommDestroy(&comm));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
410afb254cdSJed Brown 
411d815da10SJed Brown /*@C
412d815da10SJed Brown    PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
413d815da10SJed Brown 
414d083f849SBarry Smith    Collective
415d815da10SJed Brown 
4164165533cSJose E. Roman    Input Parameters:
417d815da10SJed Brown +  comm - communicator
418d815da10SJed Brown .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
419d815da10SJed Brown .  dtype - datatype to send/receive from each rank (must match on all ranks)
420d815da10SJed Brown .  nto - number of ranks to send data to
421d815da10SJed Brown .  toranks - ranks to send to (array of length nto)
422d815da10SJed Brown .  todata - data to send to each rank (packed)
423d815da10SJed Brown .  ntags - number of tags needed by send/recv callbacks
424d815da10SJed Brown .  send - callback invoked on sending process when ready to send primary payload
425d815da10SJed Brown .  recv - callback invoked on receiving process after delivery of rendezvous message
426d815da10SJed Brown -  ctx - context for callbacks
427d815da10SJed Brown 
4284165533cSJose E. Roman    Output Parameters:
429d815da10SJed Brown +  nfrom - number of ranks receiving messages from
430811af0c4SBarry Smith .  fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`)
431811af0c4SBarry Smith -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`)
432d815da10SJed Brown 
433d815da10SJed Brown    Level: developer
434d815da10SJed Brown 
435d815da10SJed Brown    Notes:
436811af0c4SBarry Smith    This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
437811af0c4SBarry Smith    `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data.
438d815da10SJed Brown 
439d815da10SJed Brown    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
440d815da10SJed Brown 
441d815da10SJed Brown    References:
442606c0280SSatish Balay .  * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
4434f02bc6aSBarry Smith    Scalable communication protocols for dynamic sparse data exchange, 2010.
444d815da10SJed Brown 
445db781477SPatrick Sanan .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedFReq()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`
446d815da10SJed Brown @*/
447d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
448d71ae5a4SJacob Faibussowitsch {
4496afbe7ddSJed Brown   MPI_Request *toreqs, *fromreqs;
4506afbe7ddSJed Brown 
4516afbe7ddSJed Brown   PetscFunctionBegin;
4529566063dSJacob Faibussowitsch   PetscCall(PetscCommBuildTwoSidedFReq(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, &toreqs, &fromreqs, send, recv, ctx));
4539566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(nto * ntags, toreqs, MPI_STATUSES_IGNORE));
4549566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(*nfrom * ntags, fromreqs, MPI_STATUSES_IGNORE));
4559566063dSJacob Faibussowitsch   PetscCall(PetscFree(toreqs));
4569566063dSJacob Faibussowitsch   PetscCall(PetscFree(fromreqs));
457*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4586afbe7ddSJed Brown }
4596afbe7ddSJed Brown 
4606afbe7ddSJed Brown /*@C
4616afbe7ddSJed Brown    PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests
4626afbe7ddSJed Brown 
463d083f849SBarry Smith    Collective
4646afbe7ddSJed Brown 
4654165533cSJose E. Roman    Input Parameters:
4666afbe7ddSJed Brown +  comm - communicator
4676afbe7ddSJed Brown .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
4686afbe7ddSJed Brown .  dtype - datatype to send/receive from each rank (must match on all ranks)
4696afbe7ddSJed Brown .  nto - number of ranks to send data to
4706afbe7ddSJed Brown .  toranks - ranks to send to (array of length nto)
4716afbe7ddSJed Brown .  todata - data to send to each rank (packed)
4726afbe7ddSJed Brown .  ntags - number of tags needed by send/recv callbacks
4736afbe7ddSJed Brown .  send - callback invoked on sending process when ready to send primary payload
4746afbe7ddSJed Brown .  recv - callback invoked on receiving process after delivery of rendezvous message
4756afbe7ddSJed Brown -  ctx - context for callbacks
4766afbe7ddSJed Brown 
4774165533cSJose E. Roman    Output Parameters:
+  nfrom - number of ranks from which messages will be received
.  fromranks - ranks from which messages are received (length nfrom; caller should `PetscFree()`)
.  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom; caller responsible for `PetscFree()`)
481811af0c4SBarry Smith .  toreqs - array of nto*ntags sender requests (caller must wait on these, then `PetscFree()`)
482811af0c4SBarry Smith -  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then `PetscFree()`)
4836afbe7ddSJed Brown 
4846afbe7ddSJed Brown    Level: developer
4856afbe7ddSJed Brown 
4866afbe7ddSJed Brown    Notes:
487811af0c4SBarry Smith    This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
488811af0c4SBarry Smith    `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data.
4896afbe7ddSJed Brown 
4906afbe7ddSJed Brown    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
4916afbe7ddSJed Brown 
4926afbe7ddSJed Brown    References:
.  * - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
   The `PETSC_BUILDTWOSIDED_IBARRIER` implementation uses the MPI_Ibarrier()-based algorithm from this reference.
4956afbe7ddSJed Brown 
496db781477SPatrick Sanan .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedF()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`
4976afbe7ddSJed Brown @*/
498d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
499d71ae5a4SJacob Faibussowitsch {
5009371c9d4SSatish Balay   PetscErrorCode (*f)(MPI_Comm, PetscMPIInt, MPI_Datatype, PetscMPIInt, const PetscMPIInt[], const void *, PetscMPIInt *, PetscMPIInt **, void *, PetscMPIInt, MPI_Request **, MPI_Request **, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx);
501d815da10SJed Brown   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
5023461502dSJed Brown   PetscMPIInt            i, size;
503d815da10SJed Brown 
504d815da10SJed Brown   PetscFunctionBegin;
5059566063dSJacob Faibussowitsch   PetscCall(PetscSysInitializePackage());
5069566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
507ad540459SPierre Jolivet   for (i = 0; i < nto; i++) PetscCheck(toranks[i] >= 0 && size > toranks[i], comm, PETSC_ERR_ARG_OUTOFRANGE, "toranks[%d] %d not in comm size %d", i, toranks[i], size);
5089566063dSJacob Faibussowitsch   PetscCall(PetscLogEventSync(PETSC_BuildTwoSidedF, comm));
5099566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSC_BuildTwoSidedF, 0, 0, 0, 0));
5109566063dSJacob Faibussowitsch   PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype));
511d815da10SJed Brown   switch (buildtype) {
512d815da10SJed Brown   case PETSC_BUILDTWOSIDED_IBARRIER:
5138ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
5146afbe7ddSJed Brown     f = PetscCommBuildTwoSidedFReq_Ibarrier;
515b458e8f1SJose E. Roman     break;
516afb254cdSJed Brown #else
517afb254cdSJed Brown     SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
518afb254cdSJed Brown #endif
519d815da10SJed Brown   case PETSC_BUILDTWOSIDED_ALLREDUCE:
520d71ae5a4SJacob Faibussowitsch   case PETSC_BUILDTWOSIDED_REDSCATTER:
521d71ae5a4SJacob Faibussowitsch     f = PetscCommBuildTwoSidedFReq_Reference;
522d71ae5a4SJacob Faibussowitsch     break;
523d71ae5a4SJacob Faibussowitsch   default:
524d71ae5a4SJacob Faibussowitsch     SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication");
525d815da10SJed Brown   }
5269566063dSJacob Faibussowitsch   PetscCall((*f)(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, toreqs, fromreqs, send, recv, ctx));
5279566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSC_BuildTwoSidedF, 0, 0, 0, 0));
528*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
529d815da10SJed Brown }
530