xref: /petsc/src/sys/utils/mpits.c (revision d815da108924b212b90f2864c8b3d758d7270b63)
1f6ced4a3SJed Brown #include <petscsys.h>        /*I  "petscsys.h"  I*/
2f6ced4a3SJed Brown 
33b3561c8SJed Brown PetscLogEvent PETSC_BuildTwoSided;
43b3561c8SJed Brown 
56145cd65SJed Brown const char *const PetscBuildTwoSidedTypes[] = {
6f6ced4a3SJed Brown   "ALLREDUCE",
76145cd65SJed Brown   "IBARRIER",
81654bf6bSJed Brown   "REDSCATTER",
96145cd65SJed Brown   "PetscBuildTwoSidedType",
106145cd65SJed Brown   "PETSC_BUILDTWOSIDED_",
11f6ced4a3SJed Brown   0
12f6ced4a3SJed Brown };
13f6ced4a3SJed Brown 
146145cd65SJed Brown static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
15f6ced4a3SJed Brown 
16f6ced4a3SJed Brown #undef __FUNCT__
176145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSidedSetType"
186145cd65SJed Brown /*@
196145cd65SJed Brown    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
206145cd65SJed Brown 
216145cd65SJed Brown    Logically Collective
226145cd65SJed Brown 
236145cd65SJed Brown    Input Arguments:
246145cd65SJed Brown +  comm - PETSC_COMM_WORLD
256145cd65SJed Brown -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
266145cd65SJed Brown 
276145cd65SJed Brown    Level: developer
286145cd65SJed Brown 
296145cd65SJed Brown    Note:
306145cd65SJed Brown    This option is currently global, but could be made per-communicator.
316145cd65SJed Brown 
326145cd65SJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
336145cd65SJed Brown @*/
346145cd65SJed Brown PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
356145cd65SJed Brown {
366145cd65SJed Brown   PetscFunctionBegin;
376145cd65SJed Brown #if defined(PETSC_USE_DEBUG)
386145cd65SJed Brown   {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
396145cd65SJed Brown     PetscMPIInt ierr;
406145cd65SJed Brown     PetscMPIInt b1[2],b2[2];
416145cd65SJed Brown     b1[0] = -(PetscMPIInt)twosided;
426145cd65SJed Brown     b1[1] = (PetscMPIInt)twosided;
436145cd65SJed Brown     ierr  = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
446145cd65SJed Brown     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
456145cd65SJed Brown   }
466145cd65SJed Brown #endif
476145cd65SJed Brown   _twosided_type = twosided;
486145cd65SJed Brown   PetscFunctionReturn(0);
496145cd65SJed Brown }
506145cd65SJed Brown 
516145cd65SJed Brown #undef __FUNCT__
526145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSidedGetType"
536145cd65SJed Brown /*@
546145cd65SJed Brown    PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication
556145cd65SJed Brown 
566145cd65SJed Brown    Logically Collective
576145cd65SJed Brown 
586145cd65SJed Brown    Output Arguments:
596145cd65SJed Brown +  comm - communicator on which to query algorithm
606145cd65SJed Brown -  twosided - algorithm to use for PetscCommBuildTwoSided()
616145cd65SJed Brown 
626145cd65SJed Brown    Level: developer
636145cd65SJed Brown 
646145cd65SJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
656145cd65SJed Brown @*/
666145cd65SJed Brown PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
67f6ced4a3SJed Brown {
68f6ced4a3SJed Brown   PetscErrorCode ierr;
69f6ced4a3SJed Brown 
70f6ced4a3SJed Brown   PetscFunctionBegin;
716145cd65SJed Brown   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
726145cd65SJed Brown   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
73f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER)
746145cd65SJed Brown #  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
756145cd65SJed Brown     /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
766145cd65SJed Brown     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
77f6ced4a3SJed Brown #  else
786145cd65SJed Brown     _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
79f6ced4a3SJed Brown #  endif
806145cd65SJed Brown #else
816145cd65SJed Brown     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
826145cd65SJed Brown #endif
830298fd71SBarry Smith     ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
84f6ced4a3SJed Brown   }
85f6ced4a3SJed Brown   *twosided = _twosided_type;
86f6ced4a3SJed Brown   PetscFunctionReturn(0);
87f6ced4a3SJed Brown }
88f6ced4a3SJed Brown 
89f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER)
90f6ced4a3SJed Brown 
91f6ced4a3SJed Brown #undef __FUNCT__
926145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
93cf4b5b4fSJed Brown static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
94f6ced4a3SJed Brown {
95f6ced4a3SJed Brown   PetscErrorCode ierr;
96c3a59b84SJed Brown   PetscMPIInt    nrecvs,tag,done,i;
97c3a59b84SJed Brown   MPI_Aint       lb,unitbytes;
98f6ced4a3SJed Brown   char           *tdata;
99f6ced4a3SJed Brown   MPI_Request    *sendreqs,barrier;
1000f453b92SJed Brown   PetscSegBuffer segrank,segdata;
101f6ced4a3SJed Brown 
102f6ced4a3SJed Brown   PetscFunctionBegin;
103a502f807SJed Brown   ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
104c3a59b84SJed Brown   ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
105c3a59b84SJed Brown   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
106f6ced4a3SJed Brown   tdata = (char*)todata;
107785e854fSJed Brown   ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
108f6ced4a3SJed Brown   for (i=0; i<nto; i++) {
109f6ced4a3SJed Brown     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
110f6ced4a3SJed Brown   }
1110f453b92SJed Brown   ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
1120f453b92SJed Brown   ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
113f6ced4a3SJed Brown 
114f6ced4a3SJed Brown   nrecvs  = 0;
115f6ced4a3SJed Brown   barrier = MPI_REQUEST_NULL;
116f6ced4a3SJed Brown   for (done=0; !done; ) {
117f6ced4a3SJed Brown     PetscMPIInt flag;
118f6ced4a3SJed Brown     MPI_Status  status;
119f6ced4a3SJed Brown     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
120f6ced4a3SJed Brown     if (flag) {                 /* incoming message */
121f6ced4a3SJed Brown       PetscMPIInt *recvrank;
122f6ced4a3SJed Brown       void        *buf;
123137cf7b6SJed Brown       ierr      = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
124137cf7b6SJed Brown       ierr      = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
125f6ced4a3SJed Brown       *recvrank = status.MPI_SOURCE;
126f6ced4a3SJed Brown       ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
127f6ced4a3SJed Brown       nrecvs++;
128f6ced4a3SJed Brown     }
129f6ced4a3SJed Brown     if (barrier == MPI_REQUEST_NULL) {
1304dc2109aSBarry Smith       PetscMPIInt sent,nsends;
1314dc2109aSBarry Smith       ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
132f6ced4a3SJed Brown       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
133f6ced4a3SJed Brown       if (sent) {
134f6ced4a3SJed Brown         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
135f6ced4a3SJed Brown         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
136f6ced4a3SJed Brown       }
137f6ced4a3SJed Brown     } else {
138f6ced4a3SJed Brown       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
139f6ced4a3SJed Brown     }
140f6ced4a3SJed Brown   }
141f6ced4a3SJed Brown   *nfrom = nrecvs;
142137cf7b6SJed Brown   ierr   = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
1430f453b92SJed Brown   ierr   = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
144137cf7b6SJed Brown   ierr   = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
1450f453b92SJed Brown   ierr   = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
146a502f807SJed Brown   ierr   = PetscCommDestroy(&comm);CHKERRQ(ierr);
147f6ced4a3SJed Brown   PetscFunctionReturn(0);
148f6ced4a3SJed Brown }
149f6ced4a3SJed Brown #endif
150f6ced4a3SJed Brown 
151f6ced4a3SJed Brown #undef __FUNCT__
1526145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
153cf4b5b4fSJed Brown static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
154f6ced4a3SJed Brown {
155f6ced4a3SJed Brown   PetscErrorCode ierr;
156c3a59b84SJed Brown   PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i;
157c3a59b84SJed Brown   MPI_Aint       lb,unitbytes;
158f6ced4a3SJed Brown   char           *tdata,*fdata;
159f6ced4a3SJed Brown   MPI_Request    *reqs,*sendreqs;
160f6ced4a3SJed Brown   MPI_Status     *statuses;
161f6ced4a3SJed Brown 
162f6ced4a3SJed Brown   PetscFunctionBegin;
163f6ced4a3SJed Brown   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1641795a4d1SJed Brown   ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr);
165f6ced4a3SJed Brown   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
1660298fd71SBarry Smith   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr);
167f6ced4a3SJed Brown   ierr = PetscFree(iflags);CHKERRQ(ierr);
168f6ced4a3SJed Brown 
169a502f807SJed Brown   ierr     = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
170c3a59b84SJed Brown   ierr     = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
171c3a59b84SJed Brown   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
172f6ced4a3SJed Brown   ierr     = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
173f6ced4a3SJed Brown   tdata    = (char*)todata;
174dcca6d9dSJed Brown   ierr     = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
17506926cafSJed Brown   sendreqs = reqs + nrecvs;
176f6ced4a3SJed Brown   for (i=0; i<nrecvs; i++) {
177f6ced4a3SJed Brown     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
178f6ced4a3SJed Brown   }
179f6ced4a3SJed Brown   for (i=0; i<nto; i++) {
180f6ced4a3SJed Brown     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
181f6ced4a3SJed Brown   }
182f6ced4a3SJed Brown   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
183785e854fSJed Brown   ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
184a297a907SKarl Rupp   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
18506926cafSJed Brown   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
186a502f807SJed Brown   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
187f6ced4a3SJed Brown 
188f6ced4a3SJed Brown   *nfrom            = nrecvs;
189f6ced4a3SJed Brown   *fromranks        = franks;
190f6ced4a3SJed Brown   *(void**)fromdata = fdata;
191f6ced4a3SJed Brown   PetscFunctionReturn(0);
192f6ced4a3SJed Brown }
193f6ced4a3SJed Brown 
1941654bf6bSJed Brown #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
1951654bf6bSJed Brown #undef __FUNCT__
1961654bf6bSJed Brown #define __FUNCT__ "PetscCommBuildTwoSided_RedScatter"
197cf4b5b4fSJed Brown static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
1981654bf6bSJed Brown {
1991654bf6bSJed Brown   PetscErrorCode ierr;
200c3a59b84SJed Brown   PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i;
201c3a59b84SJed Brown   MPI_Aint       lb,unitbytes;
2021654bf6bSJed Brown   char           *tdata,*fdata;
2031654bf6bSJed Brown   MPI_Request    *reqs,*sendreqs;
2041654bf6bSJed Brown   MPI_Status     *statuses;
2051654bf6bSJed Brown 
2061654bf6bSJed Brown   PetscFunctionBegin;
2071654bf6bSJed Brown   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2081654bf6bSJed Brown   ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr);
2091654bf6bSJed Brown   ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
2101654bf6bSJed Brown   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
2111654bf6bSJed Brown   ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2121654bf6bSJed Brown   ierr = PetscFree(iflags);CHKERRQ(ierr);
2131654bf6bSJed Brown 
214a502f807SJed Brown   ierr     = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
215c3a59b84SJed Brown   ierr     = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
216c3a59b84SJed Brown   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
2171654bf6bSJed Brown   ierr     = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
2181654bf6bSJed Brown   tdata    = (char*)todata;
2191654bf6bSJed Brown   ierr     = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
2201654bf6bSJed Brown   sendreqs = reqs + nrecvs;
2211654bf6bSJed Brown   for (i=0; i<nrecvs; i++) {
2221654bf6bSJed Brown     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
2231654bf6bSJed Brown   }
2241654bf6bSJed Brown   for (i=0; i<nto; i++) {
2251654bf6bSJed Brown     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
2261654bf6bSJed Brown   }
2271654bf6bSJed Brown   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
2281654bf6bSJed Brown   ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
2291654bf6bSJed Brown   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
2301654bf6bSJed Brown   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
231a502f807SJed Brown   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
2321654bf6bSJed Brown 
2331654bf6bSJed Brown   *nfrom            = nrecvs;
2341654bf6bSJed Brown   *fromranks        = franks;
2351654bf6bSJed Brown   *(void**)fromdata = fdata;
2361654bf6bSJed Brown   PetscFunctionReturn(0);
2371654bf6bSJed Brown }
2381654bf6bSJed Brown #endif
2391654bf6bSJed Brown 
240f6ced4a3SJed Brown #undef __FUNCT__
241f6ced4a3SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided"
242f6ced4a3SJed Brown /*@C
243f6ced4a3SJed Brown    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
244f6ced4a3SJed Brown 
245f6ced4a3SJed Brown    Collective on MPI_Comm
246f6ced4a3SJed Brown 
247f6ced4a3SJed Brown    Input Arguments:
248f6ced4a3SJed Brown +  comm - communicator
249f6ced4a3SJed Brown .  count - number of entries to send/receive (must match on all ranks)
250f6ced4a3SJed Brown .  dtype - datatype to send/receive from each rank (must match on all ranks)
251f6ced4a3SJed Brown .  nto - number of ranks to send data to
252f6ced4a3SJed Brown .  toranks - ranks to send to (array of length nto)
253f6ced4a3SJed Brown -  todata - data to send to each rank (packed)
254f6ced4a3SJed Brown 
255f6ced4a3SJed Brown    Output Arguments:
256f6ced4a3SJed Brown +  nfrom - number of ranks receiving messages from
257f6ced4a3SJed Brown .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
258f6ced4a3SJed Brown -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
259f6ced4a3SJed Brown 
260f6ced4a3SJed Brown    Level: developer
261f6ced4a3SJed Brown 
2621654bf6bSJed Brown    Options Database Keys:
2631654bf6bSJed Brown .  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication
2641654bf6bSJed Brown 
265f6ced4a3SJed Brown    Notes:
266f6ced4a3SJed Brown    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
267f6ced4a3SJed Brown    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
268f6ced4a3SJed Brown 
269f6ced4a3SJed Brown    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
270f6ced4a3SJed Brown 
271f6ced4a3SJed Brown    References:
272f6ced4a3SJed Brown    The MPI_Ibarrier implementation uses the algorithm in
273f6ced4a3SJed Brown    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
274f6ced4a3SJed Brown 
275f6ced4a3SJed Brown .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
276f6ced4a3SJed Brown @*/
277cf4b5b4fSJed Brown PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
278f6ced4a3SJed Brown {
279f6ced4a3SJed Brown   PetscErrorCode         ierr;
2806145cd65SJed Brown   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
281f6ced4a3SJed Brown 
282f6ced4a3SJed Brown   PetscFunctionBegin;
2833b3561c8SJed Brown   ierr = PetscSysInitializePackage();CHKERRQ(ierr);
2843b3561c8SJed Brown   ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
2856145cd65SJed Brown   ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
286f6ced4a3SJed Brown   switch (buildtype) {
2876145cd65SJed Brown   case PETSC_BUILDTWOSIDED_IBARRIER:
288f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER)
2896145cd65SJed Brown     ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
2906145cd65SJed Brown #else
2916145cd65SJed Brown     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
292f6ced4a3SJed Brown #endif
2936145cd65SJed Brown     break;
2946145cd65SJed Brown   case PETSC_BUILDTWOSIDED_ALLREDUCE:
2956145cd65SJed Brown     ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
296f6ced4a3SJed Brown     break;
2971654bf6bSJed Brown   case PETSC_BUILDTWOSIDED_REDSCATTER:
2981654bf6bSJed Brown #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
2991654bf6bSJed Brown     ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
3001654bf6bSJed Brown #else
3011654bf6bSJed Brown     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
3021654bf6bSJed Brown #endif
3031654bf6bSJed Brown     break;
304f6ced4a3SJed Brown   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
305f6ced4a3SJed Brown   }
3063b3561c8SJed Brown   ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
307f6ced4a3SJed Brown   PetscFunctionReturn(0);
308f6ced4a3SJed Brown }
309*d815da10SJed Brown 
310*d815da10SJed Brown #undef __FUNCT__
311*d815da10SJed Brown #define __FUNCT__ "PetscCommBuildTwoSidedF_Reference"
312*d815da10SJed Brown static PetscErrorCode PetscCommBuildTwoSidedF_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
313*d815da10SJed Brown                                                         PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
314*d815da10SJed Brown                                                         PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
315*d815da10SJed Brown {
316*d815da10SJed Brown   PetscErrorCode ierr;
317*d815da10SJed Brown   PetscMPIInt i,*tag;
318*d815da10SJed Brown   MPI_Aint    lb,unitbytes;
319*d815da10SJed Brown   MPI_Request *sendreq,*recvreq;
320*d815da10SJed Brown 
321*d815da10SJed Brown   PetscFunctionBegin;
322*d815da10SJed Brown   ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr);
323*d815da10SJed Brown   if (ntags > 0) {
324*d815da10SJed Brown     ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr);
325*d815da10SJed Brown   }
326*d815da10SJed Brown   for (i=1; i<ntags; i++) {
327*d815da10SJed Brown     ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr);
328*d815da10SJed Brown   }
329*d815da10SJed Brown 
330*d815da10SJed Brown   /* Perform complete initial rendezvous */
331*d815da10SJed Brown   ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
332*d815da10SJed Brown 
333*d815da10SJed Brown   ierr = PetscMalloc1((nto + *nfrom)*ntags,&sendreq);CHKERRQ(ierr);
334*d815da10SJed Brown   recvreq = sendreq + nto*ntags;
335*d815da10SJed Brown 
336*d815da10SJed Brown   ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
337*d815da10SJed Brown   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
338*d815da10SJed Brown   for (i=0; i<nto; i++) {
339*d815da10SJed Brown     ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr);
340*d815da10SJed Brown   }
341*d815da10SJed Brown   for (i=0; i<*nfrom; i++) {
342*d815da10SJed Brown     void *header = (*(char**)fromdata) + count*unitbytes*i;
343*d815da10SJed Brown     ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr);
344*d815da10SJed Brown   }
345*d815da10SJed Brown   ierr = MPI_Waitall((nto+*nfrom)*ntags,sendreq,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
346*d815da10SJed Brown   ierr = PetscFree(sendreq);CHKERRQ(ierr);
347*d815da10SJed Brown   ierr = PetscFree(tag);CHKERRQ(ierr);
348*d815da10SJed Brown   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
349*d815da10SJed Brown   PetscFunctionReturn(0);
350*d815da10SJed Brown }
351*d815da10SJed Brown 
352*d815da10SJed Brown #undef __FUNCT__
353*d815da10SJed Brown #define __FUNCT__ "PetscCommBuildTwoSidedF"
354*d815da10SJed Brown /*@C
355*d815da10SJed Brown    PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
356*d815da10SJed Brown 
357*d815da10SJed Brown    Collective on MPI_Comm
358*d815da10SJed Brown 
359*d815da10SJed Brown    Input Arguments:
360*d815da10SJed Brown +  comm - communicator
361*d815da10SJed Brown .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
362*d815da10SJed Brown .  dtype - datatype to send/receive from each rank (must match on all ranks)
363*d815da10SJed Brown .  nto - number of ranks to send data to
364*d815da10SJed Brown .  toranks - ranks to send to (array of length nto)
365*d815da10SJed Brown .  todata - data to send to each rank (packed)
366*d815da10SJed Brown .  ntags - number of tags needed by send/recv callbacks
367*d815da10SJed Brown .  send - callback invoked on sending process when ready to send primary payload
368*d815da10SJed Brown .  recv - callback invoked on receiving process after delivery of rendezvous message
369*d815da10SJed Brown -  ctx - context for callbacks
370*d815da10SJed Brown 
371*d815da10SJed Brown    Output Arguments:
372*d815da10SJed Brown +  nfrom - number of ranks receiving messages from
373*d815da10SJed Brown .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
374*d815da10SJed Brown -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
375*d815da10SJed Brown 
376*d815da10SJed Brown    Level: developer
377*d815da10SJed Brown 
378*d815da10SJed Brown    Notes:
379*d815da10SJed Brown    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
380*d815da10SJed Brown    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
381*d815da10SJed Brown 
382*d815da10SJed Brown    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
383*d815da10SJed Brown 
384*d815da10SJed Brown    References:
385*d815da10SJed Brown    The MPI_Ibarrier implementation uses the algorithm in
386*d815da10SJed Brown    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
387*d815da10SJed Brown 
388*d815da10SJed Brown .seealso: PetscCommBuildTwoSided(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
389*d815da10SJed Brown @*/
390*d815da10SJed Brown PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
391*d815da10SJed Brown                                        PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
392*d815da10SJed Brown                                        PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
393*d815da10SJed Brown {
394*d815da10SJed Brown   PetscErrorCode         ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,
395*d815da10SJed Brown                                    PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
396*d815da10SJed Brown                                    PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
397*d815da10SJed Brown   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
398*d815da10SJed Brown 
399*d815da10SJed Brown   PetscFunctionBegin;
400*d815da10SJed Brown   ierr = PetscSysInitializePackage();CHKERRQ(ierr);
401*d815da10SJed Brown   ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
402*d815da10SJed Brown   ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
403*d815da10SJed Brown   switch (buildtype) {
404*d815da10SJed Brown   case PETSC_BUILDTWOSIDED_IBARRIER:
405*d815da10SJed Brown   case PETSC_BUILDTWOSIDED_ALLREDUCE:
406*d815da10SJed Brown   case PETSC_BUILDTWOSIDED_REDSCATTER:
407*d815da10SJed Brown     f = PetscCommBuildTwoSidedF_Reference;
408*d815da10SJed Brown     break;
409*d815da10SJed Brown   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
410*d815da10SJed Brown   }
411*d815da10SJed Brown   ierr = (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,send,recv,ctx);CHKERRQ(ierr);
412*d815da10SJed Brown   ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
413*d815da10SJed Brown   PetscFunctionReturn(0);
414*d815da10SJed Brown }
415