xref: /petsc/src/sys/utils/mpits.c (revision 3b3561c8b1660ea5cb073480ab5b031363170b10)
1f6ced4a3SJed Brown #include <petscsys.h>        /*I  "petscsys.h"  I*/
2f6ced4a3SJed Brown 
3*3b3561c8SJed Brown PetscLogEvent PETSC_BuildTwoSided;
4*3b3561c8SJed Brown 
56145cd65SJed Brown const char *const PetscBuildTwoSidedTypes[] = {
6f6ced4a3SJed Brown   "ALLREDUCE",
76145cd65SJed Brown   "IBARRIER",
86145cd65SJed Brown   "PetscBuildTwoSidedType",
96145cd65SJed Brown   "PETSC_BUILDTWOSIDED_",
10f6ced4a3SJed Brown   0
11f6ced4a3SJed Brown };
12f6ced4a3SJed Brown 
136145cd65SJed Brown static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
14f6ced4a3SJed Brown 
15f6ced4a3SJed Brown #undef __FUNCT__
166145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSidedSetType"
176145cd65SJed Brown /*@
186145cd65SJed Brown    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
196145cd65SJed Brown 
206145cd65SJed Brown    Logically Collective
216145cd65SJed Brown 
226145cd65SJed Brown    Input Arguments:
236145cd65SJed Brown +  comm - PETSC_COMM_WORLD
246145cd65SJed Brown -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
256145cd65SJed Brown 
266145cd65SJed Brown    Level: developer
276145cd65SJed Brown 
286145cd65SJed Brown    Note:
296145cd65SJed Brown    This option is currently global, but could be made per-communicator.
306145cd65SJed Brown 
316145cd65SJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
326145cd65SJed Brown @*/
336145cd65SJed Brown PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
346145cd65SJed Brown {
356145cd65SJed Brown   PetscFunctionBegin;
366145cd65SJed Brown #if defined(PETSC_USE_DEBUG)
376145cd65SJed Brown   {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
386145cd65SJed Brown     PetscMPIInt ierr;
396145cd65SJed Brown     PetscMPIInt b1[2],b2[2];
406145cd65SJed Brown     b1[0] = -(PetscMPIInt)twosided;
416145cd65SJed Brown     b1[1] = (PetscMPIInt)twosided;
426145cd65SJed Brown     ierr  = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
436145cd65SJed Brown     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
446145cd65SJed Brown   }
456145cd65SJed Brown #endif
466145cd65SJed Brown   _twosided_type = twosided;
476145cd65SJed Brown   PetscFunctionReturn(0);
486145cd65SJed Brown }
496145cd65SJed Brown 
506145cd65SJed Brown #undef __FUNCT__
516145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSidedGetType"
526145cd65SJed Brown /*@
536145cd65SJed Brown    PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication
546145cd65SJed Brown 
556145cd65SJed Brown    Logically Collective
566145cd65SJed Brown 
576145cd65SJed Brown    Output Arguments:
586145cd65SJed Brown +  comm - communicator on which to query algorithm
596145cd65SJed Brown -  twosided - algorithm to use for PetscCommBuildTwoSided()
606145cd65SJed Brown 
616145cd65SJed Brown    Level: developer
626145cd65SJed Brown 
636145cd65SJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
646145cd65SJed Brown @*/
656145cd65SJed Brown PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
66f6ced4a3SJed Brown {
67f6ced4a3SJed Brown   PetscErrorCode ierr;
68f6ced4a3SJed Brown 
69f6ced4a3SJed Brown   PetscFunctionBegin;
706145cd65SJed Brown   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
716145cd65SJed Brown   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
72f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER)
736145cd65SJed Brown #  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
746145cd65SJed Brown     /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
756145cd65SJed Brown     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
76f6ced4a3SJed Brown #  else
776145cd65SJed Brown     _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
78f6ced4a3SJed Brown #  endif
796145cd65SJed Brown #else
806145cd65SJed Brown     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
816145cd65SJed Brown #endif
820298fd71SBarry Smith     ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
83f6ced4a3SJed Brown   }
84f6ced4a3SJed Brown   *twosided = _twosided_type;
85f6ced4a3SJed Brown   PetscFunctionReturn(0);
86f6ced4a3SJed Brown }
87f6ced4a3SJed Brown 
88f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER)
89f6ced4a3SJed Brown 
90f6ced4a3SJed Brown #undef __FUNCT__
916145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
926145cd65SJed Brown static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
93f6ced4a3SJed Brown {
94f6ced4a3SJed Brown   PetscErrorCode ierr;
95f6ced4a3SJed Brown   PetscMPIInt    nrecvs,tag,unitbytes,done;
96f6ced4a3SJed Brown   PetscInt       i;
97f6ced4a3SJed Brown   char           *tdata;
98f6ced4a3SJed Brown   MPI_Request    *sendreqs,barrier;
990f453b92SJed Brown   PetscSegBuffer segrank,segdata;
100f6ced4a3SJed Brown 
101f6ced4a3SJed Brown   PetscFunctionBegin;
102f6ced4a3SJed Brown   ierr  = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
103f6ced4a3SJed Brown   ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
104f6ced4a3SJed Brown   tdata = (char*)todata;
105785e854fSJed Brown   ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
106f6ced4a3SJed Brown   for (i=0; i<nto; i++) {
107f6ced4a3SJed Brown     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
108f6ced4a3SJed Brown   }
1090f453b92SJed Brown   ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
1100f453b92SJed Brown   ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
111f6ced4a3SJed Brown 
112f6ced4a3SJed Brown   nrecvs  = 0;
113f6ced4a3SJed Brown   barrier = MPI_REQUEST_NULL;
114f6ced4a3SJed Brown   for (done=0; !done; ) {
115f6ced4a3SJed Brown     PetscMPIInt flag;
116f6ced4a3SJed Brown     MPI_Status  status;
117f6ced4a3SJed Brown     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
118f6ced4a3SJed Brown     if (flag) {                 /* incoming message */
119f6ced4a3SJed Brown       PetscMPIInt *recvrank;
120f6ced4a3SJed Brown       void        *buf;
121137cf7b6SJed Brown       ierr      = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
122137cf7b6SJed Brown       ierr      = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
123f6ced4a3SJed Brown       *recvrank = status.MPI_SOURCE;
124f6ced4a3SJed Brown       ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
125f6ced4a3SJed Brown       nrecvs++;
126f6ced4a3SJed Brown     }
127f6ced4a3SJed Brown     if (barrier == MPI_REQUEST_NULL) {
1284dc2109aSBarry Smith       PetscMPIInt sent,nsends;
1294dc2109aSBarry Smith       ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
130f6ced4a3SJed Brown       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
131f6ced4a3SJed Brown       if (sent) {
132f6ced4a3SJed Brown         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
133f6ced4a3SJed Brown         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
134f6ced4a3SJed Brown       }
135f6ced4a3SJed Brown     } else {
136f6ced4a3SJed Brown       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
137f6ced4a3SJed Brown     }
138f6ced4a3SJed Brown   }
139f6ced4a3SJed Brown   *nfrom = nrecvs;
140137cf7b6SJed Brown   ierr   = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
1410f453b92SJed Brown   ierr   = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
142137cf7b6SJed Brown   ierr   = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
1430f453b92SJed Brown   ierr   = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
144f6ced4a3SJed Brown   PetscFunctionReturn(0);
145f6ced4a3SJed Brown }
146f6ced4a3SJed Brown #endif
147f6ced4a3SJed Brown 
148f6ced4a3SJed Brown #undef __FUNCT__
1496145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
1506145cd65SJed Brown static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
151f6ced4a3SJed Brown {
152f6ced4a3SJed Brown   PetscErrorCode ierr;
153f6ced4a3SJed Brown   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
154f6ced4a3SJed Brown   PetscInt       i;
155f6ced4a3SJed Brown   char           *tdata,*fdata;
156f6ced4a3SJed Brown   MPI_Request    *reqs,*sendreqs;
157f6ced4a3SJed Brown   MPI_Status     *statuses;
158f6ced4a3SJed Brown 
159f6ced4a3SJed Brown   PetscFunctionBegin;
160f6ced4a3SJed Brown   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1611795a4d1SJed Brown   ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr);
162f6ced4a3SJed Brown   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
1630298fd71SBarry Smith   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr);
164f6ced4a3SJed Brown   ierr = PetscFree(iflags);CHKERRQ(ierr);
165f6ced4a3SJed Brown 
166f6ced4a3SJed Brown   ierr     = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
167f6ced4a3SJed Brown   ierr     = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
168f6ced4a3SJed Brown   ierr     = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
169f6ced4a3SJed Brown   tdata    = (char*)todata;
170dcca6d9dSJed Brown   ierr     = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
17106926cafSJed Brown   sendreqs = reqs + nrecvs;
172f6ced4a3SJed Brown   for (i=0; i<nrecvs; i++) {
173f6ced4a3SJed Brown     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
174f6ced4a3SJed Brown   }
175f6ced4a3SJed Brown   for (i=0; i<nto; i++) {
176f6ced4a3SJed Brown     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
177f6ced4a3SJed Brown   }
178f6ced4a3SJed Brown   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
179785e854fSJed Brown   ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
180a297a907SKarl Rupp   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
18106926cafSJed Brown   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
182f6ced4a3SJed Brown 
183f6ced4a3SJed Brown   *nfrom            = nrecvs;
184f6ced4a3SJed Brown   *fromranks        = franks;
185f6ced4a3SJed Brown   *(void**)fromdata = fdata;
186f6ced4a3SJed Brown   PetscFunctionReturn(0);
187f6ced4a3SJed Brown }
188f6ced4a3SJed Brown 
189f6ced4a3SJed Brown #undef __FUNCT__
190f6ced4a3SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided"
191f6ced4a3SJed Brown /*@C
192f6ced4a3SJed Brown    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
193f6ced4a3SJed Brown 
194f6ced4a3SJed Brown    Collective on MPI_Comm
195f6ced4a3SJed Brown 
196f6ced4a3SJed Brown    Input Arguments:
197f6ced4a3SJed Brown +  comm - communicator
198f6ced4a3SJed Brown .  count - number of entries to send/receive (must match on all ranks)
199f6ced4a3SJed Brown .  dtype - datatype to send/receive from each rank (must match on all ranks)
200f6ced4a3SJed Brown .  nto - number of ranks to send data to
201f6ced4a3SJed Brown .  toranks - ranks to send to (array of length nto)
202f6ced4a3SJed Brown -  todata - data to send to each rank (packed)
203f6ced4a3SJed Brown 
204f6ced4a3SJed Brown    Output Arguments:
205f6ced4a3SJed Brown +  nfrom - number of ranks receiving messages from
206f6ced4a3SJed Brown .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
207f6ced4a3SJed Brown -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
208f6ced4a3SJed Brown 
209f6ced4a3SJed Brown    Level: developer
210f6ced4a3SJed Brown 
211f6ced4a3SJed Brown    Notes:
212f6ced4a3SJed Brown    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
213f6ced4a3SJed Brown    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
214f6ced4a3SJed Brown 
215f6ced4a3SJed Brown    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
216f6ced4a3SJed Brown 
217f6ced4a3SJed Brown    References:
218f6ced4a3SJed Brown    The MPI_Ibarrier implementation uses the algorithm in
219f6ced4a3SJed Brown    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
220f6ced4a3SJed Brown 
221f6ced4a3SJed Brown .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
222f6ced4a3SJed Brown @*/
223f6ced4a3SJed Brown PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
224f6ced4a3SJed Brown {
225f6ced4a3SJed Brown   PetscErrorCode         ierr;
2266145cd65SJed Brown   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
227f6ced4a3SJed Brown 
228f6ced4a3SJed Brown   PetscFunctionBegin;
229*3b3561c8SJed Brown   ierr = PetscSysInitializePackage();CHKERRQ(ierr);
230*3b3561c8SJed Brown   ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
2316145cd65SJed Brown   ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
232f6ced4a3SJed Brown   switch (buildtype) {
2336145cd65SJed Brown   case PETSC_BUILDTWOSIDED_IBARRIER:
234f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER)
2356145cd65SJed Brown     ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
2366145cd65SJed Brown #else
2376145cd65SJed Brown     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
238f6ced4a3SJed Brown #endif
2396145cd65SJed Brown     break;
2406145cd65SJed Brown   case PETSC_BUILDTWOSIDED_ALLREDUCE:
2416145cd65SJed Brown     ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
242f6ced4a3SJed Brown     break;
243f6ced4a3SJed Brown   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
244f6ced4a3SJed Brown   }
245*3b3561c8SJed Brown   ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
246f6ced4a3SJed Brown   PetscFunctionReturn(0);
247f6ced4a3SJed Brown }
248