1f6ced4a3SJed Brown #include <petscsys.h> /*I "petscsys.h" I*/ 2f6ced4a3SJed Brown 3*3b3561c8SJed Brown PetscLogEvent PETSC_BuildTwoSided; 4*3b3561c8SJed Brown 56145cd65SJed Brown const char *const PetscBuildTwoSidedTypes[] = { 6f6ced4a3SJed Brown "ALLREDUCE", 76145cd65SJed Brown "IBARRIER", 86145cd65SJed Brown "PetscBuildTwoSidedType", 96145cd65SJed Brown "PETSC_BUILDTWOSIDED_", 10f6ced4a3SJed Brown 0 11f6ced4a3SJed Brown }; 12f6ced4a3SJed Brown 136145cd65SJed Brown static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 14f6ced4a3SJed Brown 15f6ced4a3SJed Brown #undef __FUNCT__ 166145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSidedSetType" 176145cd65SJed Brown /*@ 186145cd65SJed Brown PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 196145cd65SJed Brown 206145cd65SJed Brown Logically Collective 216145cd65SJed Brown 226145cd65SJed Brown Input Arguments: 236145cd65SJed Brown + comm - PETSC_COMM_WORLD 246145cd65SJed Brown - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 256145cd65SJed Brown 266145cd65SJed Brown Level: developer 276145cd65SJed Brown 286145cd65SJed Brown Note: 296145cd65SJed Brown This option is currently global, but could be made per-communicator. 306145cd65SJed Brown 316145cd65SJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 326145cd65SJed Brown @*/ 336145cd65SJed Brown PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 346145cd65SJed Brown { 356145cd65SJed Brown PetscFunctionBegin; 366145cd65SJed Brown #if defined(PETSC_USE_DEBUG) 376145cd65SJed Brown { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 386145cd65SJed Brown PetscMPIInt ierr; 396145cd65SJed Brown PetscMPIInt b1[2],b2[2]; 406145cd65SJed Brown b1[0] = -(PetscMPIInt)twosided; 416145cd65SJed Brown b1[1] = (PetscMPIInt)twosided; 426145cd65SJed Brown ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 436145cd65SJed Brown if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 446145cd65SJed Brown } 456145cd65SJed Brown #endif 466145cd65SJed Brown _twosided_type = twosided; 476145cd65SJed Brown PetscFunctionReturn(0); 486145cd65SJed Brown } 496145cd65SJed Brown 506145cd65SJed Brown #undef __FUNCT__ 516145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSidedGetType" 526145cd65SJed Brown /*@ 536145cd65SJed Brown PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 546145cd65SJed Brown 556145cd65SJed Brown Logically Collective 566145cd65SJed Brown 576145cd65SJed Brown Output Arguments: 586145cd65SJed Brown + comm - communicator on which to query algorithm 596145cd65SJed Brown - twosided - algorithm to use for PetscCommBuildTwoSided() 606145cd65SJed Brown 616145cd65SJed Brown Level: developer 626145cd65SJed Brown 636145cd65SJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 646145cd65SJed Brown @*/ 656145cd65SJed Brown PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 66f6ced4a3SJed Brown { 67f6ced4a3SJed Brown PetscErrorCode ierr; 68f6ced4a3SJed Brown 69f6ced4a3SJed Brown PetscFunctionBegin; 706145cd65SJed Brown *twosided = PETSC_BUILDTWOSIDED_NOTSET; 716145cd65SJed Brown if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 72f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER) 736145cd65SJed Brown # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS) 746145cd65SJed Brown /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */ 756145cd65SJed Brown _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 76f6ced4a3SJed Brown # else 776145cd65SJed Brown _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 78f6ced4a3SJed Brown # endif 796145cd65SJed Brown #else 806145cd65SJed Brown _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 816145cd65SJed Brown #endif 820298fd71SBarry Smith ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 83f6ced4a3SJed Brown } 84f6ced4a3SJed Brown *twosided = _twosided_type; 85f6ced4a3SJed Brown PetscFunctionReturn(0); 86f6ced4a3SJed Brown } 87f6ced4a3SJed Brown 88f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER) 89f6ced4a3SJed Brown 90f6ced4a3SJed Brown #undef __FUNCT__ 916145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier" 926145cd65SJed Brown static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 93f6ced4a3SJed Brown { 94f6ced4a3SJed Brown PetscErrorCode ierr; 95f6ced4a3SJed Brown PetscMPIInt nrecvs,tag,unitbytes,done; 96f6ced4a3SJed Brown PetscInt i; 97f6ced4a3SJed Brown char *tdata; 98f6ced4a3SJed Brown MPI_Request *sendreqs,barrier; 990f453b92SJed Brown PetscSegBuffer segrank,segdata; 100f6ced4a3SJed Brown 101f6ced4a3SJed Brown PetscFunctionBegin; 102f6ced4a3SJed Brown ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 103f6ced4a3SJed Brown ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 104f6ced4a3SJed Brown tdata = (char*)todata; 105785e854fSJed Brown ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 106f6ced4a3SJed Brown for (i=0; i<nto; i++) { 107f6ced4a3SJed Brown ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 108f6ced4a3SJed Brown } 1090f453b92SJed Brown ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 1100f453b92SJed Brown ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 111f6ced4a3SJed Brown 112f6ced4a3SJed Brown nrecvs = 0; 113f6ced4a3SJed Brown barrier = MPI_REQUEST_NULL; 114f6ced4a3SJed Brown for (done=0; !done; ) { 115f6ced4a3SJed Brown PetscMPIInt flag; 116f6ced4a3SJed Brown MPI_Status status; 117f6ced4a3SJed Brown ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 118f6ced4a3SJed Brown if (flag) { /* incoming message */ 119f6ced4a3SJed Brown PetscMPIInt *recvrank; 120f6ced4a3SJed Brown void *buf; 121137cf7b6SJed Brown ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 122137cf7b6SJed Brown ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 123f6ced4a3SJed Brown *recvrank = status.MPI_SOURCE; 124f6ced4a3SJed Brown ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 125f6ced4a3SJed Brown nrecvs++; 126f6ced4a3SJed Brown } 127f6ced4a3SJed Brown if (barrier == MPI_REQUEST_NULL) { 1284dc2109aSBarry Smith PetscMPIInt sent,nsends; 1294dc2109aSBarry Smith ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 130f6ced4a3SJed Brown ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 131f6ced4a3SJed Brown if (sent) { 132f6ced4a3SJed Brown ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 133f6ced4a3SJed Brown ierr = PetscFree(sendreqs);CHKERRQ(ierr); 134f6ced4a3SJed Brown } 135f6ced4a3SJed Brown } else { 136f6ced4a3SJed Brown ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 137f6ced4a3SJed Brown } 138f6ced4a3SJed Brown } 139f6ced4a3SJed Brown *nfrom = nrecvs; 140137cf7b6SJed Brown ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 1410f453b92SJed Brown ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 142137cf7b6SJed Brown ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 1430f453b92SJed Brown ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 144f6ced4a3SJed Brown PetscFunctionReturn(0); 145f6ced4a3SJed Brown } 146f6ced4a3SJed Brown #endif 147f6ced4a3SJed Brown 148f6ced4a3SJed Brown #undef __FUNCT__ 1496145cd65SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce" 1506145cd65SJed Brown static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 151f6ced4a3SJed Brown { 152f6ced4a3SJed Brown PetscErrorCode ierr; 153f6ced4a3SJed Brown PetscMPIInt size,*iflags,nrecvs,tag,unitbytes,*franks; 154f6ced4a3SJed Brown PetscInt i; 155f6ced4a3SJed Brown char *tdata,*fdata; 156f6ced4a3SJed Brown MPI_Request *reqs,*sendreqs; 157f6ced4a3SJed Brown MPI_Status *statuses; 158f6ced4a3SJed Brown 159f6ced4a3SJed Brown PetscFunctionBegin; 160f6ced4a3SJed Brown ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1611795a4d1SJed Brown ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr); 162f6ced4a3SJed Brown for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 1630298fd71SBarry Smith ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr); 164f6ced4a3SJed Brown ierr = PetscFree(iflags);CHKERRQ(ierr); 165f6ced4a3SJed Brown 166f6ced4a3SJed Brown ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 167f6ced4a3SJed Brown ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 168f6ced4a3SJed Brown ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 169f6ced4a3SJed Brown tdata = (char*)todata; 170dcca6d9dSJed Brown ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 17106926cafSJed Brown sendreqs = reqs + nrecvs; 172f6ced4a3SJed Brown for (i=0; i<nrecvs; i++) { 173f6ced4a3SJed Brown ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 174f6ced4a3SJed Brown } 175f6ced4a3SJed Brown for (i=0; i<nto; i++) { 176f6ced4a3SJed Brown ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 177f6ced4a3SJed Brown } 178f6ced4a3SJed Brown ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 179785e854fSJed Brown ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 180a297a907SKarl Rupp for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 18106926cafSJed Brown ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 182f6ced4a3SJed Brown 183f6ced4a3SJed Brown *nfrom = nrecvs; 184f6ced4a3SJed Brown *fromranks = franks; 185f6ced4a3SJed Brown *(void**)fromdata = fdata; 186f6ced4a3SJed Brown PetscFunctionReturn(0); 187f6ced4a3SJed Brown } 188f6ced4a3SJed Brown 189f6ced4a3SJed Brown #undef __FUNCT__ 190f6ced4a3SJed Brown #define __FUNCT__ "PetscCommBuildTwoSided" 191f6ced4a3SJed Brown /*@C 192f6ced4a3SJed Brown PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 193f6ced4a3SJed Brown 194f6ced4a3SJed Brown Collective on MPI_Comm 195f6ced4a3SJed Brown 196f6ced4a3SJed Brown Input Arguments: 197f6ced4a3SJed Brown + comm - communicator 198f6ced4a3SJed Brown . count - number of entries to send/receive (must match on all ranks) 199f6ced4a3SJed Brown . dtype - datatype to send/receive from each rank (must match on all ranks) 200f6ced4a3SJed Brown . nto - number of ranks to send data to 201f6ced4a3SJed Brown . toranks - ranks to send to (array of length nto) 202f6ced4a3SJed Brown - todata - data to send to each rank (packed) 203f6ced4a3SJed Brown 204f6ced4a3SJed Brown Output Arguments: 205f6ced4a3SJed Brown + nfrom - number of ranks receiving messages from 206f6ced4a3SJed Brown . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 207f6ced4a3SJed Brown - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 208f6ced4a3SJed Brown 209f6ced4a3SJed Brown Level: developer 210f6ced4a3SJed Brown 211f6ced4a3SJed Brown Notes: 212f6ced4a3SJed Brown This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 213f6ced4a3SJed Brown PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 214f6ced4a3SJed Brown 215f6ced4a3SJed Brown Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 216f6ced4a3SJed Brown 217f6ced4a3SJed Brown References: 218f6ced4a3SJed Brown The MPI_Ibarrier implementation uses the algorithm in 219f6ced4a3SJed Brown Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 220f6ced4a3SJed Brown 221f6ced4a3SJed Brown .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 222f6ced4a3SJed Brown @*/ 223f6ced4a3SJed Brown PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 224f6ced4a3SJed Brown { 225f6ced4a3SJed Brown PetscErrorCode ierr; 2266145cd65SJed Brown PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 227f6ced4a3SJed Brown 228f6ced4a3SJed Brown PetscFunctionBegin; 229*3b3561c8SJed Brown ierr = PetscSysInitializePackage();CHKERRQ(ierr); 230*3b3561c8SJed Brown ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 2316145cd65SJed Brown ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 232f6ced4a3SJed Brown switch (buildtype) { 2336145cd65SJed Brown case PETSC_BUILDTWOSIDED_IBARRIER: 234f6ced4a3SJed Brown #if defined(PETSC_HAVE_MPI_IBARRIER) 2356145cd65SJed Brown ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 2366145cd65SJed Brown #else 2376145cd65SJed Brown SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 238f6ced4a3SJed Brown #endif 2396145cd65SJed Brown break; 2406145cd65SJed Brown case PETSC_BUILDTWOSIDED_ALLREDUCE: 2416145cd65SJed Brown ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 242f6ced4a3SJed Brown break; 243f6ced4a3SJed Brown default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 244f6ced4a3SJed Brown } 245*3b3561c8SJed Brown ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 246f6ced4a3SJed Brown PetscFunctionReturn(0); 247f6ced4a3SJed Brown } 248