1e5c89e4eSSatish Balay 2c6db04a5SJed Brown #include <petscsys.h> /*I "petscsys.h" I*/ 3e5c89e4eSSatish Balay 4e5c89e4eSSatish Balay /*@C 5e5c89e4eSSatish Balay PetscGatherNumberOfMessages - Computes the number of messages a node expects to receive 6e5c89e4eSSatish Balay 7d083f849SBarry Smith Collective 8e5c89e4eSSatish Balay 9e5c89e4eSSatish Balay Input Parameters: 10e5c89e4eSSatish Balay + comm - Communicator 11e5c89e4eSSatish Balay . iflags - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a 120298fd71SBarry Smith message from current node to ith node. Optionally NULL 13e5c89e4eSSatish Balay - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i]. 140298fd71SBarry Smith Optionally NULL. 15e5c89e4eSSatish Balay 16e5c89e4eSSatish Balay Output Parameters: 17e5c89e4eSSatish Balay . nrecvs - number of messages received 18e5c89e4eSSatish Balay 19e5c89e4eSSatish Balay Level: developer 20e5c89e4eSSatish Balay 21e5c89e4eSSatish Balay Notes: 22e5c89e4eSSatish Balay With this info, the correct message lengths can be determined using 23e5c89e4eSSatish Balay PetscGatherMessageLengths() 24e5c89e4eSSatish Balay 25e5c89e4eSSatish Balay Either iflags or ilengths should be provided. If iflags is not 260298fd71SBarry Smith provided (NULL) it can be computed from ilengths. If iflags is 27e5c89e4eSSatish Balay provided, ilengths is not required. 28e5c89e4eSSatish Balay 29e5c89e4eSSatish Balay .seealso: PetscGatherMessageLengths() 30e5c89e4eSSatish Balay @*/ 317087cfbeSBarry Smith PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs) 32e5c89e4eSSatish Balay { 330298fd71SBarry Smith PetscMPIInt size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL; 34e5c89e4eSSatish Balay PetscErrorCode ierr; 35e5c89e4eSSatish Balay 36e5c89e4eSSatish Balay PetscFunctionBegin; 37ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 38ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 39e5c89e4eSSatish Balay 40dcca6d9dSJed Brown ierr = PetscMalloc2(size,&recv_buf,size,&iflags_localm);CHKERRQ(ierr); 41e5c89e4eSSatish Balay 42e5c89e4eSSatish Balay /* If iflags not provided, compute iflags from ilengths */ 43e5c89e4eSSatish Balay if (!iflags) { 44e32f2f54SBarry Smith if (!ilengths) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided"); 45e5c89e4eSSatish Balay iflags_local = iflags_localm; 46e5c89e4eSSatish Balay for (i=0; i<size; i++) { 47e5c89e4eSSatish Balay if (ilengths[i]) iflags_local[i] = 1; 48e5c89e4eSSatish Balay else iflags_local[i] = 0; 49e5c89e4eSSatish Balay } 50a297a907SKarl Rupp } else iflags_local = (PetscMPIInt*) iflags; 51e5c89e4eSSatish Balay 52e5c89e4eSSatish Balay /* Post an allreduce to determine the numer of messages the current node will receive */ 53820f2d46SBarry Smith ierr = MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr); 54e5c89e4eSSatish Balay *nrecvs = recv_buf[rank]; 55e5c89e4eSSatish Balay 56e5c89e4eSSatish Balay ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr); 57e5c89e4eSSatish Balay PetscFunctionReturn(0); 58e5c89e4eSSatish Balay } 59e5c89e4eSSatish Balay 60e5c89e4eSSatish Balay /*@C 61e5c89e4eSSatish Balay PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive, 62e5c89e4eSSatish Balay including (from-id,length) pairs for each message. 63e5c89e4eSSatish Balay 64d083f849SBarry Smith Collective 65e5c89e4eSSatish Balay 66e5c89e4eSSatish Balay Input Parameters: 67e5c89e4eSSatish Balay + comm - Communicator 68e5c89e4eSSatish Balay . nsends - number of messages that are to be sent. 69e5c89e4eSSatish Balay . nrecvs - number of messages being received 70e5c89e4eSSatish Balay - ilengths - an array of integers of length sizeof(comm) 71e5c89e4eSSatish Balay a non zero ilengths[i] represent a message to i of length ilengths[i] 72e5c89e4eSSatish Balay 73e5c89e4eSSatish Balay Output Parameters: 74e5c89e4eSSatish Balay + onodes - list of node-ids from which messages are expected 75e5c89e4eSSatish Balay - olengths - corresponding message lengths 76e5c89e4eSSatish Balay 77e5c89e4eSSatish Balay Level: developer 78e5c89e4eSSatish Balay 79e5c89e4eSSatish Balay Notes: 80e5c89e4eSSatish Balay With this info, the correct MPI_Irecv() can be posted with the correct 81e5c89e4eSSatish Balay from-id, with a buffer with the right amount of memory required. 82e5c89e4eSSatish Balay 83e5c89e4eSSatish Balay The calling function deallocates the memory in onodes and olengths 84e5c89e4eSSatish Balay 85c2916339SPierre Jolivet To determine nrecvs, one can use PetscGatherNumberOfMessages() 86e5c89e4eSSatish Balay 87e5c89e4eSSatish Balay .seealso: PetscGatherNumberOfMessages() 88e5c89e4eSSatish Balay @*/ 897087cfbeSBarry Smith PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths) 90e5c89e4eSSatish Balay { 91e5c89e4eSSatish Balay PetscErrorCode ierr; 926bfd7d4fSJunchao Zhang PetscMPIInt size,rank,tag,i,j; 930298fd71SBarry Smith MPI_Request *s_waits = NULL,*r_waits = NULL; 940298fd71SBarry Smith MPI_Status *w_status = NULL; 95e5c89e4eSSatish Balay 96e5c89e4eSSatish Balay PetscFunctionBegin; 97ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 98ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 99e5c89e4eSSatish Balay ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 100e5c89e4eSSatish Balay 101e5c89e4eSSatish Balay /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 102dcca6d9dSJed Brown ierr = PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);CHKERRQ(ierr); 103e5c89e4eSSatish Balay s_waits = r_waits+nrecvs; 104e5c89e4eSSatish Balay 105e5c89e4eSSatish Balay /* Post the Irecv to get the message length-info */ 106785e854fSJed Brown ierr = PetscMalloc1(nrecvs,olengths);CHKERRQ(ierr); 107e5c89e4eSSatish Balay for (i=0; i<nrecvs; i++) { 108ffc4695bSBarry Smith ierr = MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr); 109e5c89e4eSSatish Balay } 110e5c89e4eSSatish Balay 111e5c89e4eSSatish Balay /* Post the Isends with the message length-info */ 112e5c89e4eSSatish Balay for (i=0,j=0; i<size; ++i) { 113e5c89e4eSSatish Balay if (ilengths[i]) { 114ffc4695bSBarry Smith ierr = MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr); 115e5c89e4eSSatish Balay j++; 116e5c89e4eSSatish Balay } 117e5c89e4eSSatish Balay } 118e5c89e4eSSatish Balay 119e5c89e4eSSatish Balay /* Post waits on sends and receivs */ 120ffc4695bSBarry Smith if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);} 121e5c89e4eSSatish Balay 122e5c89e4eSSatish Balay /* Pack up the received data */ 123785e854fSJed Brown ierr = PetscMalloc1(nrecvs,onodes);CHKERRQ(ierr); 1246bfd7d4fSJunchao Zhang for (i=0; i<nrecvs; ++i) { 1256bfd7d4fSJunchao Zhang (*onodes)[i] = w_status[i].MPI_SOURCE; 1266bfd7d4fSJunchao Zhang #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) 1276bfd7d4fSJunchao Zhang /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS. 1286bfd7d4fSJunchao Zhang It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI 1296bfd7d4fSJunchao Zhang does not put correct value in recv buffer. See also 1306bfd7d4fSJunchao Zhang https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html 1316bfd7d4fSJunchao Zhang https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html 1326bfd7d4fSJunchao Zhang */ 1336bfd7d4fSJunchao Zhang if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; 1346bfd7d4fSJunchao Zhang #endif 1356bfd7d4fSJunchao Zhang } 136e5c89e4eSSatish Balay ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr); 137e5c89e4eSSatish Balay PetscFunctionReturn(0); 138e5c89e4eSSatish Balay } 139dd6ea824SBarry Smith 140e5c89e4eSSatish Balay /*@C 141e5c89e4eSSatish Balay PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive, 142e5c89e4eSSatish Balay including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths() 143e5c89e4eSSatish Balay except it takes TWO ilenths and output TWO olengths. 144e5c89e4eSSatish Balay 145d083f849SBarry Smith Collective 146e5c89e4eSSatish Balay 147e5c89e4eSSatish Balay Input Parameters: 148e5c89e4eSSatish Balay + comm - Communicator 149e5c89e4eSSatish Balay . nsends - number of messages that are to be sent. 150e5c89e4eSSatish Balay . nrecvs - number of messages being received 151e5c89e4eSSatish Balay - ilengths1, ilengths2 - array of integers of length sizeof(comm) 152e5c89e4eSSatish Balay a non zero ilengths[i] represent a message to i of length ilengths[i] 153e5c89e4eSSatish Balay 154e5c89e4eSSatish Balay Output Parameters: 155e5c89e4eSSatish Balay + onodes - list of node-ids from which messages are expected 156e5c89e4eSSatish Balay - olengths1, olengths2 - corresponding message lengths 157e5c89e4eSSatish Balay 158e5c89e4eSSatish Balay Level: developer 159e5c89e4eSSatish Balay 160e5c89e4eSSatish Balay Notes: 161e5c89e4eSSatish Balay With this info, the correct MPI_Irecv() can be posted with the correct 162e5c89e4eSSatish Balay from-id, with a buffer with the right amount of memory required. 163e5c89e4eSSatish Balay 164e5c89e4eSSatish Balay The calling function deallocates the memory in onodes and olengths 165e5c89e4eSSatish Balay 166c2916339SPierre Jolivet To determine nrecvs, one can use PetscGatherNumberOfMessages() 167e5c89e4eSSatish Balay 168e5c89e4eSSatish Balay .seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages() 169e5c89e4eSSatish Balay @*/ 1707087cfbeSBarry Smith PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2) 171e5c89e4eSSatish Balay { 172e5c89e4eSSatish Balay PetscErrorCode ierr; 1730298fd71SBarry Smith PetscMPIInt size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL; 1740298fd71SBarry Smith MPI_Request *s_waits = NULL,*r_waits = NULL; 1750298fd71SBarry Smith MPI_Status *w_status = NULL; 176e5c89e4eSSatish Balay 177e5c89e4eSSatish Balay PetscFunctionBegin; 178ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 179e5c89e4eSSatish Balay ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 180e5c89e4eSSatish Balay 1813bf92927SBarry Smith /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */ 182dcca6d9dSJed Brown ierr = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr); 183e5c89e4eSSatish Balay s_waits = r_waits + nrecvs; 184e5c89e4eSSatish Balay 185e5c89e4eSSatish Balay /* Post the Irecv to get the message length-info */ 186854ce69bSBarry Smith ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr); 187854ce69bSBarry Smith ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr); 188e5c89e4eSSatish Balay for (i=0; i<nrecvs; i++) { 189e5c89e4eSSatish Balay buf_j = buf_r + (2*i); 19055b25c41SPierre Jolivet ierr = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr); 191e5c89e4eSSatish Balay } 192e5c89e4eSSatish Balay 193e5c89e4eSSatish Balay /* Post the Isends with the message length-info */ 194e5c89e4eSSatish Balay for (i=0,j=0; i<size; ++i) { 195e5c89e4eSSatish Balay if (ilengths1[i]) { 196e5c89e4eSSatish Balay buf_j = buf_s + (2*j); 197e5c89e4eSSatish Balay buf_j[0] = *(ilengths1+i); 198e5c89e4eSSatish Balay buf_j[1] = *(ilengths2+i); 199ffc4695bSBarry Smith ierr = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr); 200e5c89e4eSSatish Balay j++; 201e5c89e4eSSatish Balay } 202e5c89e4eSSatish Balay } 203f327f304SBarry Smith if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends); 204e5c89e4eSSatish Balay 205e5c89e4eSSatish Balay /* Post waits on sends and receivs */ 206ffc4695bSBarry Smith if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);} 207e5c89e4eSSatish Balay 208e5c89e4eSSatish Balay /* Pack up the received data */ 209854ce69bSBarry Smith ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr); 210e5c89e4eSSatish Balay for (i=0; i<nrecvs; ++i) { 211e5c89e4eSSatish Balay (*onodes)[i] = w_status[i].MPI_SOURCE; 212e5c89e4eSSatish Balay buf_j = buf_r + (2*i); 213e5c89e4eSSatish Balay (*olengths1)[i] = buf_j[0]; 214e5c89e4eSSatish Balay (*olengths2)[i] = buf_j[1]; 215e5c89e4eSSatish Balay } 216e5c89e4eSSatish Balay 217e5c89e4eSSatish Balay ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr); 218e5c89e4eSSatish Balay PetscFunctionReturn(0); 219e5c89e4eSSatish Balay } 220e5c89e4eSSatish Balay 221e5c89e4eSSatish Balay /* 222e5c89e4eSSatish Balay 223*a5b23f4aSJose E. Roman Allocate a buffer sufficient to hold messages of size specified in olengths. 224e5c89e4eSSatish Balay And post Irecvs on these buffers using node info from onodes 225e5c89e4eSSatish Balay 226e5c89e4eSSatish Balay */ 2277087cfbeSBarry Smith PetscErrorCode PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits) 228e5c89e4eSSatish Balay { 229e5c89e4eSSatish Balay PetscErrorCode ierr; 230c05d87d6SBarry Smith PetscInt **rbuf_t,i,len = 0; 231e5c89e4eSSatish Balay MPI_Request *r_waits_t; 232e5c89e4eSSatish Balay 233e5c89e4eSSatish Balay PetscFunctionBegin; 234e5c89e4eSSatish Balay /* compute memory required for recv buffers */ 235e5c89e4eSSatish Balay for (i=0; i<nrecvs; i++) len += olengths[i]; /* each message length */ 236e5c89e4eSSatish Balay 237e5c89e4eSSatish Balay /* allocate memory for recv buffers */ 238854ce69bSBarry Smith ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr); 239785e854fSJed Brown ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr); 240e5c89e4eSSatish Balay for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1]; 241e5c89e4eSSatish Balay 242e5c89e4eSSatish Balay /* Post the receives */ 243785e854fSJed Brown ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr); 244e5c89e4eSSatish Balay for (i=0; i<nrecvs; ++i) { 245ffc4695bSBarry Smith ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr); 246e5c89e4eSSatish Balay } 247e5c89e4eSSatish Balay 248e5c89e4eSSatish Balay *rbuf = rbuf_t; 249e5c89e4eSSatish Balay *r_waits = r_waits_t; 250e5c89e4eSSatish Balay PetscFunctionReturn(0); 251e5c89e4eSSatish Balay } 252e5c89e4eSSatish Balay 2537087cfbeSBarry Smith PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits) 254e5c89e4eSSatish Balay { 255e5c89e4eSSatish Balay PetscErrorCode ierr; 256052f0c41SBarry Smith PetscMPIInt i; 257e5c89e4eSSatish Balay PetscScalar **rbuf_t; 258e5c89e4eSSatish Balay MPI_Request *r_waits_t; 259c05d87d6SBarry Smith PetscInt len = 0; 260e5c89e4eSSatish Balay 261fe28d99cSBarry Smith PetscFunctionBegin; 262e5c89e4eSSatish Balay /* compute memory required for recv buffers */ 263e5c89e4eSSatish Balay for (i=0; i<nrecvs; i++) len += olengths[i]; /* each message length */ 264e5c89e4eSSatish Balay 265e5c89e4eSSatish Balay /* allocate memory for recv buffers */ 266854ce69bSBarry Smith ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr); 267785e854fSJed Brown ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr); 268e5c89e4eSSatish Balay for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1]; 269e5c89e4eSSatish Balay 270e5c89e4eSSatish Balay /* Post the receives */ 271785e854fSJed Brown ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr); 272e5c89e4eSSatish Balay for (i=0; i<nrecvs; ++i) { 273ffc4695bSBarry Smith ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr); 274e5c89e4eSSatish Balay } 275e5c89e4eSSatish Balay 276e5c89e4eSSatish Balay *rbuf = rbuf_t; 277e5c89e4eSSatish Balay *r_waits = r_waits_t; 278e5c89e4eSSatish Balay PetscFunctionReturn(0); 279e5c89e4eSSatish Balay } 280