xref: /petsc/src/sys/utils/mpimesg.c (revision 6b867d5ac32ed0c728f185df9d084acdf26f70bf)
1e5c89e4eSSatish Balay 
2c6db04a5SJed Brown #include <petscsys.h>        /*I  "petscsys.h"  I*/
3e5c89e4eSSatish Balay 
4e5c89e4eSSatish Balay /*@C
5e5c89e4eSSatish Balay   PetscGatherNumberOfMessages -  Computes the number of messages a node expects to receive
6e5c89e4eSSatish Balay 
7d083f849SBarry Smith   Collective
8e5c89e4eSSatish Balay 
9e5c89e4eSSatish Balay   Input Parameters:
10e5c89e4eSSatish Balay + comm     - Communicator
11e5c89e4eSSatish Balay . iflags   - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a
120298fd71SBarry Smith              message from current node to ith node. Optionally NULL
13e5c89e4eSSatish Balay - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i].
140298fd71SBarry Smith              Optionally NULL.
15e5c89e4eSSatish Balay 
16e5c89e4eSSatish Balay   Output Parameters:
17e5c89e4eSSatish Balay . nrecvs    - number of messages received
18e5c89e4eSSatish Balay 
19e5c89e4eSSatish Balay   Level: developer
20e5c89e4eSSatish Balay 
21e5c89e4eSSatish Balay   Notes:
22e5c89e4eSSatish Balay   With this info, the correct message lengths can be determined using
23e5c89e4eSSatish Balay   PetscGatherMessageLengths()
24e5c89e4eSSatish Balay 
25e5c89e4eSSatish Balay   Either iflags or ilengths should be provided.  If iflags is not
260298fd71SBarry Smith   provided (NULL) it can be computed from ilengths. If iflags is
27e5c89e4eSSatish Balay   provided, ilengths is not required.
28e5c89e4eSSatish Balay 
29e5c89e4eSSatish Balay .seealso: PetscGatherMessageLengths()
30e5c89e4eSSatish Balay @*/
317087cfbeSBarry Smith PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
32e5c89e4eSSatish Balay {
330298fd71SBarry Smith   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
34e5c89e4eSSatish Balay   PetscErrorCode ierr;
35e5c89e4eSSatish Balay 
36e5c89e4eSSatish Balay   PetscFunctionBegin;
37ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
38ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
39e5c89e4eSSatish Balay 
40dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&recv_buf,size,&iflags_localm);CHKERRQ(ierr);
41e5c89e4eSSatish Balay 
42e5c89e4eSSatish Balay   /* If iflags not provided, compute iflags from ilengths */
43e5c89e4eSSatish Balay   if (!iflags) {
44e32f2f54SBarry Smith     if (!ilengths) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
45e5c89e4eSSatish Balay     iflags_local = iflags_localm;
46e5c89e4eSSatish Balay     for (i=0; i<size; i++) {
47e5c89e4eSSatish Balay       if (ilengths[i]) iflags_local[i] = 1;
48e5c89e4eSSatish Balay       else iflags_local[i] = 0;
49e5c89e4eSSatish Balay     }
50a297a907SKarl Rupp   } else iflags_local = (PetscMPIInt*) iflags;
51e5c89e4eSSatish Balay 
52e5c89e4eSSatish Balay   /* Post an allreduce to determine the numer of messages the current node will receive */
53820f2d46SBarry Smith   ierr    = MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr);
54e5c89e4eSSatish Balay   *nrecvs = recv_buf[rank];
55e5c89e4eSSatish Balay 
56e5c89e4eSSatish Balay   ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr);
57e5c89e4eSSatish Balay   PetscFunctionReturn(0);
58e5c89e4eSSatish Balay }
59e5c89e4eSSatish Balay 
60e5c89e4eSSatish Balay /*@C
61e5c89e4eSSatish Balay   PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive,
62e5c89e4eSSatish Balay   including (from-id,length) pairs for each message.
63e5c89e4eSSatish Balay 
64d083f849SBarry Smith   Collective
65e5c89e4eSSatish Balay 
66e5c89e4eSSatish Balay   Input Parameters:
67e5c89e4eSSatish Balay + comm      - Communicator
68e5c89e4eSSatish Balay . nsends    - number of messages that are to be sent.
69e5c89e4eSSatish Balay . nrecvs    - number of messages being received
70e5c89e4eSSatish Balay - ilengths  - an array of integers of length sizeof(comm)
71e5c89e4eSSatish Balay               a non zero ilengths[i] represent a message to i of length ilengths[i]
72e5c89e4eSSatish Balay 
73e5c89e4eSSatish Balay   Output Parameters:
74e5c89e4eSSatish Balay + onodes    - list of node-ids from which messages are expected
75e5c89e4eSSatish Balay - olengths  - corresponding message lengths
76e5c89e4eSSatish Balay 
77e5c89e4eSSatish Balay   Level: developer
78e5c89e4eSSatish Balay 
79e5c89e4eSSatish Balay   Notes:
80e5c89e4eSSatish Balay   With this info, the correct MPI_Irecv() can be posted with the correct
81e5c89e4eSSatish Balay   from-id, with a buffer with the right amount of memory required.
82e5c89e4eSSatish Balay 
83e5c89e4eSSatish Balay   The calling function deallocates the memory in onodes and olengths
84e5c89e4eSSatish Balay 
85c2916339SPierre Jolivet   To determine nrecvs, one can use PetscGatherNumberOfMessages()
86e5c89e4eSSatish Balay 
87e5c89e4eSSatish Balay .seealso: PetscGatherNumberOfMessages()
88e5c89e4eSSatish Balay @*/
897087cfbeSBarry Smith PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
90e5c89e4eSSatish Balay {
91e5c89e4eSSatish Balay   PetscErrorCode ierr;
926bfd7d4fSJunchao Zhang   PetscMPIInt    size,rank,tag,i,j;
930298fd71SBarry Smith   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
940298fd71SBarry Smith   MPI_Status     *w_status = NULL;
95e5c89e4eSSatish Balay 
96e5c89e4eSSatish Balay   PetscFunctionBegin;
97ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
98ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
99e5c89e4eSSatish Balay   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
100e5c89e4eSSatish Balay 
101e5c89e4eSSatish Balay   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
102dcca6d9dSJed Brown   ierr    = PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);CHKERRQ(ierr);
103e5c89e4eSSatish Balay   s_waits = r_waits+nrecvs;
104e5c89e4eSSatish Balay 
105e5c89e4eSSatish Balay   /* Post the Irecv to get the message length-info */
106785e854fSJed Brown   ierr = PetscMalloc1(nrecvs,olengths);CHKERRQ(ierr);
107e5c89e4eSSatish Balay   for (i=0; i<nrecvs; i++) {
108ffc4695bSBarry Smith     ierr = MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
109e5c89e4eSSatish Balay   }
110e5c89e4eSSatish Balay 
111e5c89e4eSSatish Balay   /* Post the Isends with the message length-info */
112e5c89e4eSSatish Balay   for (i=0,j=0; i<size; ++i) {
113e5c89e4eSSatish Balay     if (ilengths[i]) {
114ffc4695bSBarry Smith       ierr = MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
115e5c89e4eSSatish Balay       j++;
116e5c89e4eSSatish Balay     }
117e5c89e4eSSatish Balay   }
118e5c89e4eSSatish Balay 
119e5c89e4eSSatish Balay   /* Post waits on sends and receivs */
120ffc4695bSBarry Smith   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}
121e5c89e4eSSatish Balay 
122e5c89e4eSSatish Balay   /* Pack up the received data */
123785e854fSJed Brown   ierr = PetscMalloc1(nrecvs,onodes);CHKERRQ(ierr);
1246bfd7d4fSJunchao Zhang   for (i=0; i<nrecvs; ++i) {
1256bfd7d4fSJunchao Zhang     (*onodes)[i] = w_status[i].MPI_SOURCE;
1266bfd7d4fSJunchao Zhang #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
1276bfd7d4fSJunchao Zhang     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
1286bfd7d4fSJunchao Zhang        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
1296bfd7d4fSJunchao Zhang        does not put correct value in recv buffer. See also
1306bfd7d4fSJunchao Zhang        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
1316bfd7d4fSJunchao Zhang        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
1326bfd7d4fSJunchao Zhang      */
1336bfd7d4fSJunchao Zhang     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
1346bfd7d4fSJunchao Zhang #endif
1356bfd7d4fSJunchao Zhang   }
136e5c89e4eSSatish Balay   ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr);
137e5c89e4eSSatish Balay   PetscFunctionReturn(0);
138e5c89e4eSSatish Balay }
139dd6ea824SBarry Smith 
140e5c89e4eSSatish Balay /*@C
141e5c89e4eSSatish Balay   PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive,
142e5c89e4eSSatish Balay   including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
143e5c89e4eSSatish Balay   except it takes TWO ilenths and output TWO olengths.
144e5c89e4eSSatish Balay 
145d083f849SBarry Smith   Collective
146e5c89e4eSSatish Balay 
147e5c89e4eSSatish Balay   Input Parameters:
148e5c89e4eSSatish Balay + comm      - Communicator
149e5c89e4eSSatish Balay . nsends    - number of messages that are to be sent.
150e5c89e4eSSatish Balay . nrecvs    - number of messages being received
151*6b867d5aSJose E. Roman . ilengths1 - first array of integers of length sizeof(comm)
152*6b867d5aSJose E. Roman - ilengths2 - second array of integers of length sizeof(comm)
153e5c89e4eSSatish Balay 
154e5c89e4eSSatish Balay   Output Parameters:
155e5c89e4eSSatish Balay + onodes    - list of node-ids from which messages are expected
156*6b867d5aSJose E. Roman . olengths1 - first corresponding message lengths
157*6b867d5aSJose E. Roman - olengths2 - second  message lengths
158e5c89e4eSSatish Balay 
159e5c89e4eSSatish Balay   Level: developer
160e5c89e4eSSatish Balay 
161e5c89e4eSSatish Balay   Notes:
162e5c89e4eSSatish Balay   With this info, the correct MPI_Irecv() can be posted with the correct
163e5c89e4eSSatish Balay   from-id, with a buffer with the right amount of memory required.
164e5c89e4eSSatish Balay 
165e5c89e4eSSatish Balay   The calling function deallocates the memory in onodes and olengths
166e5c89e4eSSatish Balay 
167c2916339SPierre Jolivet   To determine nrecvs, one can use PetscGatherNumberOfMessages()
168e5c89e4eSSatish Balay 
169e5c89e4eSSatish Balay .seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
170e5c89e4eSSatish Balay @*/
1717087cfbeSBarry Smith PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
172e5c89e4eSSatish Balay {
173e5c89e4eSSatish Balay   PetscErrorCode ierr;
1740298fd71SBarry Smith   PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
1750298fd71SBarry Smith   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
1760298fd71SBarry Smith   MPI_Status     *w_status = NULL;
177e5c89e4eSSatish Balay 
178e5c89e4eSSatish Balay   PetscFunctionBegin;
179ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
180e5c89e4eSSatish Balay   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
181e5c89e4eSSatish Balay 
1823bf92927SBarry Smith   /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
183dcca6d9dSJed Brown   ierr = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr);
184e5c89e4eSSatish Balay   s_waits = r_waits + nrecvs;
185e5c89e4eSSatish Balay 
186e5c89e4eSSatish Balay   /* Post the Irecv to get the message length-info */
187854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr);
188854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr);
189e5c89e4eSSatish Balay   for (i=0; i<nrecvs; i++) {
190e5c89e4eSSatish Balay     buf_j = buf_r + (2*i);
19155b25c41SPierre Jolivet     ierr  = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
192e5c89e4eSSatish Balay   }
193e5c89e4eSSatish Balay 
194e5c89e4eSSatish Balay   /* Post the Isends with the message length-info */
195e5c89e4eSSatish Balay   for (i=0,j=0; i<size; ++i) {
196e5c89e4eSSatish Balay     if (ilengths1[i]) {
197e5c89e4eSSatish Balay       buf_j    = buf_s + (2*j);
198e5c89e4eSSatish Balay       buf_j[0] = *(ilengths1+i);
199e5c89e4eSSatish Balay       buf_j[1] = *(ilengths2+i);
200ffc4695bSBarry Smith       ierr = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
201e5c89e4eSSatish Balay       j++;
202e5c89e4eSSatish Balay     }
203e5c89e4eSSatish Balay   }
204f327f304SBarry Smith   if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends);
205e5c89e4eSSatish Balay 
206e5c89e4eSSatish Balay   /* Post waits on sends and receivs */
207ffc4695bSBarry Smith   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}
208e5c89e4eSSatish Balay 
209e5c89e4eSSatish Balay   /* Pack up the received data */
210854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr);
211e5c89e4eSSatish Balay   for (i=0; i<nrecvs; ++i) {
212e5c89e4eSSatish Balay     (*onodes)[i]    = w_status[i].MPI_SOURCE;
213e5c89e4eSSatish Balay     buf_j           = buf_r + (2*i);
214e5c89e4eSSatish Balay     (*olengths1)[i] = buf_j[0];
215e5c89e4eSSatish Balay     (*olengths2)[i] = buf_j[1];
216e5c89e4eSSatish Balay   }
217e5c89e4eSSatish Balay 
218e5c89e4eSSatish Balay   ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr);
219e5c89e4eSSatish Balay   PetscFunctionReturn(0);
220e5c89e4eSSatish Balay }
221e5c89e4eSSatish Balay 
222e5c89e4eSSatish Balay /*
223e5c89e4eSSatish Balay 
224a5b23f4aSJose E. Roman   Allocate a buffer sufficient to hold messages of size specified in olengths.
225e5c89e4eSSatish Balay   And post Irecvs on these buffers using node info from onodes
226e5c89e4eSSatish Balay 
227e5c89e4eSSatish Balay  */
2287087cfbeSBarry Smith PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
229e5c89e4eSSatish Balay {
230e5c89e4eSSatish Balay   PetscErrorCode ierr;
231c05d87d6SBarry Smith   PetscInt       **rbuf_t,i,len = 0;
232e5c89e4eSSatish Balay   MPI_Request    *r_waits_t;
233e5c89e4eSSatish Balay 
234e5c89e4eSSatish Balay   PetscFunctionBegin;
235e5c89e4eSSatish Balay   /* compute memory required for recv buffers */
236e5c89e4eSSatish Balay   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
237e5c89e4eSSatish Balay 
238e5c89e4eSSatish Balay   /* allocate memory for recv buffers */
239854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
240785e854fSJed Brown   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
241e5c89e4eSSatish Balay   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
242e5c89e4eSSatish Balay 
243e5c89e4eSSatish Balay   /* Post the receives */
244785e854fSJed Brown   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
245e5c89e4eSSatish Balay   for (i=0; i<nrecvs; ++i) {
246ffc4695bSBarry Smith     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
247e5c89e4eSSatish Balay   }
248e5c89e4eSSatish Balay 
249e5c89e4eSSatish Balay   *rbuf    = rbuf_t;
250e5c89e4eSSatish Balay   *r_waits = r_waits_t;
251e5c89e4eSSatish Balay   PetscFunctionReturn(0);
252e5c89e4eSSatish Balay }
253e5c89e4eSSatish Balay 
2547087cfbeSBarry Smith PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
255e5c89e4eSSatish Balay {
256e5c89e4eSSatish Balay   PetscErrorCode ierr;
257052f0c41SBarry Smith   PetscMPIInt    i;
258e5c89e4eSSatish Balay   PetscScalar    **rbuf_t;
259e5c89e4eSSatish Balay   MPI_Request    *r_waits_t;
260c05d87d6SBarry Smith   PetscInt       len = 0;
261e5c89e4eSSatish Balay 
262fe28d99cSBarry Smith   PetscFunctionBegin;
263e5c89e4eSSatish Balay   /* compute memory required for recv buffers */
264e5c89e4eSSatish Balay   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
265e5c89e4eSSatish Balay 
266e5c89e4eSSatish Balay   /* allocate memory for recv buffers */
267854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
268785e854fSJed Brown   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
269e5c89e4eSSatish Balay   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
270e5c89e4eSSatish Balay 
271e5c89e4eSSatish Balay   /* Post the receives */
272785e854fSJed Brown   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
273e5c89e4eSSatish Balay   for (i=0; i<nrecvs; ++i) {
274ffc4695bSBarry Smith     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
275e5c89e4eSSatish Balay   }
276e5c89e4eSSatish Balay 
277e5c89e4eSSatish Balay   *rbuf    = rbuf_t;
278e5c89e4eSSatish Balay   *r_waits = r_waits_t;
279e5c89e4eSSatish Balay   PetscFunctionReturn(0);
280e5c89e4eSSatish Balay }
281