xref: /petsc/src/sys/utils/mpimesg.c (revision a5b23f4acc7afc99d3844ebd5fb65a81c16e8b8c)
1e5c89e4eSSatish Balay 
2c6db04a5SJed Brown #include <petscsys.h>        /*I  "petscsys.h"  I*/
3e5c89e4eSSatish Balay 
4e5c89e4eSSatish Balay /*@C
5e5c89e4eSSatish Balay   PetscGatherNumberOfMessages -  Computes the number of messages a node expects to receive
6e5c89e4eSSatish Balay 
7d083f849SBarry Smith   Collective
8e5c89e4eSSatish Balay 
9e5c89e4eSSatish Balay   Input Parameters:
10e5c89e4eSSatish Balay + comm     - Communicator
11e5c89e4eSSatish Balay . iflags   - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a
120298fd71SBarry Smith              message from current node to ith node. Optionally NULL
13e5c89e4eSSatish Balay - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i].
140298fd71SBarry Smith              Optionally NULL.
15e5c89e4eSSatish Balay 
16e5c89e4eSSatish Balay   Output Parameters:
17e5c89e4eSSatish Balay . nrecvs    - number of messages received
18e5c89e4eSSatish Balay 
19e5c89e4eSSatish Balay   Level: developer
20e5c89e4eSSatish Balay 
21e5c89e4eSSatish Balay   Notes:
22e5c89e4eSSatish Balay   With this info, the correct message lengths can be determined using
23e5c89e4eSSatish Balay   PetscGatherMessageLengths()
24e5c89e4eSSatish Balay 
25e5c89e4eSSatish Balay   Either iflags or ilengths should be provided.  If iflags is not
260298fd71SBarry Smith   provided (NULL) it can be computed from ilengths. If iflags is
27e5c89e4eSSatish Balay   provided, ilengths is not required.
28e5c89e4eSSatish Balay 
29e5c89e4eSSatish Balay .seealso: PetscGatherMessageLengths()
30e5c89e4eSSatish Balay @*/
317087cfbeSBarry Smith PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
32e5c89e4eSSatish Balay {
330298fd71SBarry Smith   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
34e5c89e4eSSatish Balay   PetscErrorCode ierr;
35e5c89e4eSSatish Balay 
36e5c89e4eSSatish Balay   PetscFunctionBegin;
37ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
38ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
39e5c89e4eSSatish Balay 
40dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&recv_buf,size,&iflags_localm);CHKERRQ(ierr);
41e5c89e4eSSatish Balay 
42e5c89e4eSSatish Balay   /* If iflags not provided, compute iflags from ilengths */
43e5c89e4eSSatish Balay   if (!iflags) {
44e32f2f54SBarry Smith     if (!ilengths) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
45e5c89e4eSSatish Balay     iflags_local = iflags_localm;
46e5c89e4eSSatish Balay     for (i=0; i<size; i++) {
47e5c89e4eSSatish Balay       if (ilengths[i]) iflags_local[i] = 1;
48e5c89e4eSSatish Balay       else iflags_local[i] = 0;
49e5c89e4eSSatish Balay     }
50a297a907SKarl Rupp   } else iflags_local = (PetscMPIInt*) iflags;
51e5c89e4eSSatish Balay 
52e5c89e4eSSatish Balay   /* Post an allreduce to determine the numer of messages the current node will receive */
53820f2d46SBarry Smith   ierr    = MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr);
54e5c89e4eSSatish Balay   *nrecvs = recv_buf[rank];
55e5c89e4eSSatish Balay 
56e5c89e4eSSatish Balay   ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr);
57e5c89e4eSSatish Balay   PetscFunctionReturn(0);
58e5c89e4eSSatish Balay }
59e5c89e4eSSatish Balay 
60e5c89e4eSSatish Balay /*@C
61e5c89e4eSSatish Balay   PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive,
62e5c89e4eSSatish Balay   including (from-id,length) pairs for each message.
63e5c89e4eSSatish Balay 
64d083f849SBarry Smith   Collective
65e5c89e4eSSatish Balay 
66e5c89e4eSSatish Balay   Input Parameters:
67e5c89e4eSSatish Balay + comm      - Communicator
68e5c89e4eSSatish Balay . nsends    - number of messages that are to be sent.
69e5c89e4eSSatish Balay . nrecvs    - number of messages being received
70e5c89e4eSSatish Balay - ilengths  - an array of integers of length sizeof(comm)
71e5c89e4eSSatish Balay               a non zero ilengths[i] represent a message to i of length ilengths[i]
72e5c89e4eSSatish Balay 
73e5c89e4eSSatish Balay   Output Parameters:
74e5c89e4eSSatish Balay + onodes    - list of node-ids from which messages are expected
75e5c89e4eSSatish Balay - olengths  - corresponding message lengths
76e5c89e4eSSatish Balay 
77e5c89e4eSSatish Balay   Level: developer
78e5c89e4eSSatish Balay 
79e5c89e4eSSatish Balay   Notes:
80e5c89e4eSSatish Balay   With this info, the correct MPI_Irecv() can be posted with the correct
81e5c89e4eSSatish Balay   from-id, with a buffer with the right amount of memory required.
82e5c89e4eSSatish Balay 
83e5c89e4eSSatish Balay   The calling function deallocates the memory in onodes and olengths
84e5c89e4eSSatish Balay 
85c2916339SPierre Jolivet   To determine nrecvs, one can use PetscGatherNumberOfMessages()
86e5c89e4eSSatish Balay 
87e5c89e4eSSatish Balay .seealso: PetscGatherNumberOfMessages()
88e5c89e4eSSatish Balay @*/
897087cfbeSBarry Smith PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
90e5c89e4eSSatish Balay {
91e5c89e4eSSatish Balay   PetscErrorCode ierr;
926bfd7d4fSJunchao Zhang   PetscMPIInt    size,rank,tag,i,j;
930298fd71SBarry Smith   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
940298fd71SBarry Smith   MPI_Status     *w_status = NULL;
95e5c89e4eSSatish Balay 
96e5c89e4eSSatish Balay   PetscFunctionBegin;
97ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
98ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
99e5c89e4eSSatish Balay   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
100e5c89e4eSSatish Balay 
101e5c89e4eSSatish Balay   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
102dcca6d9dSJed Brown   ierr    = PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);CHKERRQ(ierr);
103e5c89e4eSSatish Balay   s_waits = r_waits+nrecvs;
104e5c89e4eSSatish Balay 
105e5c89e4eSSatish Balay   /* Post the Irecv to get the message length-info */
106785e854fSJed Brown   ierr = PetscMalloc1(nrecvs,olengths);CHKERRQ(ierr);
107e5c89e4eSSatish Balay   for (i=0; i<nrecvs; i++) {
108ffc4695bSBarry Smith     ierr = MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
109e5c89e4eSSatish Balay   }
110e5c89e4eSSatish Balay 
111e5c89e4eSSatish Balay   /* Post the Isends with the message length-info */
112e5c89e4eSSatish Balay   for (i=0,j=0; i<size; ++i) {
113e5c89e4eSSatish Balay     if (ilengths[i]) {
114ffc4695bSBarry Smith       ierr = MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
115e5c89e4eSSatish Balay       j++;
116e5c89e4eSSatish Balay     }
117e5c89e4eSSatish Balay   }
118e5c89e4eSSatish Balay 
119e5c89e4eSSatish Balay   /* Post waits on sends and receivs */
120ffc4695bSBarry Smith   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}
121e5c89e4eSSatish Balay 
122e5c89e4eSSatish Balay   /* Pack up the received data */
123785e854fSJed Brown   ierr = PetscMalloc1(nrecvs,onodes);CHKERRQ(ierr);
1246bfd7d4fSJunchao Zhang   for (i=0; i<nrecvs; ++i) {
1256bfd7d4fSJunchao Zhang     (*onodes)[i] = w_status[i].MPI_SOURCE;
1266bfd7d4fSJunchao Zhang #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
1276bfd7d4fSJunchao Zhang     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
1286bfd7d4fSJunchao Zhang        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
1296bfd7d4fSJunchao Zhang        does not put correct value in recv buffer. See also
1306bfd7d4fSJunchao Zhang        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
1316bfd7d4fSJunchao Zhang        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
1326bfd7d4fSJunchao Zhang      */
1336bfd7d4fSJunchao Zhang     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
1346bfd7d4fSJunchao Zhang #endif
1356bfd7d4fSJunchao Zhang   }
136e5c89e4eSSatish Balay   ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr);
137e5c89e4eSSatish Balay   PetscFunctionReturn(0);
138e5c89e4eSSatish Balay }
139dd6ea824SBarry Smith 
140e5c89e4eSSatish Balay /*@C
141e5c89e4eSSatish Balay   PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive,
142e5c89e4eSSatish Balay   including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
143e5c89e4eSSatish Balay   except it takes TWO ilenths and output TWO olengths.
144e5c89e4eSSatish Balay 
145d083f849SBarry Smith   Collective
146e5c89e4eSSatish Balay 
147e5c89e4eSSatish Balay   Input Parameters:
148e5c89e4eSSatish Balay + comm      - Communicator
149e5c89e4eSSatish Balay . nsends    - number of messages that are to be sent.
150e5c89e4eSSatish Balay . nrecvs    - number of messages being received
151e5c89e4eSSatish Balay - ilengths1, ilengths2 - array of integers of length sizeof(comm)
152e5c89e4eSSatish Balay               a non zero ilengths[i] represent a message to i of length ilengths[i]
153e5c89e4eSSatish Balay 
154e5c89e4eSSatish Balay   Output Parameters:
155e5c89e4eSSatish Balay + onodes    - list of node-ids from which messages are expected
156e5c89e4eSSatish Balay - olengths1, olengths2 - corresponding message lengths
157e5c89e4eSSatish Balay 
158e5c89e4eSSatish Balay   Level: developer
159e5c89e4eSSatish Balay 
160e5c89e4eSSatish Balay   Notes:
161e5c89e4eSSatish Balay   With this info, the correct MPI_Irecv() can be posted with the correct
162e5c89e4eSSatish Balay   from-id, with a buffer with the right amount of memory required.
163e5c89e4eSSatish Balay 
164e5c89e4eSSatish Balay   The calling function deallocates the memory in onodes and olengths
165e5c89e4eSSatish Balay 
166c2916339SPierre Jolivet   To determine nrecvs, one can use PetscGatherNumberOfMessages()
167e5c89e4eSSatish Balay 
168e5c89e4eSSatish Balay .seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
169e5c89e4eSSatish Balay @*/
1707087cfbeSBarry Smith PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
171e5c89e4eSSatish Balay {
172e5c89e4eSSatish Balay   PetscErrorCode ierr;
1730298fd71SBarry Smith   PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
1740298fd71SBarry Smith   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
1750298fd71SBarry Smith   MPI_Status     *w_status = NULL;
176e5c89e4eSSatish Balay 
177e5c89e4eSSatish Balay   PetscFunctionBegin;
178ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
179e5c89e4eSSatish Balay   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
180e5c89e4eSSatish Balay 
1813bf92927SBarry Smith   /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
182dcca6d9dSJed Brown   ierr = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr);
183e5c89e4eSSatish Balay   s_waits = r_waits + nrecvs;
184e5c89e4eSSatish Balay 
185e5c89e4eSSatish Balay   /* Post the Irecv to get the message length-info */
186854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr);
187854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr);
188e5c89e4eSSatish Balay   for (i=0; i<nrecvs; i++) {
189e5c89e4eSSatish Balay     buf_j = buf_r + (2*i);
19055b25c41SPierre Jolivet     ierr  = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
191e5c89e4eSSatish Balay   }
192e5c89e4eSSatish Balay 
193e5c89e4eSSatish Balay   /* Post the Isends with the message length-info */
194e5c89e4eSSatish Balay   for (i=0,j=0; i<size; ++i) {
195e5c89e4eSSatish Balay     if (ilengths1[i]) {
196e5c89e4eSSatish Balay       buf_j    = buf_s + (2*j);
197e5c89e4eSSatish Balay       buf_j[0] = *(ilengths1+i);
198e5c89e4eSSatish Balay       buf_j[1] = *(ilengths2+i);
199ffc4695bSBarry Smith       ierr = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
200e5c89e4eSSatish Balay       j++;
201e5c89e4eSSatish Balay     }
202e5c89e4eSSatish Balay   }
203f327f304SBarry Smith   if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends);
204e5c89e4eSSatish Balay 
205e5c89e4eSSatish Balay   /* Post waits on sends and receivs */
206ffc4695bSBarry Smith   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}
207e5c89e4eSSatish Balay 
208e5c89e4eSSatish Balay   /* Pack up the received data */
209854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr);
210e5c89e4eSSatish Balay   for (i=0; i<nrecvs; ++i) {
211e5c89e4eSSatish Balay     (*onodes)[i]    = w_status[i].MPI_SOURCE;
212e5c89e4eSSatish Balay     buf_j           = buf_r + (2*i);
213e5c89e4eSSatish Balay     (*olengths1)[i] = buf_j[0];
214e5c89e4eSSatish Balay     (*olengths2)[i] = buf_j[1];
215e5c89e4eSSatish Balay   }
216e5c89e4eSSatish Balay 
217e5c89e4eSSatish Balay   ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr);
218e5c89e4eSSatish Balay   PetscFunctionReturn(0);
219e5c89e4eSSatish Balay }
220e5c89e4eSSatish Balay 
221e5c89e4eSSatish Balay /*
222e5c89e4eSSatish Balay 
223*a5b23f4aSJose E. Roman   Allocate a buffer sufficient to hold messages of size specified in olengths.
224e5c89e4eSSatish Balay   And post Irecvs on these buffers using node info from onodes
225e5c89e4eSSatish Balay 
226e5c89e4eSSatish Balay  */
2277087cfbeSBarry Smith PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
228e5c89e4eSSatish Balay {
229e5c89e4eSSatish Balay   PetscErrorCode ierr;
230c05d87d6SBarry Smith   PetscInt       **rbuf_t,i,len = 0;
231e5c89e4eSSatish Balay   MPI_Request    *r_waits_t;
232e5c89e4eSSatish Balay 
233e5c89e4eSSatish Balay   PetscFunctionBegin;
234e5c89e4eSSatish Balay   /* compute memory required for recv buffers */
235e5c89e4eSSatish Balay   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
236e5c89e4eSSatish Balay 
237e5c89e4eSSatish Balay   /* allocate memory for recv buffers */
238854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
239785e854fSJed Brown   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
240e5c89e4eSSatish Balay   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
241e5c89e4eSSatish Balay 
242e5c89e4eSSatish Balay   /* Post the receives */
243785e854fSJed Brown   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
244e5c89e4eSSatish Balay   for (i=0; i<nrecvs; ++i) {
245ffc4695bSBarry Smith     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
246e5c89e4eSSatish Balay   }
247e5c89e4eSSatish Balay 
248e5c89e4eSSatish Balay   *rbuf    = rbuf_t;
249e5c89e4eSSatish Balay   *r_waits = r_waits_t;
250e5c89e4eSSatish Balay   PetscFunctionReturn(0);
251e5c89e4eSSatish Balay }
252e5c89e4eSSatish Balay 
2537087cfbeSBarry Smith PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
254e5c89e4eSSatish Balay {
255e5c89e4eSSatish Balay   PetscErrorCode ierr;
256052f0c41SBarry Smith   PetscMPIInt    i;
257e5c89e4eSSatish Balay   PetscScalar    **rbuf_t;
258e5c89e4eSSatish Balay   MPI_Request    *r_waits_t;
259c05d87d6SBarry Smith   PetscInt       len = 0;
260e5c89e4eSSatish Balay 
261fe28d99cSBarry Smith   PetscFunctionBegin;
262e5c89e4eSSatish Balay   /* compute memory required for recv buffers */
263e5c89e4eSSatish Balay   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
264e5c89e4eSSatish Balay 
265e5c89e4eSSatish Balay   /* allocate memory for recv buffers */
266854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
267785e854fSJed Brown   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
268e5c89e4eSSatish Balay   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
269e5c89e4eSSatish Balay 
270e5c89e4eSSatish Balay   /* Post the receives */
271785e854fSJed Brown   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
272e5c89e4eSSatish Balay   for (i=0; i<nrecvs; ++i) {
273ffc4695bSBarry Smith     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
274e5c89e4eSSatish Balay   }
275e5c89e4eSSatish Balay 
276e5c89e4eSSatish Balay   *rbuf    = rbuf_t;
277e5c89e4eSSatish Balay   *r_waits = r_waits_t;
278e5c89e4eSSatish Balay   PetscFunctionReturn(0);
279e5c89e4eSSatish Balay }
280