xref: /petsc/src/sys/objects/tagm.c (revision 2d776b4963042cdf8a412ba09e923aa51facd799)
1 #include <petsc/private/petscimpl.h> /*I    "petscsys.h"   I*/
2 /* ---------------------------------------------------------------- */
3 /*
4    A simple way to manage tags inside a communicator.
5 
6    It uses the attributes to determine if a new communicator
7       is needed and to store the available tags.
8 
9 */
10 
/*@C
    PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All
    processors that share the object MUST call this routine EXACTLY the same
    number of times.  This tag should only be used with the current objects
    communicator; do NOT use it with any other MPI communicator.

    Collective on obj

    Input Parameter:
.   obj - the PETSc object; this must be cast with a (`PetscObject`), for example,
         `PetscObjectGetNewTag`((`PetscObject`)mat,&tag);

    Output Parameter:
.   tag - the new tag

    Level: developer

    Note:
    This tag is needed if one is writing MPI communication code involving message passing and needs unique MPI tags to ensure the messages are connected to this specific
    object.

.seealso: `PetscCommGetNewTag()`
@*/
PetscErrorCode PetscObjectGetNewTag(PetscObject obj, PetscMPIInt *tag)
{
  PetscFunctionBegin;
  /* Delegate to the communicator-level routine; obj->comm is the PETSc (inner)
     communicator carrying the tag-counter attribute */
  PetscCall(PetscCommGetNewTag(obj->comm, tag));
  PetscFunctionReturn(0);
}
40 
41 /*@
42     PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator. All
43     processors that share the communicator MUST call this routine EXACTLY the same
44     number of times.  This tag should only be used with the current objects
45     communicator; do NOT use it with any other MPI communicator.
46 
47     Collective
48 
49     Input Parameter:
50 .   comm - the MPI communicator
51 
52     Output Parameter:
53 .   tag - the new tag
54 
55     Level: developer
56 
57 .seealso: `PetscObjectGetNewTag()`, `PetscCommDuplicate()`
58 @*/
59 PetscErrorCode PetscCommGetNewTag(MPI_Comm comm, PetscMPIInt *tag)
60 {
61   PetscCommCounter *counter;
62   PetscMPIInt      *maxval, flg;
63 
64   PetscFunctionBegin;
65   PetscValidIntPointer(tag, 2);
66 
67   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
68   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Bad MPI communicator supplied; must be a PETSc communicator");
69 
70   if (counter->tag < 1) {
71     PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
72     PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
73     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
74     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
75   }
76 
77   *tag = counter->tag--;
78   if (PetscDefined(USE_DEBUG)) {
79     /*
80      Hanging here means that some processes have called PetscCommGetNewTag() and others have not.
81      */
82     PetscCallMPI(MPI_Barrier(comm));
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 /*@C
88   PetscCommGetComm - get a new MPI communicator from a PETSc communicator that can be passed off to another package
89 
90   Collective
91 
92   Input Parameter:
93 . comm_in - Input communicator
94 
95   Output Parameters:
96 . comm_out - Output communicator
97 
98   Notes:
99     Use `PetscCommRestoreComm()` to return the communicator when the external package no longer needs it
100 
101     Certain MPI implementations have `MPI_Comm_free()` that do not work, thus one can run out of available MPI communicators causing
102     mysterious crashes in the code after running a long time. This routine allows reusing previously obtained MPI communicators that
103     are no longer needed.
104 
105 Level: developer
106 
107 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
108 @*/
109 PetscErrorCode PetscCommGetComm(MPI_Comm comm_in, MPI_Comm *comm_out)
110 {
111   PetscCommCounter *counter;
112   PetscMPIInt       flg;
113 
114   PetscFunctionBegin;
115   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
116   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
117   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
118 
119   if (counter->comms) {
120     struct PetscCommStash *pcomms = counter->comms;
121 
122     *comm_out      = pcomms->comm;
123     counter->comms = pcomms->next;
124     PetscCall(PetscFree(pcomms));
125     PetscCall(PetscInfo(NULL, "Reusing a communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
126   } else {
127     PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
128   }
129   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
130   PetscFunctionReturn(0);
131 }
132 
133 /*@C
134   PetscCommRestoreComm - restores an MPI communicator that was obtained with `PetscCommGetComm()`
135 
136   Collective
137 
138   Input Parameters:
139 +  comm_in - Input communicator
140 -  comm_out - returned communicator
141 
142 Level: developer
143 
144 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
145 @*/
146 PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in, MPI_Comm *comm_out)
147 {
148   PetscCommCounter      *counter;
149   PetscMPIInt            flg;
150   struct PetscCommStash *pcomms, *ncomm;
151 
152   PetscFunctionBegin;
153   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
154   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
155   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
156 
157   PetscCall(PetscMalloc(sizeof(struct PetscCommStash), &ncomm));
158   ncomm->comm = *comm_out;
159   ncomm->next = NULL;
160   pcomms      = counter->comms;
161   while (pcomms && pcomms->next) pcomms = pcomms->next;
162   if (pcomms) {
163     pcomms->next = ncomm;
164   } else {
165     counter->comms = ncomm;
166   }
167   *comm_out = 0;
168   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
169   PetscFunctionReturn(0);
170 }
171 
/*@C
   PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator.

   Collective

   Input Parameter:
.  comm_in - Input communicator

   Output Parameters:
+  comm_out - Output communicator.  May be comm_in.
-  first_tag - Tag available that has not already been used with this communicator (you may pass in NULL if you do not need a tag)

   Note:
   PETSc communicators are just regular MPI communicators that keep track of which
   tags have been used to prevent tag conflict. If you pass a non-PETSc communicator into
   a PETSc creation routine it will attach a private communicator for use in the objects communications.
   The internal `MPI_Comm` is used to perform all the MPI calls for PETSc, the outer `MPI_Comm` is a user
   level `MPI_Comm` that may be performing communication for the user or other library and so IS NOT used by PETSc.

Level: developer

.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
@*/
PetscErrorCode PetscCommDuplicate(MPI_Comm comm_in, MPI_Comm *comm_out, PetscMPIInt *first_tag)
{
  PetscCommCounter *counter;
  PetscMPIInt      *maxval, flg;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  /* A PETSc (inner) communicator is identified by the presence of the counter attribute */
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));

  if (!flg) { /* this is NOT a PETSc comm */
    /* MPI attributes are stored/retrieved as void*, while MPI_Comm need not be a
       pointer type, so a union is used to move the handle in and out of the attribute */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if this communicator has a PETSc communicator embedded in it */
    PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
      PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
      /* MPI_TAG_UB is the largest tag value the MPI implementation supports; tags are handed out counting down from it */
      PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
      PetscCall(PetscNew(&counter)); /* all fields of counter are zero'ed */
      counter->tag = *maxval;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_Counter_keyval, counter));
      PetscCall(PetscInfo(NULL, "Duplicating a communicator %ld %ld max tags = %d\n", (long)comm_in, (long)*comm_out, *maxval));

      /* save PETSc communicator inside user communicator, so we can get it next time */
      ucomm.comm = *comm_out; /* ONLY the comm part of the union is significant. */
      PetscCallMPI(MPI_Comm_set_attr(comm_in, Petsc_InnerComm_keyval, ucomm.ptr));
      /* and a back-reference from the inner communicator to the user (outer) one */
      ucomm.comm = comm_in;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_OuterComm_keyval, ucomm.ptr));
    } else {
      *comm_out = ucomm.comm;
      /* pull out the inner MPI_Comm and hand it back to the caller */
      PetscCallMPI(MPI_Comm_get_attr(*comm_out, Petsc_Counter_keyval, &counter, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
      PetscCall(PetscInfo(NULL, "Using internal PETSc communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
    }
  } else *comm_out = comm_in;

  if (PetscDefined(USE_DEBUG)) {
    /*
     Hanging here means that some processes have called PetscCommDuplicate() and others have not.
     This likely means that a subset of processes in a MPI_Comm have attempted to create a PetscObject!
     ALL processes that share a communicator MUST shared objects created from that communicator.
     */
    PetscCallMPI(MPI_Barrier(comm_in));
  }

  /* Tag space exhausted: restart 128 below the upper bound */
  if (counter->tag < 1) {
    PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
    PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
    counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
  }

  if (first_tag) *first_tag = counter->tag--;

  counter->refcount++; /* number of references to this comm */
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(0);
}
258 
/*@C
   PetscCommDestroy - Frees communicator obtained with `PetscCommDuplicate()`.

   Collective

   Input Parameter:
.  comm - the communicator to free

   Level: developer

.seealso: `PetscCommDuplicate()`
@*/
PetscErrorCode PetscCommDestroy(MPI_Comm *comm)
{
  PetscCommCounter *counter;
  PetscMPIInt       flg;
  MPI_Comm          icomm = *comm, ocomm;
  /* MPI attributes are stored/retrieved as void*, while MPI_Comm need not be a
     pointer type, so a union is used to move the handle in and out of the attribute */
  union
  {
    MPI_Comm comm;
    void    *ptr;
  } ucomm;

  PetscFunctionBegin;
  if (*comm == MPI_COMM_NULL) PetscFunctionReturn(0);
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* not a PETSc comm, check if it has an inner comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have tag/name counter nor does it have inner MPI_Comm");
    icomm = ucomm.comm;
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");
  }

  counter->refcount--;

  if (!counter->refcount) {
    /* if MPI_Comm has outer comm then remove reference to inner MPI_Comm from outer MPI_Comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_OuterComm_keyval, &ucomm, &flg));
    if (flg) {
      ocomm = ucomm.comm;
      PetscCallMPI(MPI_Comm_get_attr(ocomm, Petsc_InnerComm_keyval, &ucomm, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Outer MPI_Comm %ld does not have expected reference to inner comm %ld, problem with corrupted memory", (long int)ocomm, (long int)icomm);
      PetscCallMPI(MPI_Comm_delete_attr(ocomm, Petsc_InnerComm_keyval));
    }

    /* NOTE(review): counter itself is not freed here; presumably the delete
       callback registered with Petsc_Counter_keyval (not visible in this file)
       frees it when MPI_Comm_free() runs the attribute destructors — confirm */
    PetscCall(PetscInfo(NULL, "Deleting PETSc MPI_Comm %ld\n", (long)icomm));
    PetscCallMPI(MPI_Comm_free(&icomm));
  }
  *comm = MPI_COMM_NULL;
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(0);
}
313 
314 /*@C
315     PetscObjectsListGetGlobalNumbering - computes a global numbering
316     of `PetscObject`s living on subcommunicators of a given communicator.
317 
318     Collective.
319 
320     Input Parameters:
321 +   comm    - the `MPI_Comm`
322 .   len     - local length of objlist
323 -   objlist - a list of PETSc objects living on subcomms of comm and containing this comm rank
324               (subcomm ordering is assumed to be deadlock-free)
325 
326     Output Parameters:
327 +   count      - global number of distinct subcommunicators on objlist (may be > len)
328 -   numbering  - global numbers of objlist entries (allocated by user)
329 
330     Level: developer
331 
332     Note:
333     This is needed when PETSc is used with certain languages that do garbage collection to manage object life cycles.
334 
335 @*/
336 PetscErrorCode PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering)
337 {
338   PetscInt    i, roots, offset;
339   PetscMPIInt size, rank;
340 
341   PetscFunctionBegin;
342   PetscValidPointer(objlist, 3);
343   if (!count && !numbering) PetscFunctionReturn(0);
344 
345   PetscCallMPI(MPI_Comm_size(comm, &size));
346   PetscCallMPI(MPI_Comm_rank(comm, &rank));
347   roots = 0;
348   for (i = 0; i < len; ++i) {
349     PetscMPIInt srank;
350     PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
351     /* Am I the root of the i-th subcomm? */
352     if (!srank) ++roots;
353   }
354   if (count) {
355     /* Obtain the sum of all roots -- the global number of distinct subcomms. */
356     PetscCall(MPIU_Allreduce(&roots, count, 1, MPIU_INT, MPI_SUM, comm));
357   }
358   if (numbering) {
359     /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
360     /*
361       At each subcomm root number all of the subcomms it owns locally
362       and make it global by calculating the shift among all of the roots.
363       The roots are ordered using the comm ordering.
364     */
365     PetscCallMPI(MPI_Scan(&roots, &offset, 1, MPIU_INT, MPI_SUM, comm));
366     offset -= roots;
367     /* Now we are ready to broadcast global subcomm numbers within each subcomm.*/
368     /*
369       This is where the assumption of a deadlock-free ordering of the subcomms is assumed:
370       broadcast is collective on the subcomm.
371     */
372     roots = 0;
373     for (i = 0; i < len; ++i) {
374       PetscMPIInt srank;
375       numbering[i] = offset + roots; /* only meaningful if !srank. */
376 
377       PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
378       PetscCallMPI(MPI_Bcast(numbering + i, 1, MPIU_INT, 0, objlist[i]->comm));
379       if (!srank) ++roots;
380     }
381   }
382   PetscFunctionReturn(0);
383 }
384