xref: /petsc/src/sys/objects/tagm.c (revision 9767e1d8b281455ed5b7d022ec0a9ffdd7f4cae7)
1 #include <petsc/private/petscimpl.h> /*I    "petscsys.h"   I*/
2 #include <petsc/private/hashmapobj.h>
3 #include <petsc/private/garbagecollector.h>
4 
5 /*
6    A simple way to manage tags inside a communicator.
7 
8    It uses the attributes to determine if a new communicator
9       is needed and to store the available tags.
10 
11 */
12 
13 /*@C
14     PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All
15     processors that share the object MUST call this routine EXACTLY the same
16     number of times.  This tag should only be used with the current objects
17     communicator; do NOT use it with any other MPI communicator.
18 
19     Collective
20 
21     Input Parameter:
22 .   obj - the PETSc object; this must be cast with a (`PetscObject`), for example,
23          `PetscObjectGetNewTag`((`PetscObject`)mat,&tag);
24 
25     Output Parameter:
26 .   tag - the new tag
27 
28     Level: developer
29 
30     Note:
31     This tag is needed if one is writing MPI communication code involving message passing and needs unique MPI tags to ensure the messages are connected to this specific
32     object.
33 
34 .seealso: `PetscCommGetNewTag()`
35 @*/
36 PetscErrorCode PetscObjectGetNewTag(PetscObject obj, PetscMPIInt *tag)
37 {
38   PetscFunctionBegin;
39   PetscCall(PetscCommGetNewTag(obj->comm, tag));
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 /*@
44     PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator. All
45     processors that share the communicator MUST call this routine EXACTLY the same
46     number of times.  This tag should only be used with the current objects
47     communicator; do NOT use it with any other MPI communicator.
48 
49     Collective
50 
51     Input Parameter:
52 .   comm - the MPI communicator
53 
54     Output Parameter:
55 .   tag - the new tag
56 
57     Level: developer
58 
59 .seealso: `PetscObjectGetNewTag()`, `PetscCommDuplicate()`
60 @*/
61 PetscErrorCode PetscCommGetNewTag(MPI_Comm comm, PetscMPIInt *tag)
62 {
63   PetscCommCounter *counter;
64   PetscMPIInt      *maxval, flg;
65 
66   PetscFunctionBegin;
67   PetscValidIntPointer(tag, 2);
68 
69   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
70   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Bad MPI communicator supplied; must be a PETSc communicator");
71 
72   if (counter->tag < 1) {
73     PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
74     PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
75     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
76     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
77   }
78 
79   *tag = counter->tag--;
80   if (PetscDefined(USE_DEBUG)) {
81     /*
82      Hanging here means that some processes have called PetscCommGetNewTag() and others have not.
83      */
84     PetscCallMPI(MPI_Barrier(comm));
85   }
86   PetscFunctionReturn(PETSC_SUCCESS);
87 }
88 
89 /*@C
90   PetscCommGetComm - get a new MPI communicator from a PETSc communicator that can be passed off to another package
91 
92   Collective
93 
94   Input Parameter:
95 . comm_in - Input communicator
96 
97   Output Parameter:
98 . comm_out - Output communicator
99 
100   Level: developer
101 
102   Notes:
103     Use `PetscCommRestoreComm()` to return the communicator when the external package no longer needs it
104 
105     Certain MPI implementations have `MPI_Comm_free()` that do not work, thus one can run out of available MPI communicators causing
106     mysterious crashes in the code after running a long time. This routine allows reusing previously obtained MPI communicators that
107     are no longer needed.
108 
109 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
110 @*/
111 PetscErrorCode PetscCommGetComm(MPI_Comm comm_in, MPI_Comm *comm_out)
112 {
113   PetscCommCounter *counter;
114   PetscMPIInt       flg;
115 
116   PetscFunctionBegin;
117   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
118   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
119   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
120 
121   if (counter->comms) {
122     struct PetscCommStash *pcomms = counter->comms;
123 
124     *comm_out      = pcomms->comm;
125     counter->comms = pcomms->next;
126     PetscCall(PetscFree(pcomms));
127     PetscCall(PetscInfo(NULL, "Reusing a communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
128   } else {
129     PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
130   }
131   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
132   PetscFunctionReturn(PETSC_SUCCESS);
133 }
134 
135 /*@C
136   PetscCommRestoreComm - restores an MPI communicator that was obtained with `PetscCommGetComm()`
137 
138   Collective
139 
140   Input Parameters:
141 +  comm_in - Input communicator
142 -  comm_out - returned communicator
143 
144    Level: developer
145 
146 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
147 @*/
148 PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in, MPI_Comm *comm_out)
149 {
150   PetscCommCounter      *counter;
151   PetscMPIInt            flg;
152   struct PetscCommStash *pcomms, *ncomm;
153 
154   PetscFunctionBegin;
155   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
156   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
157   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
158 
159   PetscCall(PetscMalloc(sizeof(struct PetscCommStash), &ncomm));
160   ncomm->comm = *comm_out;
161   ncomm->next = NULL;
162   pcomms      = counter->comms;
163   while (pcomms && pcomms->next) pcomms = pcomms->next;
164   if (pcomms) {
165     pcomms->next = ncomm;
166   } else {
167     counter->comms = ncomm;
168   }
169   *comm_out = 0;
170   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
171   PetscFunctionReturn(PETSC_SUCCESS);
172 }
173 
174 /*@C
175    PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator.
176 
177    Collective
178 
179    Input Parameter:
180 .  comm_in - Input communicator
181 
182    Output Parameters:
183 +  comm_out - Output communicator.  May be `comm_in`.
184 -  first_tag - Tag available that has not already been used with this communicator (you may pass in `NULL` if you do not need a tag)
185 
186    Level: developer
187 
188    Note:
189    PETSc communicators are just regular MPI communicators that keep track of which
190    tags have been used to prevent tag conflict. If you pass a non-PETSc communicator into
191    a PETSc creation routine it will attach a private communicator for use in the objects communications.
192    The internal `MPI_Comm` is used to perform all the MPI calls for PETSc, the outer `MPI_Comm` is a user
193    level `MPI_Comm` that may be performing communication for the user or other library and so IS NOT used by PETSc.
194 
195 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
196 @*/
197 PetscErrorCode PetscCommDuplicate(MPI_Comm comm_in, MPI_Comm *comm_out, PetscMPIInt *first_tag)
198 {
199   PetscInt64       *cidx;
200   PetscCommCounter *counter;
201   PetscMPIInt      *maxval, flg;
202 
203   PetscFunctionBegin;
204   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
205   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
206 
207   if (!flg) { /* this is NOT a PETSc comm */
208     union
209     {
210       MPI_Comm comm;
211       void    *ptr;
212     } ucomm;
213     /* check if this communicator has a PETSc communicator embedded in it */
214     PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_InnerComm_keyval, &ucomm, &flg));
215     if (!flg) {
216       /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
217       PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
218       PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
219       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
220       PetscCall(PetscNew(&counter)); /* all fields of counter are zero'ed */
221       counter->tag = *maxval;
222       PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_Counter_keyval, counter));
223       /* Add an object creation index to the communicator */
224       PetscCall(PetscNew(&cidx));
225       PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_CreationIdx_keyval, cidx));
226       PetscCall(PetscInfo(NULL, "Duplicating a communicator %ld %ld max tags = %d\n", (long)comm_in, (long)*comm_out, *maxval));
227 
228       /* save PETSc communicator inside user communicator, so we can get it next time */
229       ucomm.comm = *comm_out; /* ONLY the comm part of the union is significant. */
230       PetscCallMPI(MPI_Comm_set_attr(comm_in, Petsc_InnerComm_keyval, ucomm.ptr));
231       ucomm.comm = comm_in;
232       PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_OuterComm_keyval, ucomm.ptr));
233     } else {
234       *comm_out = ucomm.comm;
235       /* pull out the inner MPI_Comm and hand it back to the caller */
236       PetscCallMPI(MPI_Comm_get_attr(*comm_out, Petsc_Counter_keyval, &counter, &flg));
237       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
238       PetscCall(PetscInfo(NULL, "Using internal PETSc communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
239     }
240   } else *comm_out = comm_in;
241 
242   if (PetscDefined(USE_DEBUG)) {
243     /*
244      Hanging here means that some processes have called PetscCommDuplicate() and others have not.
245      This likely means that a subset of processes in a MPI_Comm have attempted to create a PetscObject!
246      ALL processes that share a communicator MUST shared objects created from that communicator.
247      */
248     PetscCallMPI(MPI_Barrier(comm_in));
249   }
250 
251   if (counter->tag < 1) {
252     PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
253     PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
254     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
255     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
256   }
257 
258   if (first_tag) *first_tag = counter->tag--;
259 
260   counter->refcount++; /* number of references to this comm */
261   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 /*@C
266    PetscCommDestroy - Frees communicator obtained with `PetscCommDuplicate()`.
267 
268    Collective
269 
270    Input Parameter:
271 .  comm - the communicator to free
272 
273    Level: developer
274 
275 .seealso: `PetscCommDuplicate()`
276 @*/
277 PetscErrorCode PetscCommDestroy(MPI_Comm *comm)
278 {
279   PetscInt64       *cidx;
280   PetscCommCounter *counter;
281   PetscMPIInt       flg;
282   PetscGarbage      garbage;
283   MPI_Comm          icomm = *comm, ocomm;
284   union
285   {
286     MPI_Comm comm;
287     void    *ptr;
288   } ucomm;
289 
290   PetscFunctionBegin;
291   if (*comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS);
292   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
293   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
294   if (!flg) { /* not a PETSc comm, check if it has an inner comm */
295     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_InnerComm_keyval, &ucomm, &flg));
296     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have tag/name counter nor does it have inner MPI_Comm");
297     icomm = ucomm.comm;
298     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
299     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");
300   }
301   counter->refcount--;
302   if (!counter->refcount) {
303     /* if MPI_Comm has outer comm then remove reference to inner MPI_Comm from outer MPI_Comm */
304     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_OuterComm_keyval, &ucomm, &flg));
305     if (flg) {
306       ocomm = ucomm.comm;
307       PetscCallMPI(MPI_Comm_get_attr(ocomm, Petsc_InnerComm_keyval, &ucomm, &flg));
308       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Outer MPI_Comm %ld does not have expected reference to inner comm %ld, problem with corrupted memory", (long int)ocomm, (long int)icomm);
309       PetscCallMPI(MPI_Comm_delete_attr(ocomm, Petsc_InnerComm_keyval));
310     }
311 
312     /* Remove the object creation index on the communicator */
313     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_CreationIdx_keyval, &cidx, &flg));
314     if (flg) {
315       PetscCall(PetscFree(cidx));
316     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have object creation index");
317 
318     /* Remove garbage hashmap set up by garbage collection */
319     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Garbage_HMap_keyval, &garbage, &flg));
320     if (flg) {
321       PetscInt entries = 0;
322       PetscCall(PetscHMapObjGetSize(garbage.map, &entries));
323       if (entries > 0) PetscCall(PetscGarbageCleanup(icomm));
324       PetscCall(PetscHMapObjDestroy(&(garbage.map)));
325     }
326 
327     PetscCall(PetscInfo(NULL, "Deleting PETSc MPI_Comm %ld\n", (long)icomm));
328     PetscCallMPI(MPI_Comm_free(&icomm));
329   }
330   *comm = MPI_COMM_NULL;
331   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
332   PetscFunctionReturn(PETSC_SUCCESS);
333 }
334 
335 /*@C
336     PetscObjectsListGetGlobalNumbering - computes a global numbering
337     of `PetscObject`s living on subcommunicators of a given communicator.
338 
339     Collective.
340 
341     Input Parameters:
342 +   comm    - the `MPI_Comm`
343 .   len     - local length of `objlist`
344 -   objlist - a list of PETSc objects living on subcomms of comm and containing this comm rank
345               (subcomm ordering is assumed to be deadlock-free)
346 
347     Output Parameters:
348 +   count      - global number of distinct subcommunicators on objlist (may be > len)
349 -   numbering  - global numbers of objlist entries (allocated by user)
350 
351     Level: developer
352 
353     Note:
354     This is needed when PETSc is used with certain languages that do garbage collection to manage object life cycles.
355 
356 @*/
357 PetscErrorCode PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering)
358 {
359   PetscInt    i, roots, offset;
360   PetscMPIInt size, rank;
361 
362   PetscFunctionBegin;
363   PetscValidPointer(objlist, 3);
364   if (!count && !numbering) PetscFunctionReturn(PETSC_SUCCESS);
365 
366   PetscCallMPI(MPI_Comm_size(comm, &size));
367   PetscCallMPI(MPI_Comm_rank(comm, &rank));
368   roots = 0;
369   for (i = 0; i < len; ++i) {
370     PetscMPIInt srank;
371     PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
372     /* Am I the root of the i-th subcomm? */
373     if (!srank) ++roots;
374   }
375   if (count) {
376     /* Obtain the sum of all roots -- the global number of distinct subcomms. */
377     PetscCall(MPIU_Allreduce(&roots, count, 1, MPIU_INT, MPI_SUM, comm));
378   }
379   if (numbering) {
380     /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
381     /*
382       At each subcomm root number all of the subcomms it owns locally
383       and make it global by calculating the shift among all of the roots.
384       The roots are ordered using the comm ordering.
385     */
386     PetscCallMPI(MPI_Scan(&roots, &offset, 1, MPIU_INT, MPI_SUM, comm));
387     offset -= roots;
388     /* Now we are ready to broadcast global subcomm numbers within each subcomm.*/
389     /*
390       This is where the assumption of a deadlock-free ordering of the subcomms is assumed:
391       broadcast is collective on the subcomm.
392     */
393     roots = 0;
394     for (i = 0; i < len; ++i) {
395       PetscMPIInt srank;
396       numbering[i] = offset + roots; /* only meaningful if !srank. */
397 
398       PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
399       PetscCallMPI(MPI_Bcast(numbering + i, 1, MPIU_INT, 0, objlist[i]->comm));
400       if (!srank) ++roots;
401     }
402   }
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405