xref: /petsc/src/sys/objects/tagm.c (revision cac3c07dbc4e95423e22cb699bb64807a71d0bfe)
1 #include <petsc/private/petscimpl.h> /*I    "petscsys.h"   I*/
2 #include <petsc/private/hashmapobj.h>
3 #include <petsc/private/garbagecollector.h>
4 
5 /*
6    A simple way to manage tags inside a communicator.
7 
8    It uses the attributes to determine if a new communicator
9       is needed and to store the available tags.
10 
11 */
12 
13 /*@C
14   PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All
15   processors that share the object MUST call this routine EXACTLY the same
16   number of times.  This tag should only be used with the current objects
17   communicator; do NOT use it with any other MPI communicator.
18 
19   Collective
20 
21   Input Parameter:
22 . obj - the PETSc object; this must be cast with a (`PetscObject`), for example,
23          `PetscObjectGetNewTag`((`PetscObject`)mat,&tag);
24 
25   Output Parameter:
26 . tag - the new tag
27 
28   Level: developer
29 
30   Note:
31   This tag is needed if one is writing MPI communication code involving message passing and needs unique MPI tags to ensure the messages are connected to this specific
32   object.
33 
34 .seealso: `PetscCommGetNewTag()`
35 @*/
36 PetscErrorCode PetscObjectGetNewTag(PetscObject obj, PetscMPIInt *tag)
37 {
38   PetscFunctionBegin;
39   PetscCall(PetscCommGetNewTag(obj->comm, tag));
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 /*@
44   PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator
45 
46   Collective
47 
48   Input Parameter:
49 . comm - the MPI communicator
50 
51   Output Parameter:
52 . tag - the new tag
53 
54   Level: developer
55 
56   Notes:
57   All processors that share the communicator MUST call this routine EXACTLY the same number of
58   times. This tag should only be used with the current objects communicator; do NOT use it
59   with any other MPI communicator.
60 
61 .seealso: `PetscObjectGetNewTag()`, `PetscCommDuplicate()`
62 @*/
63 PetscErrorCode PetscCommGetNewTag(MPI_Comm comm, PetscMPIInt *tag)
64 {
65   PetscCommCounter *counter;
66   PetscMPIInt      *maxval, flg;
67 
68   PetscFunctionBegin;
69   PetscAssertPointer(tag, 2);
70 
71   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
72   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Bad MPI communicator supplied; must be a PETSc communicator");
73 
74   if (counter->tag < 1) {
75     PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
76     PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
77     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
78     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
79   }
80 
81   *tag = counter->tag--;
82   if (PetscDefined(USE_DEBUG)) {
83     /*
84      Hanging here means that some processes have called PetscCommGetNewTag() and others have not.
85      */
86     PetscCallMPI(MPI_Barrier(comm));
87   }
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 /*@C
92   PetscCommGetComm - get a new MPI communicator from a PETSc communicator that can be passed off to another package
93 
94   Collective
95 
96   Input Parameter:
97 . comm_in - Input communicator
98 
99   Output Parameter:
100 . comm_out - Output communicator
101 
102   Level: developer
103 
104   Notes:
105   Use `PetscCommRestoreComm()` to return the communicator when the external package no longer needs it
106 
107   Certain MPI implementations have `MPI_Comm_free()` that do not work, thus one can run out of available MPI communicators causing
108   mysterious crashes in the code after running a long time. This routine allows reusing previously obtained MPI communicators that
109   are no longer needed.
110 
111 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
112 @*/
113 PetscErrorCode PetscCommGetComm(MPI_Comm comm_in, MPI_Comm *comm_out)
114 {
115   PetscCommCounter *counter;
116   PetscMPIInt       flg;
117 
118   PetscFunctionBegin;
119   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
120   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
121   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
122 
123   if (counter->comms) {
124     struct PetscCommStash *pcomms = counter->comms;
125 
126     *comm_out      = pcomms->comm;
127     counter->comms = pcomms->next;
128     PetscCall(PetscFree(pcomms));
129     PetscCall(PetscInfo(NULL, "Reusing a communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
130   } else {
131     PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
132   }
133   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*@C
138   PetscCommRestoreComm - restores an MPI communicator that was obtained with `PetscCommGetComm()`
139 
140   Collective
141 
142   Input Parameters:
143 + comm_in  - Input communicator
144 - comm_out - returned communicator
145 
146   Level: developer
147 
148 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
149 @*/
150 PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in, MPI_Comm *comm_out)
151 {
152   PetscCommCounter      *counter;
153   PetscMPIInt            flg;
154   struct PetscCommStash *pcomms, *ncomm;
155 
156   PetscFunctionBegin;
157   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
158   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
159   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
160 
161   PetscCall(PetscMalloc(sizeof(struct PetscCommStash), &ncomm));
162   ncomm->comm = *comm_out;
163   ncomm->next = NULL;
164   pcomms      = counter->comms;
165   while (pcomms && pcomms->next) pcomms = pcomms->next;
166   if (pcomms) {
167     pcomms->next = ncomm;
168   } else {
169     counter->comms = ncomm;
170   }
171   *comm_out = 0;
172   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
173   PetscFunctionReturn(PETSC_SUCCESS);
174 }
175 
/*@C
  PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator.

  Collective

  Input Parameter:
. comm_in - Input communicator

  Output Parameters:
+ comm_out  - Output communicator.  May be `comm_in`.
- first_tag - Tag available that has not already been used with this communicator (you may pass in `NULL` if you do not need a tag)

  Level: developer

  Note:
  PETSc communicators are just regular MPI communicators that keep track of which
  tags have been used to prevent tag conflict. If you pass a non-PETSc communicator into
  a PETSc creation routine it will attach a private communicator for use in the objects communications.
  The internal `MPI_Comm` is used to perform all the MPI calls for PETSc; the outer `MPI_Comm` is a
  user-level `MPI_Comm` that may be performing communication for the user or another library and so
  is not used by PETSc.

.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
@*/
PetscErrorCode PetscCommDuplicate(MPI_Comm comm_in, MPI_Comm *comm_out, PetscMPIInt *first_tag)
{
  PetscInt64       *cidx;
  PetscCommCounter *counter;
  PetscMPIInt      *maxval, flg;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  /* An inner PETSc communicator is recognized by the tag-counter attribute attached below */
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));

  if (!flg) { /* this is NOT a PETSc comm */
    /* MPI attribute values are passed as void *, so a union is used to round-trip an MPI_Comm through one */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if this communicator has a PETSc communicator embedded in it */
    PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
      PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
      PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
      PetscCall(PetscNew(&counter)); /* all fields of counter are zero'ed */
      counter->tag = *maxval;        /* tags are handed out counting down from the MPI tag upper bound */
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_Counter_keyval, counter));
      /* Add an object creation index to the communicator */
      PetscCall(PetscNew(&cidx));
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_CreationIdx_keyval, cidx));
      PetscCall(PetscInfo(NULL, "Duplicating a communicator %ld %ld max tags = %d\n", (long)comm_in, (long)*comm_out, *maxval));

      /* save PETSc communicator inside user communicator, so we can get it next time */
      ucomm.comm = *comm_out; /* ONLY the comm part of the union is significant. */
      PetscCallMPI(MPI_Comm_set_attr(comm_in, Petsc_InnerComm_keyval, ucomm.ptr));
      /* and record the reverse link so PetscCommDestroy() can find the outer comm from the inner one */
      ucomm.comm = comm_in;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_OuterComm_keyval, ucomm.ptr));
    } else {
      *comm_out = ucomm.comm;
      /* pull out the inner MPI_Comm and hand it back to the caller */
      PetscCallMPI(MPI_Comm_get_attr(*comm_out, Petsc_Counter_keyval, &counter, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
      PetscCall(PetscInfo(NULL, "Using internal PETSc communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
    }
  } else *comm_out = comm_in; /* comm_in is already a PETSc inner communicator; hand it straight back */

  if (PetscDefined(USE_DEBUG)) {
    /*
     Hanging here means that some processes have called PetscCommDuplicate() and others have not.
     This likely means that a subset of processes in a MPI_Comm have attempted to create a PetscObject!
     ALL processes that share a communicator MUST shared objects created from that communicator.
     */
    PetscCallMPI(MPI_Barrier(comm_in));
  }

  /* The counter ran out of tags; restart a little below the tag upper bound */
  if (counter->tag < 1) {
    PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
    PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
    counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
  }

  if (first_tag) *first_tag = counter->tag--;

  counter->refcount++; /* number of references to this comm */
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(PETSC_SUCCESS);
}
267 
/*@C
  PetscCommDestroy - Frees communicator obtained with `PetscCommDuplicate()`.

  Collective

  Input Parameter:
. comm - the communicator to free; set to `MPI_COMM_NULL` on return

  Level: developer

.seealso: `PetscCommDuplicate()`
@*/
PetscErrorCode PetscCommDestroy(MPI_Comm *comm)
{
  PetscInt64       *cidx;
  PetscCommCounter *counter;
  PetscMPIInt       flg;
  PetscGarbage      garbage;
  MPI_Comm          icomm = *comm, ocomm;
  /* MPI attribute values are passed as void *, so a union is used to recover an MPI_Comm from one */
  union
  {
    MPI_Comm comm;
    void    *ptr;
  } ucomm;

  PetscFunctionBegin;
  if (*comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* not a PETSc comm, check if it has an inner comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have tag/name counter nor does it have inner MPI_Comm");
    /* switch to the inner PETSc communicator; all bookkeeping lives there */
    icomm = ucomm.comm;
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");
  }
  /* drop one reference; the inner comm and its attributes are torn down only when the last goes away */
  counter->refcount--;
  if (!counter->refcount) {
    /* if MPI_Comm has outer comm then remove reference to inner MPI_Comm from outer MPI_Comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_OuterComm_keyval, &ucomm, &flg));
    if (flg) {
      ocomm = ucomm.comm;
      /* sanity check that the outer comm still points back at this inner comm */
      PetscCallMPI(MPI_Comm_get_attr(ocomm, Petsc_InnerComm_keyval, &ucomm, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Outer MPI_Comm %ld does not have expected reference to inner comm %ld, problem with corrupted memory", (long int)ocomm, (long int)icomm);
      PetscCallMPI(MPI_Comm_delete_attr(ocomm, Petsc_InnerComm_keyval));
    }

    /* Remove the object creation index on the communicator */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_CreationIdx_keyval, &cidx, &flg));
    if (flg) {
      PetscCall(PetscFree(cidx));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have object creation index");

    /* Remove garbage hashmap set up by garbage collection */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Garbage_HMap_keyval, &garbage, &flg));
    if (flg) {
      PetscInt entries = 0;
      /* run a cleanup pass first if the map still holds entries, then free the map itself */
      PetscCall(PetscHMapObjGetSize(garbage.map, &entries));
      if (entries > 0) PetscCall(PetscGarbageCleanup(icomm));
      PetscCall(PetscHMapObjDestroy(&(garbage.map)));
    }

    PetscCall(PetscInfo(NULL, "Deleting PETSc MPI_Comm %ld\n", (long)icomm));
    PetscCallMPI(MPI_Comm_free(&icomm));
  }
  *comm = MPI_COMM_NULL;
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(PETSC_SUCCESS);
}
337 
338 /*@C
339   PetscObjectsListGetGlobalNumbering - computes a global numbering
340   of `PetscObject`s living on subcommunicators of a given communicator.
341 
342   Collective.
343 
344   Input Parameters:
345 + comm    - the `MPI_Comm`
346 . len     - local length of `objlist`
347 - objlist - a list of PETSc objects living on subcomms of comm and containing this comm rank
348               (subcomm ordering is assumed to be deadlock-free)
349 
350   Output Parameters:
351 + count     - global number of distinct subcommunicators on objlist (may be > len)
352 - numbering - global numbers of objlist entries (allocated by user)
353 
354   Level: developer
355 
356   Note:
357   This is needed when PETSc is used with certain languages that do garbage collection to manage object life cycles.
358 
359 .seealso: `PetscCommDuplicate()`, `PetscObjectDestroy()`
360 @*/
361 PetscErrorCode PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering)
362 {
363   PetscInt    i, roots, offset;
364   PetscMPIInt size, rank;
365 
366   PetscFunctionBegin;
367   PetscAssertPointer(objlist, 3);
368   if (!count && !numbering) PetscFunctionReturn(PETSC_SUCCESS);
369 
370   PetscCallMPI(MPI_Comm_size(comm, &size));
371   PetscCallMPI(MPI_Comm_rank(comm, &rank));
372   roots = 0;
373   for (i = 0; i < len; ++i) {
374     PetscMPIInt srank;
375     PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
376     /* Am I the root of the i-th subcomm? */
377     if (!srank) ++roots;
378   }
379   if (count) {
380     /* Obtain the sum of all roots -- the global number of distinct subcomms. */
381     PetscCall(MPIU_Allreduce(&roots, count, 1, MPIU_INT, MPI_SUM, comm));
382   }
383   if (numbering) {
384     /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
385     /*
386       At each subcomm root number all of the subcomms it owns locally
387       and make it global by calculating the shift among all of the roots.
388       The roots are ordered using the comm ordering.
389     */
390     PetscCallMPI(MPI_Scan(&roots, &offset, 1, MPIU_INT, MPI_SUM, comm));
391     offset -= roots;
392     /* Now we are ready to broadcast global subcomm numbers within each subcomm.*/
393     /*
394       This is where the assumption of a deadlock-free ordering of the subcomms is assumed:
395       broadcast is collective on the subcomm.
396     */
397     roots = 0;
398     for (i = 0; i < len; ++i) {
399       PetscMPIInt srank;
400       numbering[i] = offset + roots; /* only meaningful if !srank. */
401 
402       PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
403       PetscCallMPI(MPI_Bcast(numbering + i, 1, MPIU_INT, 0, objlist[i]->comm));
404       if (!srank) ++roots;
405     }
406   }
407   PetscFunctionReturn(PETSC_SUCCESS);
408 }
409