#include <petsc/private/petscimpl.h> /*I    "petscsys.h"   I*/
#include <petsc/private/hashmapobj.h>
#include <petsc/private/garbagecollector.h>

/*
   A simple way to manage tags inside a communicator.

   It uses MPI attributes to determine if a new communicator
      is needed and to store the available tags.

*/

/*@C
  PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All
  processors that share the object MUST call this routine EXACTLY the same
  number of times.  This tag should only be used with the current object's
  communicator; do NOT use it with any other MPI communicator.

  Collective

  Input Parameter:
. obj - the PETSc object; this must be cast with a (`PetscObject`), for example,
         `PetscObjectGetNewTag`((`PetscObject`)mat,&tag);

  Output Parameter:
. tag - the new tag

  Level: developer

  Note:
  This tag is needed if one is writing MPI communication code involving message passing and needs unique MPI tags to ensure the messages are connected to this specific
  object.

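  Example Usage:
  A sketch of using the tag for a nonblocking send on the object's communicator; it assumes `mat` is an
  existing `Mat` and that `sendbuf`, `n`, and `dest` are set up by the caller:
.vb
  PetscMPIInt tag;
  MPI_Comm    comm;
  MPI_Request req;

  PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag));
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  // the tag is only valid for communication on this object's communicator
  PetscCallMPI(MPI_Isend(sendbuf, n, MPIU_SCALAR, dest, tag, comm, &req));
.ve
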
.seealso: `PetscCommGetNewTag()`
@*/
PetscErrorCode PetscObjectGetNewTag(PetscObject obj, PetscMPIInt *tag)
{
  PetscFunctionBegin;
  PetscCall(PetscCommGetNewTag(obj->comm, tag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator. All
  processors that share the communicator MUST call this routine EXACTLY the same
  number of times.  This tag should only be used with this communicator; do NOT
  use it with any other MPI communicator.

  Collective

  Input Parameter:
. comm - the MPI communicator

  Output Parameter:
. tag - the new tag

  Level: developer

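  Example Usage:
  A minimal sketch, assuming `comm` is a PETSc communicator (for example, one obtained from
  `PetscCommDuplicate()`); a raw communicator such as `MPI_COMM_WORLD` is rejected with an error:
.vb
  PetscMPIInt tag;

  PetscCall(PetscCommGetNewTag(comm, &tag));
  // tag is now reserved for point-to-point communication on comm
.ve
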
.seealso: `PetscObjectGetNewTag()`, `PetscCommDuplicate()`
@*/
PetscErrorCode PetscCommGetNewTag(MPI_Comm comm, PetscMPIInt *tag)
{
  PetscCommCounter *counter;
  PetscMPIInt      *maxval, flg;

  PetscFunctionBegin;
  PetscValidIntPointer(tag, 2);

  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Bad MPI communicator supplied; must be a PETSc communicator");

  if (counter->tag < 1) {
    PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
    PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
    counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
  }

  *tag = counter->tag--;
  if (PetscDefined(USE_DEBUG)) {
    /*
     Hanging here means that some processes have called PetscCommGetNewTag() and others have not.
     */
    PetscCallMPI(MPI_Barrier(comm));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscCommGetComm - gets a new MPI communicator from a PETSc communicator that can be passed off to another package

  Collective

  Input Parameter:
. comm_in - Input communicator

  Output Parameter:
. comm_out - Output communicator

  Level: developer

  Notes:
  Use `PetscCommRestoreComm()` to return the communicator when the external package no longer needs it.

  Certain MPI implementations have a broken `MPI_Comm_free()`, so one can run out of available MPI communicators, causing
  mysterious crashes in the code after it has run for a long time. This routine allows reusing previously obtained MPI communicators
  that are no longer needed.

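  Example Usage:
  A sketch of lending a communicator to an external package and returning it afterwards; `ksp` stands for
  any existing PETSc object and `ExternalPackageSolve()` is a hypothetical stand-in for the external call:
.vb
  MPI_Comm pkg_comm;

  PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)ksp), &pkg_comm));
  ExternalPackageSolve(pkg_comm);
  PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)ksp), &pkg_comm));
.ve
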
.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
@*/
PetscErrorCode PetscCommGetComm(MPI_Comm comm_in, MPI_Comm *comm_out)
{
  PetscCommCounter *counter;
  PetscMPIInt       flg;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");

  if (counter->comms) {
    struct PetscCommStash *pcomms = counter->comms;

    *comm_out      = pcomms->comm;
    counter->comms = pcomms->next;
    PetscCall(PetscFree(pcomms));
    PetscCall(PetscInfo(NULL, "Reusing a communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
  } else {
    PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
  }
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscCommRestoreComm - restores an MPI communicator that was obtained with `PetscCommGetComm()`

  Collective

  Input Parameters:
+ comm_in  - Input communicator
- comm_out - returned communicator

  Level: developer

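  Example Usage:
  A minimal sketch, assuming `pkg_comm` was obtained earlier with `PetscCommGetComm()` on the same
  input communicator `comm_in`:
.vb
  PetscCall(PetscCommRestoreComm(comm_in, &pkg_comm));
  // pkg_comm must not be used after this call; a later PetscCommGetComm() may hand the same MPI_Comm back out
.ve
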
.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommGetComm()`, `PetscCommDestroy()`
@*/
PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in, MPI_Comm *comm_out)
{
  PetscCommCounter      *counter;
  PetscMPIInt            flg;
  struct PetscCommStash *pcomms, *ncomm;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");

  PetscCall(PetscMalloc(sizeof(struct PetscCommStash), &ncomm));
  ncomm->comm = *comm_out;
  ncomm->next = NULL;
  pcomms      = counter->comms;
  while (pcomms && pcomms->next) pcomms = pcomms->next;
  if (pcomms) {
    pcomms->next = ncomm;
  } else {
    counter->comms = ncomm;
  }
  *comm_out = MPI_COMM_NULL;
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator.

  Collective

  Input Parameter:
. comm_in - Input communicator

  Output Parameters:
+ comm_out  - Output communicator.  May be `comm_in`.
- first_tag - Tag available that has not already been used with this communicator (you may pass in `NULL` if you do not need a tag)

  Level: developer

  Note:
  PETSc communicators are just regular MPI communicators that keep track of which
  tags have been used to prevent tag conflicts. If you pass a non-PETSc communicator into
  a PETSc creation routine, it will attach a private communicator for use in the object's communications.
  The internal `MPI_Comm` is used to perform all the MPI calls for PETSc; the outer `MPI_Comm` is a
  user-level `MPI_Comm` that may be performing communication for the user or another library and so is not used by PETSc.

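  Example Usage:
  A minimal sketch of how a library routine might obtain the inner PETSc communicator and a first tag for a
  user-supplied communicator; `user_comm` is illustrative, not an actual PETSc variable:
.vb
  MPI_Comm    icomm;
  PetscMPIInt tag;

  PetscCall(PetscCommDuplicate(user_comm, &icomm, &tag));
  // ... perform MPI communication on icomm using tag ...
  PetscCall(PetscCommDestroy(&icomm));
.ve
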
.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
@*/
PetscErrorCode PetscCommDuplicate(MPI_Comm comm_in, MPI_Comm *comm_out, PetscMPIInt *first_tag)
{
  PetscInt64       *cidx;
  PetscCommCounter *counter;
  PetscMPIInt      *maxval, flg;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));

  if (!flg) { /* this is NOT a PETSc comm */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if this communicator has a PETSc communicator embedded in it */
    PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
      PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
      PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
      PetscCall(PetscNew(&counter)); /* all fields of counter are zero'ed */
      counter->tag = *maxval;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_Counter_keyval, counter));
      /* Add an object creation index to the communicator */
      PetscCall(PetscNew(&cidx));
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_CreationIdx_keyval, cidx));
      PetscCall(PetscInfo(NULL, "Duplicating a communicator %ld %ld max tags = %d\n", (long)comm_in, (long)*comm_out, *maxval));

      /* save PETSc communicator inside user communicator, so we can get it next time */
      ucomm.comm = *comm_out; /* ONLY the comm part of the union is significant. */
      PetscCallMPI(MPI_Comm_set_attr(comm_in, Petsc_InnerComm_keyval, ucomm.ptr));
      ucomm.comm = comm_in;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_OuterComm_keyval, ucomm.ptr));
    } else {
      *comm_out = ucomm.comm;
      /* pull out the inner MPI_Comm and hand it back to the caller */
      PetscCallMPI(MPI_Comm_get_attr(*comm_out, Petsc_Counter_keyval, &counter, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
      PetscCall(PetscInfo(NULL, "Using internal PETSc communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
    }
  } else *comm_out = comm_in;

  if (PetscDefined(USE_DEBUG)) {
    /*
     Hanging here means that some processes have called PetscCommDuplicate() and others have not.
     This likely means that a subset of processes in an MPI_Comm have attempted to create a PetscObject!
     ALL processes that share a communicator MUST share objects created from that communicator.
     */
    PetscCallMPI(MPI_Barrier(comm_in));
  }

  if (counter->tag < 1) {
    PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
    PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
    counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
  }

  if (first_tag) *first_tag = counter->tag--;

  counter->refcount++; /* number of references to this comm */
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscCommDestroy - Frees communicator obtained with `PetscCommDuplicate()`.

  Collective

  Input Parameter:
. comm - the communicator to free

  Level: developer

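  Example Usage:
  A minimal sketch, assuming `icomm` was obtained earlier with `PetscCommDuplicate()`:
.vb
  PetscCall(PetscCommDestroy(&icomm));
  // icomm is now MPI_COMM_NULL; the inner MPI_Comm is freed only when its reference count drops to zero
.ve
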
.seealso: `PetscCommDuplicate()`
@*/
PetscErrorCode PetscCommDestroy(MPI_Comm *comm)
{
  PetscInt64       *cidx;
  PetscCommCounter *counter;
  PetscMPIInt       flg;
  PetscGarbage      garbage;
  MPI_Comm          icomm = *comm, ocomm;
  union
  {
    MPI_Comm comm;
    void    *ptr;
  } ucomm;

  PetscFunctionBegin;
  if (*comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* not a PETSc comm, check if it has an inner comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have tag/name counter nor does it have inner MPI_Comm");
    icomm = ucomm.comm;
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");
  }
  counter->refcount--;
  if (!counter->refcount) {
    /* if MPI_Comm has outer comm then remove reference to inner MPI_Comm from outer MPI_Comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_OuterComm_keyval, &ucomm, &flg));
    if (flg) {
      ocomm = ucomm.comm;
      PetscCallMPI(MPI_Comm_get_attr(ocomm, Petsc_InnerComm_keyval, &ucomm, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Outer MPI_Comm %ld does not have expected reference to inner comm %ld, problem with corrupted memory", (long int)ocomm, (long int)icomm);
      PetscCallMPI(MPI_Comm_delete_attr(ocomm, Petsc_InnerComm_keyval));
    }

    /* Remove the object creation index on the communicator */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_CreationIdx_keyval, &cidx, &flg));
    if (flg) {
      PetscCall(PetscFree(cidx));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have object creation index");

    /* Remove garbage hashmap set up by garbage collection */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Garbage_HMap_keyval, &garbage, &flg));
    if (flg) {
      PetscInt entries = 0;
      PetscCall(PetscHMapObjGetSize(garbage.map, &entries));
      if (entries > 0) PetscCall(PetscGarbageCleanup(icomm));
      PetscCall(PetscHMapObjDestroy(&(garbage.map)));
    }

    PetscCall(PetscInfo(NULL, "Deleting PETSc MPI_Comm %ld\n", (long)icomm));
    PetscCallMPI(MPI_Comm_free(&icomm));
  }
  *comm = MPI_COMM_NULL;
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscObjectsListGetGlobalNumbering - computes a global numbering
  of `PetscObject`s living on subcommunicators of a given communicator.

  Collective

  Input Parameters:
+ comm    - the `MPI_Comm`
. len     - local length of `objlist`
- objlist - a list of PETSc objects living on subcommunicators of `comm` and containing this rank of `comm`
              (the subcommunicator ordering is assumed to be deadlock-free)

  Output Parameters:
+ count     - global number of distinct subcommunicators in `objlist` (may be greater than `len`)
- numbering - global numbers of the `objlist` entries (allocated by the user)

  Level: developer

  Note:
  This is needed when PETSc is used with certain languages that do garbage collection to manage object life cycles.

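  Example Usage:
  A sketch that numbers the subcommunicators of two vectors; it assumes `x` and `y` are existing `Vec`s whose
  communicators are subcommunicators of `comm`:
.vb
  PetscObject objlist[2] = {(PetscObject)x, (PetscObject)y};
  PetscInt    count, numbering[2];

  PetscCall(PetscObjectsListGetGlobalNumbering(comm, 2, objlist, &count, numbering));
.ve
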
@*/
PetscErrorCode PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering)
{
  PetscInt    i, roots, offset;
  PetscMPIInt size, rank;

  PetscFunctionBegin;
  PetscValidPointer(objlist, 3);
  if (!count && !numbering) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  roots = 0;
  for (i = 0; i < len; ++i) {
    PetscMPIInt srank;
    PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
    /* Am I the root of the i-th subcomm? */
    if (!srank) ++roots;
  }
  if (count) {
    /* Obtain the sum of all roots -- the global number of distinct subcomms. */
    PetscCall(MPIU_Allreduce(&roots, count, 1, MPIU_INT, MPI_SUM, comm));
  }
  if (numbering) {
    /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
    /*
      Each subcomm root numbers the subcomms it owns locally and then makes that numbering global
      by computing its shift (a prefix sum) over all of the roots.
      The roots are ordered using the comm ordering.
    */
    PetscCallMPI(MPI_Scan(&roots, &offset, 1, MPIU_INT, MPI_SUM, comm));
    offset -= roots;
    /* Now we are ready to broadcast global subcomm numbers within each subcomm. */
    /*
      This is where the deadlock-free ordering of the subcomms is needed:
      the broadcast is collective on each subcomm.
    */
    roots = 0;
    for (i = 0; i < len; ++i) {
      PetscMPIInt srank;
      numbering[i] = offset + roots; /* only meaningful if !srank. */

      PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
      PetscCallMPI(MPI_Bcast(numbering + i, 1, MPIU_INT, 0, objlist[i]->comm));
      if (!srank) ++roots;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}