xref: /petsc/src/sys/objects/tagm.c (revision 0ea77eda237b1068ff0d8bfa28c3463dc2087695)
1 #include <petsc/private/petscimpl.h> /*I    "petscsys.h"   I*/
2 /* ---------------------------------------------------------------- */
3 /*
4    A simple way to manage tags inside a communicator.
5 
6    It uses the attributes to determine if a new communicator
7       is needed and to store the available tags.
8 
9 */
10 
11 /*@C
12     PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All
13     processors that share the object MUST call this routine EXACTLY the same
14     number of times.  This tag should only be used with the current objects
15     communicator; do NOT use it with any other MPI communicator.
16 
17     Collective on obj
18 
19     Input Parameter:
20 .   obj - the PETSc object; this must be cast with a (`PetscObject`), for example,
21          `PetscObjectGetNewTag`((`PetscObject`)mat,&tag);
22 
23     Output Parameter:
24 .   tag - the new tag
25 
26     Level: developer
27 
28     Note:
29     This tag is needed if one is writing MPI communication code involving message passing and needs unique MPI tags to ensure the messages are connected to this specific
30     object.
31 
32 .seealso: `PetscCommGetNewTag()`
33 @*/
34 PetscErrorCode PetscObjectGetNewTag(PetscObject obj, PetscMPIInt *tag) {
35   PetscFunctionBegin;
36   PetscCall(PetscCommGetNewTag(obj->comm, tag));
37   PetscFunctionReturn(0);
38 }
39 
40 /*@
41     PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator. All
42     processors that share the communicator MUST call this routine EXACTLY the same
number of times.  This tag should only be used with the given
communicator; do NOT use it with any other MPI communicator.
45 
46     Collective
47 
48     Input Parameter:
49 .   comm - the MPI communicator
50 
51     Output Parameter:
52 .   tag - the new tag
53 
54     Level: developer
55 
56 .seealso: `PetscObjectGetNewTag()`, `PetscCommDuplicate()`
57 @*/
58 PetscErrorCode PetscCommGetNewTag(MPI_Comm comm, PetscMPIInt *tag) {
59   PetscCommCounter *counter;
60   PetscMPIInt      *maxval, flg;
61 
62   PetscFunctionBegin;
63   PetscValidIntPointer(tag, 2);
64 
65   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
66   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Bad MPI communicator supplied; must be a PETSc communicator");
67 
68   if (counter->tag < 1) {
69     PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
70     PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
71     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
72     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
73   }
74 
75   *tag = counter->tag--;
76   if (PetscDefined(USE_DEBUG)) {
77     /*
78      Hanging here means that some processes have called PetscCommGetNewTag() and others have not.
79      */
80     PetscCallMPI(MPI_Barrier(comm));
81   }
82   PetscFunctionReturn(0);
83 }
84 
85 /*@C
86   PetscCommGetComm - get a new MPI communicator from a PETSc communicator that can be passed off to another package
87 
88   Collective
89 
90   Input Parameter:
91 . comm_in - Input communicator
92 
  Output Parameter:
. comm_out - Output communicator
95 
96   Notes:
97     Use `PetscCommRestoreComm()` to return the communicator when the external package no longer needs it
98 
99     Certain MPI implementations have `MPI_Comm_free()` that do not work, thus one can run out of available MPI communicators causing
100     mysterious crashes in the code after running a long time. This routine allows reusing previously obtained MPI communicators that
101     are no longer needed.
102 
  Level: developer
104 
105 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
106 @*/
107 PetscErrorCode PetscCommGetComm(MPI_Comm comm_in, MPI_Comm *comm_out) {
108   PetscCommCounter *counter;
109   PetscMPIInt       flg;
110 
111   PetscFunctionBegin;
112   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
113   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
114   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
115 
116   if (counter->comms) {
117     struct PetscCommStash *pcomms = counter->comms;
118 
119     *comm_out      = pcomms->comm;
120     counter->comms = pcomms->next;
121     PetscCall(PetscFree(pcomms));
122     PetscCall(PetscInfo(NULL, "Reusing a communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
123   } else {
124     PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
125   }
126   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
127   PetscFunctionReturn(0);
128 }
129 
130 /*@C
131   PetscCommRestoreComm - restores an MPI communicator that was obtained with `PetscCommGetComm()`
132 
133   Collective
134 
135   Input Parameters:
136 +  comm_in - Input communicator
137 -  comm_out - returned communicator
138 
  Level: developer
140 
.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommGetComm()`
142 @*/
143 PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in, MPI_Comm *comm_out) {
144   PetscCommCounter      *counter;
145   PetscMPIInt            flg;
146   struct PetscCommStash *pcomms, *ncomm;
147 
148   PetscFunctionBegin;
149   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
150   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
151   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
152 
153   PetscCall(PetscMalloc(sizeof(struct PetscCommStash), &ncomm));
154   ncomm->comm = *comm_out;
155   ncomm->next = NULL;
156   pcomms      = counter->comms;
157   while (pcomms && pcomms->next) pcomms = pcomms->next;
158   if (pcomms) {
159     pcomms->next = ncomm;
160   } else {
161     counter->comms = ncomm;
162   }
163   *comm_out = 0;
164   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
165   PetscFunctionReturn(0);
166 }
167 
168 /*@C
169    PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator.
170 
171    Collective
172 
173    Input Parameter:
174 .  comm_in - Input communicator
175 
176    Output Parameters:
177 +  comm_out - Output communicator.  May be comm_in.
178 -  first_tag - Tag available that has not already been used with this communicator (you may pass in NULL if you do not need a tag)
179 
180    Note:
181    PETSc communicators are just regular MPI communicators that keep track of which
182    tags have been used to prevent tag conflict. If you pass a non-PETSc communicator into
183    a PETSc creation routine it will attach a private communicator for use in the objects communications.
184    The internal `MPI_Comm` is used to perform all the MPI calls for PETSc, the outer `MPI_Comm` is a user
185    level `MPI_Comm` that may be performing communication for the user or other library and so IS NOT used by PETSc.
186 
   Level: developer
188 
189 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
190 @*/
PetscErrorCode PetscCommDuplicate(MPI_Comm comm_in, MPI_Comm *comm_out, PetscMPIInt *first_tag) {
  PetscCommCounter *counter;
  PetscMPIInt      *maxval, flg;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  /* A PETSc communicator is recognized by the tag-counter attribute attached to it */
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));

  if (!flg) { /* this is NOT a PETSc comm */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm; /* converts between MPI_Comm and the void* form that MPI attributes are stored as */
    /* check if this communicator has a PETSc communicator embedded in it */
    PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
      PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
      PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
      PetscCall(PetscNew(&counter)); /* all fields of counter are zero'ed */
      /* tags are handed out counting DOWN from the MPI implementation's upper bound */
      counter->tag = *maxval;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_Counter_keyval, counter));
      PetscCall(PetscInfo(NULL, "Duplicating a communicator %ld %ld max tags = %d\n", (long)comm_in, (long)*comm_out, *maxval));

      /* save PETSc communicator inside user communicator, so we can get it next time */
      ucomm.comm = *comm_out; /* ONLY the comm part of the union is significant. */
      PetscCallMPI(MPI_Comm_set_attr(comm_in, Petsc_InnerComm_keyval, ucomm.ptr));
      /* and a back-pointer from the inner (PETSc) comm to the outer (user) comm */
      ucomm.comm = comm_in;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_OuterComm_keyval, ucomm.ptr));
    } else {
      *comm_out = ucomm.comm;
      /* pull out the inner MPI_Comm and hand it back to the caller */
      PetscCallMPI(MPI_Comm_get_attr(*comm_out, Petsc_Counter_keyval, &counter, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
      PetscCall(PetscInfo(NULL, "Using internal PETSc communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
    }
  } else *comm_out = comm_in; /* comm_in is already a PETSc comm; return it unchanged */

  if (PetscDefined(USE_DEBUG)) {
    /*
     Hanging here means that some processes have called PetscCommDuplicate() and others have not.
     This likely means that a subset of processes in a MPI_Comm have attempted to create a PetscObject!
     ALL processes that share a communicator MUST shared objects created from that communicator.
     */
    PetscCallMPI(MPI_Barrier(comm_in));
  }

  if (counter->tag < 1) {
    /* tag space exhausted: restart 128 below the upper bound and recycle */
    PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
    PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
    counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
  }

  if (first_tag) *first_tag = counter->tag--;

  counter->refcount++; /* number of references to this comm */
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(0);
}
253 
254 /*@C
255    PetscCommDestroy - Frees communicator obtained with `PetscCommDuplicate()`.
256 
257    Collective
258 
259    Input Parameter:
260 .  comm - the communicator to free
261 
262    Level: developer
263 
264 .seealso: `PetscCommDuplicate()`
265 @*/
PetscErrorCode PetscCommDestroy(MPI_Comm *comm) {
  PetscCommCounter *counter;
  PetscMPIInt       flg;
  MPI_Comm          icomm = *comm, ocomm;
  union
  {
    MPI_Comm comm;
    void    *ptr;
  } ucomm; /* converts between MPI_Comm and the void* form that MPI attributes are stored as */

  PetscFunctionBegin;
  if (*comm == MPI_COMM_NULL) PetscFunctionReturn(0);
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* not a PETSc comm, check if it has an inner comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have tag/name counter nor does it have inner MPI_Comm");
    icomm = ucomm.comm; /* from here on, operate on the inner (PETSc) communicator */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");
  }

  counter->refcount--;

  /* only free the underlying MPI communicator when the last reference is released */
  if (!counter->refcount) {
    /* if MPI_Comm has outer comm then remove reference to inner MPI_Comm from outer MPI_Comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_OuterComm_keyval, &ucomm, &flg));
    if (flg) {
      ocomm = ucomm.comm;
      PetscCallMPI(MPI_Comm_get_attr(ocomm, Petsc_InnerComm_keyval, &ucomm, &flg));
      if (flg) {
        PetscCallMPI(MPI_Comm_delete_attr(ocomm, Petsc_InnerComm_keyval));
      } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Outer MPI_Comm %ld does not have expected reference to inner comm %ld, problem with corrupted memory", (long int)ocomm, (long int)icomm);
    }

    PetscCall(PetscInfo(NULL, "Deleting PETSc MPI_Comm %ld\n", (long)icomm));
    /* NOTE(review): counter itself is presumably freed by the keyval delete callback
       registered for Petsc_Counter_keyval (defined elsewhere) — confirm */
    PetscCallMPI(MPI_Comm_free(&icomm));
  }
  *comm = MPI_COMM_NULL; /* invalidate the caller's handle even when the comm lives on via other references */
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(0);
}
308 
309 /*@C
310     PetscObjectsListGetGlobalNumbering - computes a global numbering
311     of `PetscObject`s living on subcommunicators of a given communicator.
312 
313     Collective.
314 
315     Input Parameters:
316 +   comm    - the `MPI_Comm`
317 .   len     - local length of objlist
318 -   objlist - a list of PETSc objects living on subcomms of comm and containing this comm rank
319               (subcomm ordering is assumed to be deadlock-free)
320 
321     Output Parameters:
322 +   count      - global number of distinct subcommunicators on objlist (may be > len)
323 -   numbering  - global numbers of objlist entries (allocated by user)
324 
325     Level: developer
326 
327     Note:
328     This is needed when PETSc is used with certain languages that do garbage collection to manage object life cycles.
329 
330 @*/
331 PetscErrorCode PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering) {
332   PetscInt    i, roots, offset;
333   PetscMPIInt size, rank;
334 
335   PetscFunctionBegin;
336   PetscValidPointer(objlist, 3);
337   if (!count && !numbering) PetscFunctionReturn(0);
338 
339   PetscCallMPI(MPI_Comm_size(comm, &size));
340   PetscCallMPI(MPI_Comm_rank(comm, &rank));
341   roots = 0;
342   for (i = 0; i < len; ++i) {
343     PetscMPIInt srank;
344     PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
345     /* Am I the root of the i-th subcomm? */
346     if (!srank) ++roots;
347   }
348   if (count) {
349     /* Obtain the sum of all roots -- the global number of distinct subcomms. */
350     PetscCall(MPIU_Allreduce(&roots, count, 1, MPIU_INT, MPI_SUM, comm));
351   }
352   if (numbering) {
353     /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
354     /*
355       At each subcomm root number all of the subcomms it owns locally
356       and make it global by calculating the shift among all of the roots.
357       The roots are ordered using the comm ordering.
358     */
359     PetscCallMPI(MPI_Scan(&roots, &offset, 1, MPIU_INT, MPI_SUM, comm));
360     offset -= roots;
361     /* Now we are ready to broadcast global subcomm numbers within each subcomm.*/
362     /*
363       This is where the assumption of a deadlock-free ordering of the subcomms is assumed:
364       broadcast is collective on the subcomm.
365     */
366     roots = 0;
367     for (i = 0; i < len; ++i) {
368       PetscMPIInt srank;
369       numbering[i] = offset + roots; /* only meaningful if !srank. */
370 
371       PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
372       PetscCallMPI(MPI_Bcast(numbering + i, 1, MPIU_INT, 0, objlist[i]->comm));
373       if (!srank) ++roots;
374     }
375   }
376   PetscFunctionReturn(0);
377 }
378