xref: /petsc/src/sys/objects/tagm.c (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 #include <petsc/private/petscimpl.h> /*I    "petscsys.h"   I*/
2 /* ---------------------------------------------------------------- */
3 /*
4    A simple way to manage tags inside a communicator.
5 
6    It uses the attributes to determine if a new communicator
7       is needed and to store the available tags.
8 
9 */
10 
11 /*@C
12     PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All
13     processors that share the object MUST call this routine EXACTLY the same
    number of times.  This tag should only be used with the current object's
    communicator; do NOT use it with any other MPI communicator.
16 
17     Collective on PetscObject
18 
19     Input Parameter:
20 .   obj - the PETSc object; this must be cast with a (PetscObject), for example,
21          PetscObjectGetNewTag((PetscObject)mat,&tag);
22 
23     Output Parameter:
24 .   tag - the new tag
25 
26     Level: developer
27 
28 .seealso: `PetscCommGetNewTag()`
29 @*/
30 PetscErrorCode PetscObjectGetNewTag(PetscObject obj, PetscMPIInt *tag) {
31   PetscFunctionBegin;
32   PetscCall(PetscCommGetNewTag(obj->comm, tag));
33   PetscFunctionReturn(0);
34 }
35 
36 /*@
37     PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator. All
38     processors that share the communicator MUST call this routine EXACTLY the same
    number of times.  This tag should only be used with the current object's
    communicator; do NOT use it with any other MPI communicator.
41 
42     Collective
43 
44     Input Parameter:
45 .   comm - the MPI communicator
46 
47     Output Parameter:
48 .   tag - the new tag
49 
50     Level: developer
51 
52 .seealso: `PetscObjectGetNewTag()`, `PetscCommDuplicate()`
53 @*/
54 PetscErrorCode PetscCommGetNewTag(MPI_Comm comm, PetscMPIInt *tag) {
55   PetscCommCounter *counter;
56   PetscMPIInt      *maxval, flg;
57 
58   PetscFunctionBegin;
59   PetscValidIntPointer(tag, 2);
60 
61   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
62   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Bad MPI communicator supplied; must be a PETSc communicator");
63 
64   if (counter->tag < 1) {
65     PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
66     PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
67     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
68     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
69   }
70 
71   *tag = counter->tag--;
72   if (PetscDefined(USE_DEBUG)) {
73     /*
74      Hanging here means that some processes have called PetscCommGetNewTag() and others have not.
75      */
76     PetscCallMPI(MPI_Barrier(comm));
77   }
78   PetscFunctionReturn(0);
79 }
80 
81 /*@C
82   PetscCommGetComm - get an MPI communicator from a PETSc communicator that can be passed off to another package
83 
84   Collective
85 
86   Input Parameter:
87 . comm_in - Input communicator
88 
89   Output Parameters:
90 . comm_out - Output communicator
91 
92   Notes:
93     Use PetscCommRestoreComm() to return the communicator when the external package no longer needs it
94 
95     Certain MPI implementations have MPI_Comm_free() that do not work, thus one can run out of available MPI communicators causing
96     mysterious crashes in the code after running a long time. This routine allows reusing previously obtained MPI communicators that
97     are no longer needed.
98 
99 Level: developer
100 
101 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
102 @*/
PetscErrorCode PetscCommGetComm(MPI_Comm comm_in, MPI_Comm *comm_out) {
  PetscCommCounter *counter;
  PetscMPIInt       flg;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  /* Only PETSc communicators (those carrying the counter attribute set in PetscCommDuplicate()) are accepted */
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
  /* NOTE(review): if this check fails, the error return leaves PetscCommSpinLock held — confirm this is intended */
  PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");

  if (counter->comms) {
    /* Reuse a communicator from the stash populated by PetscCommRestoreComm(): pop the head of the list */
    struct PetscCommStash *pcomms = counter->comms;

    *comm_out      = pcomms->comm;
    counter->comms = pcomms->next;
    PetscCall(PetscFree(pcomms));
    PetscCall(PetscInfo(NULL, "Reusing a communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
  } else {
    /* Nothing stashed: fall back to a fresh MPI duplicate of the input communicator */
    PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
  }
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(0);
}
125 
126 /*@C
127   PetscCommRestoreComm - restores an MPI communicator that was obtained with PetscCommGetComm()
128 
129   Collective
130 
131   Input Parameters:
132 +  comm_in - Input communicator
-  comm_out - the communicator previously obtained with PetscCommGetComm(); nulled out on return
134 
135 Level: developer
136 
.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommGetComm()`
138 @*/
139 PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in, MPI_Comm *comm_out) {
140   PetscCommCounter      *counter;
141   PetscMPIInt            flg;
142   struct PetscCommStash *pcomms, *ncomm;
143 
144   PetscFunctionBegin;
145   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
146   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
147   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
148 
149   PetscCall(PetscMalloc(sizeof(struct PetscCommStash), &ncomm));
150   ncomm->comm = *comm_out;
151   ncomm->next = NULL;
152   pcomms      = counter->comms;
153   while (pcomms && pcomms->next) pcomms = pcomms->next;
154   if (pcomms) {
155     pcomms->next = ncomm;
156   } else {
157     counter->comms = ncomm;
158   }
159   *comm_out = 0;
160   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
161   PetscFunctionReturn(0);
162 }
163 
164 /*@C
165   PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator.
166 
167   Collective
168 
169   Input Parameter:
170 . comm_in - Input communicator
171 
172   Output Parameters:
173 + comm_out - Output communicator.  May be comm_in.
174 - first_tag - Tag available that has not already been used with this communicator (you may
175   pass in NULL if you do not need a tag)
176 
177   PETSc communicators are just regular MPI communicators that keep track of which
178   tags have been used to prevent tag conflict. If you pass a non-PETSc communicator into
179   a PETSc creation routine it will attach a private communicator for use in the objects communications.
180   The internal MPI_Comm is used to perform all the MPI calls for PETSc, the outer MPI_Comm is a user
181   level MPI_Comm that may be performing communication for the user or other library and so IS NOT used by PETSc.
182 
183 Level: developer
184 
185 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
186 @*/
PetscErrorCode PetscCommDuplicate(MPI_Comm comm_in, MPI_Comm *comm_out, PetscMPIInt *first_tag) {
  PetscCommCounter *counter;
  PetscMPIInt      *maxval, flg;

  PetscFunctionBegin;
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  /* Is comm_in already an inner PETSc communicator (i.e. does it carry the tag counter attribute)? */
  PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));

  if (!flg) { /* this is NOT a PETSc comm */
    /* Union used to move an MPI_Comm through the void* attribute interface without casting */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if this communicator has a PETSc communicator embedded in it */
    PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
      PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
      /* Seed the tag counter at the implementation's upper bound; tags are handed out counting down from here */
      PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
      PetscCall(PetscNew(&counter)); /* all fields of counter are zero'ed */
      counter->tag = *maxval;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_Counter_keyval, counter));
      PetscCall(PetscInfo(NULL, "Duplicating a communicator %ld %ld max tags = %d\n", (long)comm_in, (long)*comm_out, *maxval));

      /* save PETSc communicator inside user communicator, so we can get it next time */
      ucomm.comm = *comm_out; /* ONLY the comm part of the union is significant. */
      PetscCallMPI(MPI_Comm_set_attr(comm_in, Petsc_InnerComm_keyval, ucomm.ptr));
      /* and the reverse link: record the user (outer) communicator on the new inner communicator */
      ucomm.comm = comm_in;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_OuterComm_keyval, ucomm.ptr));
    } else {
      *comm_out = ucomm.comm;
      /* pull out the inner MPI_Comm and hand it back to the caller */
      PetscCallMPI(MPI_Comm_get_attr(*comm_out, Petsc_Counter_keyval, &counter, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
      PetscCall(PetscInfo(NULL, "Using internal PETSc communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
    }
  } else *comm_out = comm_in; /* already a PETSc comm: hand it straight back */

  if (PetscDefined(USE_DEBUG)) {
    /*
     Hanging here means that some processes have called PetscCommDuplicate() and others have not.
     This likely means that a subset of processes in a MPI_Comm have attempted to create a PetscObject!
     ALL processes that share a communicator MUST share objects created from that communicator.
     */
    PetscCallMPI(MPI_Barrier(comm_in));
  }

  if (counter->tag < 1) {
    /* Tag space exhausted: restart 128 below MPI_TAG_UB (see also PetscCommGetNewTag()) */
    PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
    PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
    counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
  }

  if (first_tag) *first_tag = counter->tag--;

  counter->refcount++; /* number of references to this comm */
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(0);
}
249 
250 /*@C
251    PetscCommDestroy - Frees communicator.  Use in conjunction with PetscCommDuplicate().
252 
253    Collective
254 
255    Input Parameter:
256 .  comm - the communicator to free
257 
258    Level: developer
259 
260 .seealso: `PetscCommDuplicate()`
261 @*/
PetscErrorCode PetscCommDestroy(MPI_Comm *comm) {
  PetscCommCounter *counter;
  PetscMPIInt       flg;
  MPI_Comm          icomm = *comm, ocomm;
  /* Union used to move an MPI_Comm through the void* attribute interface without casting */
  union
  {
    MPI_Comm comm;
    void    *ptr;
  } ucomm;

  PetscFunctionBegin;
  if (*comm == MPI_COMM_NULL) PetscFunctionReturn(0);
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* not a PETSc comm, check if it has an inner comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have tag/name counter nor does it have inner MPI_Comm");
    /* From here on work with the inner (PETSc) communicator, which owns the counter */
    icomm = ucomm.comm;
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");
  }

  counter->refcount--;

  if (!counter->refcount) {
    /* last reference: actually free the inner communicator */
    /* if MPI_Comm has outer comm then remove reference to inner MPI_Comm from outer MPI_Comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_OuterComm_keyval, &ucomm, &flg));
    if (flg) {
      ocomm = ucomm.comm;
      PetscCallMPI(MPI_Comm_get_attr(ocomm, Petsc_InnerComm_keyval, &ucomm, &flg));
      if (flg) {
        PetscCallMPI(MPI_Comm_delete_attr(ocomm, Petsc_InnerComm_keyval));
      } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Outer MPI_Comm %ld does not have expected reference to inner comm %ld, problem with corrupted memory", (long int)ocomm, (long int)icomm);
    }

    PetscCall(PetscInfo(NULL, "Deleting PETSc MPI_Comm %ld\n", (long)icomm));
    /* MPI_Comm_free also triggers the delete callbacks registered on the counter keyval */
    PetscCallMPI(MPI_Comm_free(&icomm));
  }
  *comm = MPI_COMM_NULL;
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(0);
}
304 
305 /*@C
306     PetscObjectsListGetGlobalNumbering - computes a global numbering
307     of PetscObjects living on subcommunicators of a given communicator.
308 
309     Collective.
310 
311     Input Parameters:
312 +   comm    - MPI_Comm
313 .   len     - local length of objlist
314 -   objlist - a list of PETSc objects living on subcomms of comm and containing this comm rank
315               (subcomm ordering is assumed to be deadlock-free)
316 
317     Output Parameters:
318 +   count      - global number of distinct subcommunicators on objlist (may be > len)
319 -   numbering  - global numbers of objlist entries (allocated by user)
320 
321     Level: developer
322 
323 @*/
PetscErrorCode PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering) {
  PetscInt    i, roots, offset;
  PetscMPIInt size, rank;

  PetscFunctionBegin;
  PetscValidPointer(objlist, 3);
  if (!count && !numbering) PetscFunctionReturn(0); /* nothing requested, nothing to do */

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* Count the subcomms for which this rank is the root (rank 0 of the subcomm);
     each subcomm is counted globally exactly once this way */
  roots = 0;
  for (i = 0; i < len; ++i) {
    PetscMPIInt srank;
    PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
    /* Am I the root of the i-th subcomm? */
    if (!srank) ++roots;
  }
  if (count) {
    /* Obtain the sum of all roots -- the global number of distinct subcomms. */
    PetscCall(MPIU_Allreduce(&roots, count, 1, MPIU_INT, MPI_SUM, comm));
  }
  if (numbering) {
    /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
    /*
      At each subcomm root number all of the subcomms it owns locally
      and make it global by calculating the shift among all of the roots.
      The roots are ordered using the comm ordering.
    */
    PetscCallMPI(MPI_Scan(&roots, &offset, 1, MPIU_INT, MPI_SUM, comm));
    offset -= roots; /* exclusive prefix sum: number of subcomm roots on lower-ranked processes */
    /* Now we are ready to broadcast global subcomm numbers within each subcomm.*/
    /*
      This is where the assumption of a deadlock-free ordering of the subcomms is assumed:
      broadcast is collective on the subcomm.
    */
    roots = 0;
    for (i = 0; i < len; ++i) {
      PetscMPIInt srank;
      numbering[i] = offset + roots; /* only meaningful if !srank. */

      PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
      /* the root's value (set above) overwrites the placeholder on all other subcomm ranks */
      PetscCallMPI(MPI_Bcast(numbering + i, 1, MPIU_INT, 0, objlist[i]->comm));
      if (!srank) ++roots;
    }
  }
  PetscFunctionReturn(0);
}
371