xref: /petsc/src/sys/objects/tagm.c (revision 1179163e0bf5c4dd309079707fd3c0dfe8d44eee)
1 #include <petsc/private/petscimpl.h>             /*I    "petscsys.h"   I*/
2 /* ---------------------------------------------------------------- */
3 /*
4    A simple way to manage tags inside a communicator.
5 
6    It uses the attributes to determine if a new communicator
7       is needed and to store the available tags.
8 
9 */
10 
11 /*@C
12     PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All
13     processors that share the object MUST call this routine EXACTLY the same
14     number of times.  This tag should only be used with the current objects
15     communicator; do NOT use it with any other MPI communicator.
16 
17     Collective on PetscObject
18 
19     Input Parameter:
20 .   obj - the PETSc object; this must be cast with a (PetscObject), for example,
21          PetscObjectGetNewTag((PetscObject)mat,&tag);
22 
23     Output Parameter:
24 .   tag - the new tag
25 
26     Level: developer
27 
28 .seealso: `PetscCommGetNewTag()`
29 @*/
30 PetscErrorCode  PetscObjectGetNewTag(PetscObject obj,PetscMPIInt *tag)
31 {
32   PetscFunctionBegin;
33   PetscCall(PetscCommGetNewTag(obj->comm,tag));
34   PetscFunctionReturn(0);
35 }
36 
37 /*@
38     PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator. All
39     processors that share the communicator MUST call this routine EXACTLY the same
40     number of times.  This tag should only be used with the current objects
41     communicator; do NOT use it with any other MPI communicator.
42 
43     Collective
44 
45     Input Parameter:
46 .   comm - the MPI communicator
47 
48     Output Parameter:
49 .   tag - the new tag
50 
51     Level: developer
52 
53 .seealso: `PetscObjectGetNewTag()`, `PetscCommDuplicate()`
54 @*/
55 PetscErrorCode  PetscCommGetNewTag(MPI_Comm comm,PetscMPIInt *tag)
56 {
57   PetscCommCounter *counter;
58   PetscMPIInt      *maxval,flg;
59 
60   PetscFunctionBegin;
61   PetscValidIntPointer(tag,2);
62 
63   PetscCallMPI(MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg));
64   PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Bad MPI communicator supplied; must be a PETSc communicator");
65 
66   if (counter->tag < 1) {
67 
68     PetscCall(PetscInfo(NULL,"Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n",counter->refcount));
69     PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD,MPI_TAG_UB,&maxval,&flg));
70     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
71     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
72   }
73 
74   *tag = counter->tag--;
75   if (PetscDefined(USE_DEBUG)) {
76     /*
77      Hanging here means that some processes have called PetscCommGetNewTag() and others have not.
78      */
79     PetscCallMPI(MPI_Barrier(comm));
80   }
81   PetscFunctionReturn(0);
82 }
83 
84 /*@C
85   PetscCommGetComm - get an MPI communicator from a PETSc communicator that can be passed off to another package
86 
87   Collective
88 
89   Input Parameter:
90 . comm_in - Input communicator
91 
92   Output Parameters:
93 . comm_out - Output communicator
94 
95   Notes:
96     Use PetscCommRestoreComm() to return the communicator when the external package no longer needs it
97 
98     Certain MPI implementations have MPI_Comm_free() that do not work, thus one can run out of available MPI communicators causing
99     mysterious crashes in the code after running a long time. This routine allows reusing previously obtained MPI communicators that
100     are no longer needed.
101 
102 Level: developer
103 
104 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
105 @*/
106 PetscErrorCode  PetscCommGetComm(MPI_Comm comm_in,MPI_Comm *comm_out)
107 {
108   PetscCommCounter *counter;
109   PetscMPIInt      flg;
110 
111   PetscFunctionBegin;
112   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
113   PetscCallMPI(MPI_Comm_get_attr(comm_in,Petsc_Counter_keyval,&counter,&flg));
114   PetscCheck(flg,comm_in,PETSC_ERR_ARG_WRONGSTATE,"Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
115 
116   if (counter->comms) {
117     struct PetscCommStash *pcomms = counter->comms;
118 
119     *comm_out = pcomms->comm;
120     counter->comms = pcomms->next;
121     PetscCall(PetscFree(pcomms));
122     PetscCall(PetscInfo(NULL,"Reusing a communicator %ld %ld\n",(long)comm_in,(long)*comm_out));
123   } else {
124     PetscCallMPI(MPI_Comm_dup(comm_in,comm_out));
125   }
126   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
127   PetscFunctionReturn(0);
128 }
129 
130 /*@C
131   PetscCommRestoreComm - restores an MPI communicator that was obtained with PetscCommGetComm()
132 
133   Collective
134 
135   Input Parameters:
136 +  comm_in - Input communicator
137 -  comm_out - returned communicator
138 
139 Level: developer
140 
141 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
142 @*/
143 PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in,MPI_Comm *comm_out)
144 {
145   PetscCommCounter      *counter;
146   PetscMPIInt           flg;
147   struct PetscCommStash *pcomms,*ncomm;
148 
149   PetscFunctionBegin;
150   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
151   PetscCallMPI(MPI_Comm_get_attr(comm_in,Petsc_Counter_keyval,&counter,&flg));
152   PetscCheck(flg,comm_in,PETSC_ERR_ARG_WRONGSTATE,"Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
153 
154   PetscCall(PetscMalloc(sizeof(struct PetscCommStash),&ncomm));
155   ncomm->comm = *comm_out;
156   ncomm->next = NULL;
157   pcomms = counter->comms;
158   while (pcomms && pcomms->next) pcomms = pcomms->next;
159   if (pcomms) {
160     pcomms->next   = ncomm;
161   } else {
162     counter->comms = ncomm;
163   }
164   *comm_out = 0;
165   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
166   PetscFunctionReturn(0);
167 }
168 
/*@C
  PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator.

  Collective

  Input Parameter:
. comm_in - Input communicator

  Output Parameters:
+ comm_out - Output communicator.  May be comm_in.
- first_tag - Tag available that has not already been used with this communicator (you may
  pass in NULL if you do not need a tag)

  Notes:
  PETSc communicators are just regular MPI communicators that keep track of which
  tags have been used to prevent tag conflict. If you pass a non-PETSc communicator into
  a PETSc creation routine it will attach a private communicator for use in the objects communications.
  The internal MPI_Comm is used to perform all the MPI calls for PETSc, the outer MPI_Comm is a user
  level MPI_Comm that may be performing communication for the user or other library and so IS NOT used by PETSc.

Level: developer

.seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
@*/
PetscErrorCode  PetscCommDuplicate(MPI_Comm comm_in,MPI_Comm *comm_out,PetscMPIInt *first_tag)
{
  PetscCommCounter *counter;
  PetscMPIInt      *maxval,flg;

  PetscFunctionBegin;
  /* The spinlock is held for the entire routine: counter state and the
     attribute cross-links below must be updated atomically per process */
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(comm_in,Petsc_Counter_keyval,&counter,&flg));

  if (!flg) {  /* this is NOT a PETSc comm */
    /* union because MPI attributes are stored as void*, while MPI_Comm may be
       an int (MPICH) or a pointer (Open MPI); only the comm member is used */
    union {MPI_Comm comm; void *ptr;} ucomm;
    /* check if this communicator has a PETSc communicator embedded in it */
    PetscCallMPI(MPI_Comm_get_attr(comm_in,Petsc_InnerComm_keyval,&ucomm,&flg));
    if (!flg) {
      /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
      PetscCallMPI(MPI_Comm_dup(comm_in,comm_out));
      PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD,MPI_TAG_UB,&maxval,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
      PetscCall(PetscNew(&counter)); /* all fields of counter are zero'ed */
      counter->tag = *maxval;        /* tags are handed out counting down from the implementation's upper bound */
      PetscCallMPI(MPI_Comm_set_attr(*comm_out,Petsc_Counter_keyval,counter));
      PetscCall(PetscInfo(NULL,"Duplicating a communicator %ld %ld max tags = %d\n",(long)comm_in,(long)*comm_out,*maxval));

      /* save PETSc communicator inside user communicator, so we can get it next time */
      ucomm.comm = *comm_out;   /* ONLY the comm part of the union is significant. */
      PetscCallMPI(MPI_Comm_set_attr(comm_in,Petsc_InnerComm_keyval,ucomm.ptr));
      /* and the reverse link, so PetscCommDestroy() can find the outer comm from the inner one */
      ucomm.comm = comm_in;
      PetscCallMPI(MPI_Comm_set_attr(*comm_out,Petsc_OuterComm_keyval,ucomm.ptr));
    } else {
      *comm_out = ucomm.comm;
      /* pull out the inner MPI_Comm and hand it back to the caller */
      PetscCallMPI(MPI_Comm_get_attr(*comm_out,Petsc_Counter_keyval,&counter,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
      PetscCall(PetscInfo(NULL,"Using internal PETSc communicator %ld %ld\n",(long)comm_in,(long)*comm_out));
    }
  } else *comm_out = comm_in;  /* already a PETSc comm: hand it back unchanged */

  if (PetscDefined(USE_DEBUG)) {
    /*
     Hanging here means that some processes have called PetscCommDuplicate() and others have not.
     This likely means that a subset of processes in a MPI_Comm have attempted to create a PetscObject!
     ALL processes that share a communicator MUST shared objects created from that communicator.
     */
    PetscCallMPI(MPI_Barrier(comm_in));
  }

  if (counter->tag < 1) {
    PetscCall(PetscInfo(NULL,"Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n",counter->refcount));
    PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD,MPI_TAG_UB,&maxval,&flg));
    PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
    counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
  }

  if (first_tag) *first_tag = counter->tag--;

  counter->refcount++; /* number of references to this comm */
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(0);
}
251 
/*@C
   PetscCommDestroy - Frees communicator.  Use in conjunction with PetscCommDuplicate().

   Collective

   Input Parameter:
.  comm - the communicator to free; set to MPI_COMM_NULL on return

   Level: developer

.seealso: `PetscCommDuplicate()`
@*/
PetscErrorCode  PetscCommDestroy(MPI_Comm *comm)
{
  PetscCommCounter *counter;
  PetscMPIInt      flg;
  MPI_Comm         icomm = *comm,ocomm;
  /* union because MPI attributes are stored as void* while MPI_Comm may be an
     int (MPICH) or a pointer (Open MPI); only the comm member is significant */
  union {MPI_Comm comm; void *ptr;} ucomm;

  PetscFunctionBegin;
  if (*comm == MPI_COMM_NULL) PetscFunctionReturn(0);
  PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
  PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_Counter_keyval,&counter,&flg));
  if (!flg) { /* not a PETSc comm, check if it has an inner comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_InnerComm_keyval,&ucomm,&flg));
    PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"MPI_Comm does not have tag/name counter nor does it have inner MPI_Comm");
    /* work on the hidden inner PETSc communicator from here on */
    icomm = ucomm.comm;
    PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_Counter_keyval,&counter,&flg));
    PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");
  }

  counter->refcount--;

  if (!counter->refcount) {
    /* if MPI_Comm has outer comm then remove reference to inner MPI_Comm from outer MPI_Comm */
    PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_OuterComm_keyval,&ucomm,&flg));
    if (flg) {
      ocomm = ucomm.comm;
      PetscCallMPI(MPI_Comm_get_attr(ocomm,Petsc_InnerComm_keyval,&ucomm,&flg));
      if (flg) {
        PetscCallMPI(MPI_Comm_delete_attr(ocomm,Petsc_InnerComm_keyval));
      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Outer MPI_Comm %ld does not have expected reference to inner comm %ld, problem with corrupted memory",(long int)ocomm,(long int)icomm);
    }

    PetscCall(PetscInfo(NULL,"Deleting PETSc MPI_Comm %ld\n",(long)icomm));
    PetscCallMPI(MPI_Comm_free(&icomm));
  }
  *comm = MPI_COMM_NULL;
  PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
  PetscFunctionReturn(0);
}
303 
304 /*@C
305     PetscObjectsListGetGlobalNumbering - computes a global numbering
306     of PetscObjects living on subcommunicators of a given communicator.
307 
308     Collective.
309 
310     Input Parameters:
311 +   comm    - MPI_Comm
312 .   len     - local length of objlist
313 -   objlist - a list of PETSc objects living on subcomms of comm and containing this comm rank
314               (subcomm ordering is assumed to be deadlock-free)
315 
316     Output Parameters:
317 +   count      - global number of distinct subcommunicators on objlist (may be > len)
318 -   numbering  - global numbers of objlist entries (allocated by user)
319 
320     Level: developer
321 
322 @*/
323 PetscErrorCode  PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering)
324 {
325   PetscInt       i, roots, offset;
326   PetscMPIInt    size, rank;
327 
328   PetscFunctionBegin;
329   PetscValidPointer(objlist,3);
330   if (!count && !numbering) PetscFunctionReturn(0);
331 
332   PetscCallMPI(MPI_Comm_size(comm, &size));
333   PetscCallMPI(MPI_Comm_rank(comm, &rank));
334   roots = 0;
335   for (i = 0; i < len; ++i) {
336     PetscMPIInt srank;
337     PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
338     /* Am I the root of the i-th subcomm? */
339     if (!srank) ++roots;
340   }
341   if (count) {
342     /* Obtain the sum of all roots -- the global number of distinct subcomms. */
343     PetscCall(MPIU_Allreduce(&roots,count,1,MPIU_INT,MPI_SUM,comm));
344   }
345   if (numbering) {
346     /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
347     /*
348       At each subcomm root number all of the subcomms it owns locally
349       and make it global by calculating the shift among all of the roots.
350       The roots are ordered using the comm ordering.
351     */
352     PetscCallMPI(MPI_Scan(&roots,&offset,1,MPIU_INT,MPI_SUM,comm));
353     offset -= roots;
354     /* Now we are ready to broadcast global subcomm numbers within each subcomm.*/
355     /*
356       This is where the assumption of a deadlock-free ordering of the subcomms is assumed:
357       broadcast is collective on the subcomm.
358     */
359     roots = 0;
360     for (i = 0; i < len; ++i) {
361       PetscMPIInt srank;
362       numbering[i] = offset + roots; /* only meaningful if !srank. */
363 
364       PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
365       PetscCallMPI(MPI_Bcast(numbering+i,1,MPIU_INT,0,objlist[i]->comm));
366       if (!srank) ++roots;
367     }
368   }
369   PetscFunctionReturn(0);
370 }
371