xref: /petsc/src/sys/objects/tagm.c (revision 7d3de750dec08ee2edc7d15bcef3046c0443ab7d)
1 #include <petsc/private/petscimpl.h>             /*I    "petscsys.h"   I*/
2 /* ---------------------------------------------------------------- */
3 /*
4    A simple way to manage tags inside a communicator.
5 
6    It uses the attributes to determine if a new communicator
7       is needed and to store the available tags.
8 
9 */
10 
11 /*@C
12     PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All
13     processors that share the object MUST call this routine EXACTLY the same
14     number of times.  This tag should only be used with the current objects
15     communicator; do NOT use it with any other MPI communicator.
16 
17     Collective on PetscObject
18 
19     Input Parameter:
20 .   obj - the PETSc object; this must be cast with a (PetscObject), for example,
21          PetscObjectGetNewTag((PetscObject)mat,&tag);
22 
23     Output Parameter:
24 .   tag - the new tag
25 
26     Level: developer
27 
28 .seealso: PetscCommGetNewTag()
29 @*/
30 PetscErrorCode  PetscObjectGetNewTag(PetscObject obj,PetscMPIInt *tag)
31 {
32   PetscErrorCode ierr;
33 
34   PetscFunctionBegin;
35   ierr = PetscCommGetNewTag(obj->comm,tag);CHKERRQ(ierr);
36   PetscFunctionReturn(0);
37 }
38 
39 /*@
40     PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator. All
41     processors that share the communicator MUST call this routine EXACTLY the same
42     number of times.  This tag should only be used with the current objects
43     communicator; do NOT use it with any other MPI communicator.
44 
45     Collective
46 
47     Input Parameter:
48 .   comm - the MPI communicator
49 
50     Output Parameter:
51 .   tag - the new tag
52 
53     Level: developer
54 
55 .seealso: PetscObjectGetNewTag(), PetscCommDuplicate()
56 @*/
57 PetscErrorCode  PetscCommGetNewTag(MPI_Comm comm,PetscMPIInt *tag)
58 {
59   PetscErrorCode   ierr;
60   PetscCommCounter *counter;
61   PetscMPIInt      *maxval,flg;
62 
63   PetscFunctionBegin;
64   PetscValidIntPointer(tag,2);
65 
66   ierr = MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr);
67   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Bad MPI communicator supplied; must be a PETSc communicator");
68 
69   if (counter->tag < 1) {
70 
71     ierr = PetscInfo(NULL,"Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n",counter->refcount);CHKERRQ(ierr);
72     ierr = MPI_Comm_get_attr(MPI_COMM_WORLD,MPI_TAG_UB,&maxval,&flg);CHKERRMPI(ierr);
73     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
74     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
75   }
76 
77   *tag = counter->tag--;
78   if (PetscDefined(USE_DEBUG)) {
79     /*
80      Hanging here means that some processes have called PetscCommGetNewTag() and others have not.
81      */
82     ierr = MPI_Barrier(comm);CHKERRMPI(ierr);
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 /*@C
88   PetscCommGetComm - get an MPI communicator from a PETSc communicator that can be passed off to another package
89 
90   Collective
91 
92   Input Parameter:
93 . comm_in - Input communicator
94 
95   Output Parameters:
96 . comm_out - Output communicator
97 
98   Notes:
99     Use PetscCommRestoreComm() to return the communicator when the external package no longer needs it
100 
101     Certain MPI implementations have MPI_Comm_free() that do not work, thus one can run out of available MPI communicators causing
102     mysterious crashes in the code after running a long time. This routine allows reusing previously obtained MPI communicators that
103     are no longer needed.
104 
105 Level: developer
106 
107 .seealso: PetscObjectGetNewTag(), PetscCommGetNewTag(), PetscCommDestroy(), PetscCommRestoreComm()
108 @*/
109 PetscErrorCode  PetscCommGetComm(MPI_Comm comm_in,MPI_Comm *comm_out)
110 {
111   PetscErrorCode   ierr;
112   PetscCommCounter *counter;
113   PetscMPIInt      flg;
114 
115   PetscFunctionBegin;
116   ierr = PetscSpinlockLock(&PetscCommSpinLock);CHKERRQ(ierr);
117   ierr = MPI_Comm_get_attr(comm_in,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr);
118   if (!flg) SETERRQ(comm_in,PETSC_ERR_ARG_WRONGSTATE,"Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
119 
120   if (counter->comms) {
121     struct PetscCommStash *pcomms = counter->comms;
122 
123     *comm_out = pcomms->comm;
124     counter->comms = pcomms->next;
125     ierr = PetscFree(pcomms);CHKERRQ(ierr);
126     ierr = PetscInfo(NULL,"Reusing a communicator %ld %ld\n",(long)comm_in,(long)*comm_out);CHKERRQ(ierr);
127   } else {
128     ierr = MPI_Comm_dup(comm_in,comm_out);CHKERRMPI(ierr);
129   }
130   ierr = PetscSpinlockUnlock(&PetscCommSpinLock);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 /*@C
135   PetscCommRestoreComm - restores an MPI communicator that was obtained with PetscCommGetComm()
136 
137   Collective
138 
139   Input Parameters:
140 +  comm_in - Input communicator
141 -  comm_out - returned communicator
142 
143 Level: developer
144 
145 .seealso: PetscObjectGetNewTag(), PetscCommGetNewTag(), PetscCommDestroy(), PetscCommRestoreComm()
146 @*/
147 PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in,MPI_Comm *comm_out)
148 {
149   PetscErrorCode        ierr;
150   PetscCommCounter      *counter;
151   PetscMPIInt           flg;
152   struct PetscCommStash *pcomms,*ncomm;
153 
154   PetscFunctionBegin;
155   ierr = PetscSpinlockLock(&PetscCommSpinLock);CHKERRQ(ierr);
156   ierr = MPI_Comm_get_attr(comm_in,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr);
157   if (!flg) SETERRQ(comm_in,PETSC_ERR_ARG_WRONGSTATE,"Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
158 
159   ierr = PetscMalloc(sizeof(struct PetscCommStash),&ncomm);CHKERRQ(ierr);
160   ncomm->comm = *comm_out;
161   ncomm->next = NULL;
162   pcomms = counter->comms;
163   while (pcomms && pcomms->next) pcomms = pcomms->next;
164   if (pcomms) {
165     pcomms->next   = ncomm;
166   } else {
167     counter->comms = ncomm;
168   }
169   *comm_out = 0;
170   ierr = PetscSpinlockUnlock(&PetscCommSpinLock);CHKERRQ(ierr);
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175   PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator.
176 
177   Collective
178 
179   Input Parameter:
180   . comm_in - Input communicator
181 
182   Output Parameters:
183   + comm_out - Output communicator.  May be comm_in.
184   - first_tag - Tag available that has not already been used with this communicator (you may
185   pass in NULL if you do not need a tag)
186 
187   PETSc communicators are just regular MPI communicators that keep track of which
188   tags have been used to prevent tag conflict. If you pass a non-PETSc communicator into
189   a PETSc creation routine it will attach a private communicator for use in the objects communications.
190   The internal MPI_Comm is used to perform all the MPI calls for PETSc, the outer MPI_Comm is a user
191   level MPI_Comm that may be performing communication for the user or other library and so IS NOT used by PETSc.
192 
193 Level: developer
194 
195 .seealso: PetscObjectGetNewTag(), PetscCommGetNewTag(), PetscCommDestroy()
196 @*/
197 PetscErrorCode  PetscCommDuplicate(MPI_Comm comm_in,MPI_Comm *comm_out,PetscMPIInt *first_tag)
198 {
199   PetscErrorCode   ierr;
200   PetscCommCounter *counter;
201   PetscMPIInt      *maxval,flg;
202 
203   PetscFunctionBegin;
204   ierr = PetscSpinlockLock(&PetscCommSpinLock);CHKERRQ(ierr);
205   ierr = MPI_Comm_get_attr(comm_in,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr);
206 
207   if (!flg) {  /* this is NOT a PETSc comm */
208     union {MPI_Comm comm; void *ptr;} ucomm;
209     /* check if this communicator has a PETSc communicator imbedded in it */
210     ierr = MPI_Comm_get_attr(comm_in,Petsc_InnerComm_keyval,&ucomm,&flg);CHKERRMPI(ierr);
211     if (!flg) {
212       /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
213       ierr = MPI_Comm_dup(comm_in,comm_out);CHKERRMPI(ierr);
214       ierr = MPI_Comm_get_attr(MPI_COMM_WORLD,MPI_TAG_UB,&maxval,&flg);CHKERRMPI(ierr);
215       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
216       ierr = PetscNew(&counter);CHKERRQ(ierr); /* all fields of counter are zero'ed */
217       counter->tag = *maxval;
218       ierr = MPI_Comm_set_attr(*comm_out,Petsc_Counter_keyval,counter);CHKERRMPI(ierr);
219       ierr = PetscInfo(NULL,"Duplicating a communicator %ld %ld max tags = %d\n",(long)comm_in,(long)*comm_out,*maxval);CHKERRQ(ierr);
220 
221       /* save PETSc communicator inside user communicator, so we can get it next time */
222       ucomm.comm = *comm_out;   /* ONLY the comm part of the union is significant. */
223       ierr = MPI_Comm_set_attr(comm_in,Petsc_InnerComm_keyval,ucomm.ptr);CHKERRMPI(ierr);
224       ucomm.comm = comm_in;
225       ierr = MPI_Comm_set_attr(*comm_out,Petsc_OuterComm_keyval,ucomm.ptr);CHKERRMPI(ierr);
226     } else {
227       *comm_out = ucomm.comm;
228       /* pull out the inner MPI_Comm and hand it back to the caller */
229       ierr = MPI_Comm_get_attr(*comm_out,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr);
230       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
231       ierr = PetscInfo(NULL,"Using internal PETSc communicator %ld %ld\n",(long)comm_in,(long)*comm_out);CHKERRQ(ierr);
232     }
233   } else *comm_out = comm_in;
234 
235   if (PetscDefined(USE_DEBUG)) {
236     /*
237      Hanging here means that some processes have called PetscCommDuplicate() and others have not.
238      This likley means that a subset of processes in a MPI_Comm have attempted to create a PetscObject!
239      ALL processes that share a communicator MUST shared objects created from that communicator.
240      */
241     ierr = MPI_Barrier(comm_in);CHKERRMPI(ierr);
242   }
243 
244   if (counter->tag < 1) {
245     ierr = PetscInfo(NULL,"Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n",counter->refcount);CHKERRQ(ierr);
246     ierr = MPI_Comm_get_attr(MPI_COMM_WORLD,MPI_TAG_UB,&maxval,&flg);CHKERRMPI(ierr);
247     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
248     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
249   }
250 
251   if (first_tag) *first_tag = counter->tag--;
252 
253   counter->refcount++; /* number of references to this comm */
254   ierr = PetscSpinlockUnlock(&PetscCommSpinLock);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 /*@C
259    PetscCommDestroy - Frees communicator.  Use in conjunction with PetscCommDuplicate().
260 
261    Collective
262 
263    Input Parameter:
264 .  comm - the communicator to free
265 
266    Level: developer
267 
268 .seealso:   PetscCommDuplicate()
269 @*/
270 PetscErrorCode  PetscCommDestroy(MPI_Comm *comm)
271 {
272   PetscErrorCode   ierr;
273   PetscCommCounter *counter;
274   PetscMPIInt      flg;
275   MPI_Comm         icomm = *comm,ocomm;
276   union {MPI_Comm comm; void *ptr;} ucomm;
277 
278   PetscFunctionBegin;
279   if (*comm == MPI_COMM_NULL) PetscFunctionReturn(0);
280   ierr = PetscSpinlockLock(&PetscCommSpinLock);CHKERRQ(ierr);
281   ierr = MPI_Comm_get_attr(icomm,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr);
282   if (!flg) { /* not a PETSc comm, check if it has an inner comm */
283     ierr = MPI_Comm_get_attr(icomm,Petsc_InnerComm_keyval,&ucomm,&flg);CHKERRMPI(ierr);
284     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"MPI_Comm does not have tag/name counter nor does it have inner MPI_Comm");
285     icomm = ucomm.comm;
286     ierr = MPI_Comm_get_attr(icomm,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr);
287     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");
288   }
289 
290   counter->refcount--;
291 
292   if (!counter->refcount) {
293     /* if MPI_Comm has outer comm then remove reference to inner MPI_Comm from outer MPI_Comm */
294     ierr = MPI_Comm_get_attr(icomm,Petsc_OuterComm_keyval,&ucomm,&flg);CHKERRMPI(ierr);
295     if (flg) {
296       ocomm = ucomm.comm;
297       ierr = MPI_Comm_get_attr(ocomm,Petsc_InnerComm_keyval,&ucomm,&flg);CHKERRMPI(ierr);
298       if (flg) {
299         ierr = MPI_Comm_delete_attr(ocomm,Petsc_InnerComm_keyval);CHKERRMPI(ierr);
300       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Outer MPI_Comm %ld does not have expected reference to inner comm %ld, problem with corrupted memory",(long int)ocomm,(long int)icomm);
301     }
302 
303     ierr = PetscInfo(NULL,"Deleting PETSc MPI_Comm %ld\n",(long)icomm);CHKERRQ(ierr);
304     ierr = MPI_Comm_free(&icomm);CHKERRMPI(ierr);
305   }
306   *comm = MPI_COMM_NULL;
307   ierr = PetscSpinlockUnlock(&PetscCommSpinLock);CHKERRQ(ierr);
308   PetscFunctionReturn(0);
309 }
310 
311 /*@C
312     PetscObjectsListGetGlobalNumbering - computes a global numbering
313     of PetscObjects living on subcommunicators of a given communicator.
314 
315     Collective.
316 
317     Input Parameters:
318 +   comm    - MPI_Comm
319 .   len     - local length of objlist
320 -   objlist - a list of PETSc objects living on subcomms of comm and containing this comm rank
321               (subcomm ordering is assumed to be deadlock-free)
322 
323     Output Parameters:
324 +   count      - global number of distinct subcommunicators on objlist (may be > len)
325 -   numbering  - global numbers of objlist entries (allocated by user)
326 
327     Level: developer
328 
329 @*/
330 PetscErrorCode  PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering)
331 {
332   PetscErrorCode ierr;
333   PetscInt       i, roots, offset;
334   PetscMPIInt    size, rank;
335 
336   PetscFunctionBegin;
337   PetscValidPointer(objlist,3);
338   if (!count && !numbering) PetscFunctionReturn(0);
339 
340   ierr  = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
341   ierr  = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
342   roots = 0;
343   for (i = 0; i < len; ++i) {
344     PetscMPIInt srank;
345     ierr = MPI_Comm_rank(objlist[i]->comm, &srank);CHKERRMPI(ierr);
346     /* Am I the root of the i-th subcomm? */
347     if (!srank) ++roots;
348   }
349   if (count) {
350     /* Obtain the sum of all roots -- the global number of distinct subcomms. */
351     ierr = MPIU_Allreduce(&roots,count,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
352   }
353   if (numbering) {
354     /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
355     /*
356       At each subcomm root number all of the subcomms it owns locally
357       and make it global by calculating the shift among all of the roots.
358       The roots are ordered using the comm ordering.
359     */
360     ierr    = MPI_Scan(&roots,&offset,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
361     offset -= roots;
362     /* Now we are ready to broadcast global subcomm numbers within each subcomm.*/
363     /*
364       This is where the assumption of a deadlock-free ordering of the subcomms is assumed:
365       broadcast is collective on the subcomm.
366     */
367     roots = 0;
368     for (i = 0; i < len; ++i) {
369       PetscMPIInt srank;
370       numbering[i] = offset + roots; /* only meaningful if !srank. */
371 
372       ierr = MPI_Comm_rank(objlist[i]->comm, &srank);CHKERRMPI(ierr);
373       ierr = MPI_Bcast(numbering+i,1,MPIU_INT,0,objlist[i]->comm);CHKERRMPI(ierr);
374       if (!srank) ++roots;
375     }
376   }
377   PetscFunctionReturn(0);
378 }
379 
380