1 #include <petsc/private/petscimpl.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/hashmapobj.h> 3 #include <petsc/private/garbagecollector.h> 4 5 /* 6 A simple way to manage tags inside a communicator. 7 8 It uses the attributes to determine if a new communicator 9 is needed and to store the available tags. 10 11 */ 12 13 /*@ 14 PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All 15 processors that share the object MUST call this routine EXACTLY the same 16 number of times. This tag should only be used with the current objects 17 communicator; do NOT use it with any other MPI communicator. 18 19 Collective 20 21 Input Parameter: 22 . obj - the PETSc object; this must be cast with a (`PetscObject`), for example, 23 `PetscObjectGetNewTag`((`PetscObject`)mat,&tag); 24 25 Output Parameter: 26 . tag - the new tag 27 28 Level: developer 29 30 Note: 31 This tag is needed if one is writing MPI communication code involving message passing and needs unique MPI tags to ensure the messages are connected to this specific 32 object. 33 34 .seealso: `PetscCommGetNewTag()` 35 @*/ 36 PetscErrorCode PetscObjectGetNewTag(PetscObject obj, PetscMPIInt *tag) 37 { 38 PetscFunctionBegin; 39 PetscCall(PetscCommGetNewTag(obj->comm, tag)); 40 PetscFunctionReturn(PETSC_SUCCESS); 41 } 42 43 /*@ 44 PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator 45 46 Collective 47 48 Input Parameter: 49 . comm - the MPI communicator 50 51 Output Parameter: 52 . tag - the new tag 53 54 Level: developer 55 56 Notes: 57 All processors that share the communicator MUST call this routine EXACTLY the same number of 58 times. This tag should only be used with the current objects communicator; do NOT use it 59 with any other MPI communicator. 60 61 .seealso: `PetscObjectGetNewTag()`, `PetscCommDuplicate()` 62 @*/ 63 PetscErrorCode PetscCommGetNewTag(MPI_Comm comm, PetscMPIInt *tag) 64 { 65 PetscCommCounter *counter; 66 PetscMPIInt *maxval, flg; 67 68 PetscFunctionBegin; 69 PetscAssertPointer(tag, 2); 70 71 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg)); 72 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Bad MPI communicator supplied; must be a PETSc communicator"); 73 74 if (counter->tag < 1) { 75 PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount)); 76 PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg)); 77 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB"); 78 counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */ 79 } 80 81 *tag = counter->tag--; 82 if (PetscDefined(USE_DEBUG)) { 83 /* 84 Hanging here means that some processes have called PetscCommGetNewTag() and others have not. 85 */ 86 PetscCallMPI(MPI_Barrier(comm)); 87 } 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 /*@C 92 PetscCommGetComm - get a new MPI communicator from a PETSc communicator that can be passed off to another package 93 94 Collective 95 96 Input Parameter: 97 . comm_in - Input communicator 98 99 Output Parameter: 100 . comm_out - Output communicator 101 102 Level: developer 103 104 Notes: 105 Use `PetscCommRestoreComm()` to return the communicator when the external package no longer needs it 106 107 Certain MPI implementations have `MPI_Comm_free()` that do not work, thus one can run out of available MPI communicators causing 108 mysterious crashes in the code after running a long time. This routine allows reusing previously obtained MPI communicators that 109 are no longer needed. 110 111 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()` 112 @*/ 113 PetscErrorCode PetscCommGetComm(MPI_Comm comm_in, MPI_Comm *comm_out) 114 { 115 PetscCommCounter *counter; 116 PetscMPIInt flg; 117 118 PetscFunctionBegin; 119 PetscCall(PetscSpinlockLock(&PetscCommSpinLock)); 120 PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg)); 121 PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD"); 122 123 if (counter->comms) { 124 struct PetscCommStash *pcomms = counter->comms; 125 126 *comm_out = pcomms->comm; 127 counter->comms = pcomms->next; 128 PetscCall(PetscFree(pcomms)); 129 PetscCall(PetscInfo(NULL, "Reusing a communicator %ld %ld\n", (long)comm_in, (long)*comm_out)); 130 } else { 131 PetscCallMPI(MPI_Comm_dup(comm_in, comm_out)); 132 } 133 PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /*@C 138 PetscCommRestoreComm - restores an MPI communicator that was obtained with `PetscCommGetComm()` 139 140 Collective 141 142 Input Parameters: 143 + comm_in - Input communicator 144 - comm_out - returned communicator 145 146 Level: developer 147 148 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()` 149 @*/ 150 PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in, MPI_Comm *comm_out) 151 { 152 PetscCommCounter *counter; 153 PetscMPIInt flg; 154 struct PetscCommStash *pcomms, *ncomm; 155 156 PetscFunctionBegin; 157 PetscCall(PetscSpinlockLock(&PetscCommSpinLock)); 158 PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg)); 159 PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD"); 160 161 PetscCall(PetscMalloc(sizeof(struct PetscCommStash), &ncomm)); 162 ncomm->comm = *comm_out; 163 ncomm->next = NULL; 164 pcomms = counter->comms; 165 while (pcomms && pcomms->next) pcomms = pcomms->next; 166 if (pcomms) { 167 pcomms->next = ncomm; 168 } else { 169 counter->comms = ncomm; 170 } 171 *comm_out = 0; 172 PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock)); 173 PetscFunctionReturn(PETSC_SUCCESS); 174 } 175 176 /*@C 177 PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator. 178 179 Collective 180 181 Input Parameter: 182 . comm_in - Input communicator 183 184 Output Parameters: 185 + comm_out - Output communicator. May be `comm_in`. 186 - first_tag - Tag available that has not already been used with this communicator (you may pass in `NULL` if you do not need a tag) 187 188 Level: developer 189 190 Note: 191 PETSc communicators are just regular MPI communicators that keep track of which 192 tags have been used to prevent tag conflict. If you pass a non-PETSc communicator into 193 a PETSc creation routine it will attach a private communicator for use in the objects communications. 194 The internal `MPI_Comm` is used to perform all the MPI calls for PETSc, the outer `MPI_Comm` is a user 195 and is not used by PETSc. 196 197 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()` 198 @*/ 199 PetscErrorCode PetscCommDuplicate(MPI_Comm comm_in, MPI_Comm *comm_out, PetscMPIInt *first_tag) 200 { 201 PetscInt64 *cidx; 202 PetscCommCounter *counter; 203 PetscMPIInt *maxval, flg; 204 205 PetscFunctionBegin; 206 PetscCall(PetscSpinlockLock(&PetscCommSpinLock)); 207 PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg)); 208 209 if (!flg) { /* this is NOT a PETSc comm */ 210 union 211 { 212 MPI_Comm comm; 213 void *ptr; 214 } ucomm; 215 /* check if this communicator has a PETSc communicator embedded in it */ 216 PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_InnerComm_keyval, &ucomm, &flg)); 217 if (!flg) { 218 /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */ 219 PetscCallMPI(MPI_Comm_dup(comm_in, comm_out)); 220 PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg)); 221 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB"); 222 PetscCall(PetscNew(&counter)); /* all fields of counter are zero'ed */ 223 counter->tag = *maxval; 224 PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_Counter_keyval, counter)); 225 /* Add an object creation index to the communicator */ 226 PetscCall(PetscNew(&cidx)); 227 PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_CreationIdx_keyval, cidx)); 228 PetscCall(PetscInfo(NULL, "Duplicating a communicator %ld %ld max tags = %d\n", (long)comm_in, (long)*comm_out, *maxval)); 229 230 /* save PETSc communicator inside user communicator, so we can get it next time */ 231 ucomm.comm = *comm_out; /* ONLY the comm part of the union is significant. */ 232 PetscCallMPI(MPI_Comm_set_attr(comm_in, Petsc_InnerComm_keyval, ucomm.ptr)); 233 ucomm.comm = comm_in; 234 PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_OuterComm_keyval, ucomm.ptr)); 235 } else { 236 *comm_out = ucomm.comm; 237 /* pull out the inner MPI_Comm and hand it back to the caller */ 238 PetscCallMPI(MPI_Comm_get_attr(*comm_out, Petsc_Counter_keyval, &counter, &flg)); 239 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set"); 240 PetscCall(PetscInfo(NULL, "Using internal PETSc communicator %ld %ld\n", (long)comm_in, (long)*comm_out)); 241 } 242 } else *comm_out = comm_in; 243 244 if (PetscDefined(USE_DEBUG)) { 245 /* 246 Hanging here means that some processes have called PetscCommDuplicate() and others have not. 247 This likely means that a subset of processes in a MPI_Comm have attempted to create a PetscObject! 248 ALL processes that share a communicator MUST shared objects created from that communicator. 249 */ 250 PetscCallMPI(MPI_Barrier(comm_in)); 251 } 252 253 if (counter->tag < 1) { 254 PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount)); 255 PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg)); 256 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB"); 257 counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */ 258 } 259 260 if (first_tag) *first_tag = counter->tag--; 261 262 counter->refcount++; /* number of references to this comm */ 263 PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock)); 264 PetscFunctionReturn(PETSC_SUCCESS); 265 } 266 267 /*@C 268 PetscCommDestroy - Frees communicator obtained with `PetscCommDuplicate()`. 269 270 Collective 271 272 Input Parameter: 273 . comm - the communicator to free 274 275 Level: developer 276 277 Notes: 278 Sets `comm` to `NULL` 279 280 The communicator is reference counted so it is only truly removed from the system when its reference count drops to zero 281 282 .seealso: `PetscCommDuplicate()` 283 @*/ 284 PetscErrorCode PetscCommDestroy(MPI_Comm *comm) 285 { 286 PetscInt64 *cidx; 287 PetscCommCounter *counter; 288 PetscMPIInt flg; 289 PetscGarbage garbage; 290 MPI_Comm icomm = *comm, ocomm; 291 union 292 { 293 MPI_Comm comm; 294 void *ptr; 295 } ucomm; 296 297 PetscFunctionBegin; 298 if (*comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS); 299 PetscCall(PetscSpinlockLock(&PetscCommSpinLock)); 300 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg)); 301 if (!flg) { /* not a PETSc comm, check if it has an inner comm */ 302 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_InnerComm_keyval, &ucomm, &flg)); 303 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have tag/name counter nor does it have inner MPI_Comm"); 304 icomm = ucomm.comm; 305 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg)); 306 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory"); 307 } 308 counter->refcount--; 309 if (!counter->refcount) { 310 /* if MPI_Comm has outer comm then remove reference to inner MPI_Comm from outer MPI_Comm */ 311 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_OuterComm_keyval, &ucomm, &flg)); 312 if (flg) { 313 ocomm = ucomm.comm; 314 PetscCallMPI(MPI_Comm_get_attr(ocomm, Petsc_InnerComm_keyval, &ucomm, &flg)); 315 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Outer MPI_Comm %ld does not have expected reference to inner comm %ld, problem with corrupted memory", (long int)ocomm, (long int)icomm); 316 PetscCallMPI(MPI_Comm_delete_attr(ocomm, Petsc_InnerComm_keyval)); 317 } 318 319 /* Remove the object creation index on the communicator */ 320 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_CreationIdx_keyval, &cidx, &flg)); 321 if (flg) { 322 PetscCall(PetscFree(cidx)); 323 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have object creation index"); 324 325 /* Remove garbage hashmap set up by garbage collection */ 326 PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Garbage_HMap_keyval, &garbage, &flg)); 327 if (flg) { 328 PetscInt entries = 0; 329 PetscCall(PetscHMapObjGetSize(garbage.map, &entries)); 330 if (entries > 0) PetscCall(PetscGarbageCleanup(icomm)); 331 PetscCall(PetscHMapObjDestroy(&garbage.map)); 332 } 333 334 PetscCall(PetscInfo(NULL, "Deleting PETSc MPI_Comm %ld\n", (long)icomm)); 335 PetscCallMPI(MPI_Comm_free(&icomm)); 336 } 337 *comm = MPI_COMM_NULL; 338 PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock)); 339 PetscFunctionReturn(PETSC_SUCCESS); 340 } 341 342 /*@C 343 PetscObjectsListGetGlobalNumbering - computes a global numbering 344 of `PetscObject`s living on subcommunicators of a given communicator. 345 346 Collective. 347 348 Input Parameters: 349 + comm - the `MPI_Comm` 350 . len - local length of `objlist` 351 - objlist - a list of PETSc objects living on subcomms of comm and containing this comm rank 352 (subcomm ordering is assumed to be deadlock-free) 353 354 Output Parameters: 355 + count - global number of distinct subcommunicators on objlist (may be > len) 356 - numbering - global numbers of objlist entries (allocated by user) 357 358 Level: developer 359 360 Note: 361 This is needed when PETSc is used with certain languages that do garbage collection to manage object life cycles. 362 363 .seealso: `PetscCommDuplicate()`, `PetscObjectDestroy()` 364 @*/ 365 PetscErrorCode PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering) 366 { 367 PetscInt i, roots, offset; 368 PetscMPIInt size, rank; 369 370 PetscFunctionBegin; 371 PetscAssertPointer(objlist, 3); 372 if (!count && !numbering) PetscFunctionReturn(PETSC_SUCCESS); 373 374 PetscCallMPI(MPI_Comm_size(comm, &size)); 375 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 376 roots = 0; 377 for (i = 0; i < len; ++i) { 378 PetscMPIInt srank; 379 PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank)); 380 /* Am I the root of the i-th subcomm? */ 381 if (!srank) ++roots; 382 } 383 if (count) { 384 /* Obtain the sum of all roots -- the global number of distinct subcomms. */ 385 PetscCall(MPIU_Allreduce(&roots, count, 1, MPIU_INT, MPI_SUM, comm)); 386 } 387 if (numbering) { 388 /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */ 389 /* 390 At each subcomm root number all of the subcomms it owns locally 391 and make it global by calculating the shift among all of the roots. 392 The roots are ordered using the comm ordering. 393 */ 394 PetscCallMPI(MPI_Scan(&roots, &offset, 1, MPIU_INT, MPI_SUM, comm)); 395 offset -= roots; 396 /* Now we are ready to broadcast global subcomm numbers within each subcomm.*/ 397 /* 398 This is where the assumption of a deadlock-free ordering of the subcomms is assumed: 399 broadcast is collective on the subcomm. 400 */ 401 roots = 0; 402 for (i = 0; i < len; ++i) { 403 PetscMPIInt srank; 404 numbering[i] = offset + roots; /* only meaningful if !srank. */ 405 406 PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank)); 407 PetscCallMPI(MPI_Bcast(numbering + i, 1, MPIU_INT, 0, objlist[i]->comm)); 408 if (!srank) ++roots; 409 } 410 } 411 PetscFunctionReturn(PETSC_SUCCESS); 412 } 413