xref: /petsc/src/sys/utils/mpishm.c (revision 811af0c4b09a35de4306c442f88bd09fdc09897d)
15f7487a0SJunchao Zhang #include <petscsys.h> /*I  "petscsys.h"  I*/
25f7487a0SJunchao Zhang #include <petsc/private/petscimpl.h>
35f7487a0SJunchao Zhang 
45f7487a0SJunchao Zhang struct _n_PetscShmComm {
55f7487a0SJunchao Zhang   PetscMPIInt *globranks;         /* global ranks of each rank in the shared memory communicator */
65f7487a0SJunchao Zhang   PetscMPIInt  shmsize;           /* size of the shared memory communicator */
75f7487a0SJunchao Zhang   MPI_Comm     globcomm, shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
85f7487a0SJunchao Zhang };
95f7487a0SJunchao Zhang 
105f7487a0SJunchao Zhang /*
1133779a13SJunchao Zhang    Private routine to delete the internal shared memory communicator when a communicator is freed.
125f7487a0SJunchao Zhang 
135f7487a0SJunchao Zhang    This is called by MPI, not by users; MPI_Comm_free() invokes it when the communicator that has this data as an attribute is freed.
145f7487a0SJunchao Zhang 
155f7487a0SJunchao Zhang    Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
165f7487a0SJunchao Zhang 
175f7487a0SJunchao Zhang */
189371c9d4SSatish Balay PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *val, void *extra_state) {
195f7487a0SJunchao Zhang   PetscShmComm p = (PetscShmComm)val;
205f7487a0SJunchao Zhang 
215f7487a0SJunchao Zhang   PetscFunctionBegin;
229566063dSJacob Faibussowitsch   PetscCallMPI(PetscInfo(NULL, "Deleting shared memory subcommunicator in an MPI_Comm %ld\n", (long)comm));
239566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free(&p->shmcomm));
249566063dSJacob Faibussowitsch   PetscCallMPI(PetscFree(p->globranks));
259566063dSJacob Faibussowitsch   PetscCallMPI(PetscFree(val));
265f7487a0SJunchao Zhang   PetscFunctionReturn(MPI_SUCCESS);
275f7487a0SJunchao Zhang }
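
/* For reference only (a sketch, not code in this file): the delete callback above is attached to
   the keyval Petsc_ShmComm_keyval, which PETSc creates elsewhere during initialization, roughly as

     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_ShmComm_Attr_Delete_Fn, &Petsc_ShmComm_keyval, NULL));

   MPI then invokes the callback automatically when a communicator carrying the attribute is freed. */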
285f7487a0SJunchao Zhang 
29b48189acSJunchao Zhang #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
30b48189acSJunchao Zhang /* Data structures to support freeing comms created in PetscShmCommGet().
31b48189acSJunchao Zhang   Since communicators passed to PetscShmCommGet() are very likely either a petsc
32b48189acSJunchao Zhang   inner communicator or an MPI communicator with a linked petsc inner communicator,
33b48189acSJunchao Zhang   a simple static array suffices to store the dupped communicators in the rare
34b48189acSJunchao Zhang   cases where neither holds.
35b48189acSJunchao Zhang  */
36b48189acSJunchao Zhang #define MAX_SHMCOMM_DUPPED_COMMS 16
37b48189acSJunchao Zhang static PetscInt       num_dupped_comms = 0;
38b48189acSJunchao Zhang static MPI_Comm       shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS];
399371c9d4SSatish Balay static PetscErrorCode PetscShmCommDestroyDuppedComms(void) {
40b48189acSJunchao Zhang   PetscInt i;
41b48189acSJunchao Zhang   PetscFunctionBegin;
429566063dSJacob Faibussowitsch   for (i = 0; i < num_dupped_comms; i++) PetscCall(PetscCommDestroy(&shmcomm_dupped_comms[i]));
43b48189acSJunchao Zhang   num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */
44b48189acSJunchao Zhang   PetscFunctionReturn(0);
45b48189acSJunchao Zhang }
46b48189acSJunchao Zhang #endif
47b48189acSJunchao Zhang 
485f7487a0SJunchao Zhang /*@C
49*811af0c4SBarry Smith     PetscShmCommGet - Returns a sub-communicator of all ranks that share common memory
505f7487a0SJunchao Zhang 
51d083f849SBarry Smith     Collective.
525f7487a0SJunchao Zhang 
535f7487a0SJunchao Zhang     Input Parameter:
54*811af0c4SBarry Smith .   globcomm - `MPI_Comm`, which can be a user MPI_Comm or a PETSc inner MPI_Comm
555f7487a0SJunchao Zhang 
565f7487a0SJunchao Zhang     Output Parameter:
575f7487a0SJunchao Zhang .   pshmcomm - the PETSc shared memory communicator object
585f7487a0SJunchao Zhang 
595f7487a0SJunchao Zhang     Level: developer
605f7487a0SJunchao Zhang 
61*811af0c4SBarry Smith     Note:
625f7487a0SJunchao Zhang        When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis
635f7487a0SJunchao Zhang 
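    Example Usage:
    A minimal sketch (assuming PETSc has been initialized and `PETSC_COMM_WORLD` is the communicator of interest):
.vb
    PetscShmComm pshmcomm;
    PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &pshmcomm));
.ve
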
64*811af0c4SBarry Smith .seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
655f7487a0SJunchao Zhang @*/
669371c9d4SSatish Balay PetscErrorCode PetscShmCommGet(MPI_Comm globcomm, PetscShmComm *pshmcomm) {
675f7487a0SJunchao Zhang #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
685f7487a0SJunchao Zhang   MPI_Group         globgroup, shmgroup;
695f7487a0SJunchao Zhang   PetscMPIInt      *shmranks, i, flg;
705f7487a0SJunchao Zhang   PetscCommCounter *counter;
715f7487a0SJunchao Zhang 
725f7487a0SJunchao Zhang   PetscFunctionBegin;
733ca90d2dSJacob Faibussowitsch   PetscValidPointer(pshmcomm, 2);
74b48189acSJunchao Zhang   /* Get a petsc inner comm, since we always want to stash pshmcomm on petsc inner comms */
759566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_Counter_keyval, &counter, &flg));
76b48189acSJunchao Zhang   if (!flg) { /* globcomm is not a petsc comm */
779371c9d4SSatish Balay     union
789371c9d4SSatish Balay     {
799371c9d4SSatish Balay       MPI_Comm comm;
809371c9d4SSatish Balay       void    *ptr;
819371c9d4SSatish Balay     } ucomm;
82b48189acSJunchao Zhang     /* check if globcomm already has a linked petsc inner comm */
839566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_InnerComm_keyval, &ucomm, &flg));
84b48189acSJunchao Zhang     if (!flg) {
85b48189acSJunchao Zhang       /* globcomm does not have a linked petsc inner comm, so we create one and replace globcomm with it */
8608401ef6SPierre Jolivet       PetscCheck(num_dupped_comms < MAX_SHMCOMM_DUPPED_COMMS, globcomm, PETSC_ERR_PLIB, "PetscShmCommGet() is trying to dup more than %d MPI_Comms", MAX_SHMCOMM_DUPPED_COMMS);
879566063dSJacob Faibussowitsch       PetscCall(PetscCommDuplicate(globcomm, &globcomm, NULL));
88b48189acSJunchao Zhang       /* The first time through, register a function to free the dupped petsc comms at PetscFinalize() */
899566063dSJacob Faibussowitsch       if (num_dupped_comms == 0) PetscCall(PetscRegisterFinalize(PetscShmCommDestroyDuppedComms));
90b48189acSJunchao Zhang       shmcomm_dupped_comms[num_dupped_comms] = globcomm;
91b48189acSJunchao Zhang       num_dupped_comms++;
92b48189acSJunchao Zhang     } else {
93b48189acSJunchao Zhang       /* otherwise, we pull out the inner comm and use it as globcomm */
94b48189acSJunchao Zhang       globcomm = ucomm.comm;
95b48189acSJunchao Zhang     }
96b48189acSJunchao Zhang   }
975f7487a0SJunchao Zhang 
98b48189acSJunchao Zhang   /* Check if globcomm already has an attached pshmcomm. If not, create one */
999566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_ShmComm_keyval, pshmcomm, &flg));
1005f7487a0SJunchao Zhang   if (flg) PetscFunctionReturn(0);
1015f7487a0SJunchao Zhang 
1029566063dSJacob Faibussowitsch   PetscCall(PetscNew(pshmcomm));
1035f7487a0SJunchao Zhang   (*pshmcomm)->globcomm = globcomm;
1045f7487a0SJunchao Zhang 
1059566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &(*pshmcomm)->shmcomm));
1065f7487a0SJunchao Zhang 
1079566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size((*pshmcomm)->shmcomm, &(*pshmcomm)->shmsize));
1089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_group(globcomm, &globgroup));
1099566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup));
1109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &shmranks));
1119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &(*pshmcomm)->globranks));
1125f7487a0SJunchao Zhang   for (i = 0; i < (*pshmcomm)->shmsize; i++) shmranks[i] = i;
1139566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks));
1149566063dSJacob Faibussowitsch   PetscCall(PetscFree(shmranks));
1159566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Group_free(&globgroup));
1169566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Group_free(&shmgroup));
1175f7487a0SJunchao Zhang 
11848a46eb9SPierre Jolivet   for (i = 0; i < (*pshmcomm)->shmsize; i++) PetscCall(PetscInfo(NULL, "Shared memory rank %d global rank %d\n", i, (*pshmcomm)->globranks[i]));
1199566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_set_attr(globcomm, Petsc_ShmComm_keyval, *pshmcomm));
1205f7487a0SJunchao Zhang   PetscFunctionReturn(0);
1215f7487a0SJunchao Zhang #else
1225f7487a0SJunchao Zhang   SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
1235f7487a0SJunchao Zhang #endif
1245f7487a0SJunchao Zhang }
1255f7487a0SJunchao Zhang 
1265f7487a0SJunchao Zhang /*@C
1275f7487a0SJunchao Zhang     PetscShmCommGlobalToLocal - Given a global rank returns the local rank in the shared memory communicator
1285f7487a0SJunchao Zhang 
1295f7487a0SJunchao Zhang     Input Parameters:
1305f7487a0SJunchao Zhang +   pshmcomm - the shared memory communicator object
1315f7487a0SJunchao Zhang -   grank    - the global rank
1325f7487a0SJunchao Zhang 
1335f7487a0SJunchao Zhang     Output Parameter:
134*811af0c4SBarry Smith .   lrank - the local rank, or `MPI_PROC_NULL` if it does not exist
1355f7487a0SJunchao Zhang 
1365f7487a0SJunchao Zhang     Level: developer
1375f7487a0SJunchao Zhang 
1385f7487a0SJunchao Zhang     Developer Notes:
1395f7487a0SJunchao Zhang     Assumes the pshmcomm->globranks[] is sorted
1405f7487a0SJunchao Zhang 
1415f7487a0SJunchao Zhang     It may be better to rewrite this to map multiple global ranks to local in the same function call
1425f7487a0SJunchao Zhang 
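    Example Usage:
    A sketch that checks whether global rank 0 shares memory with this process (pshmcomm assumed to come from `PetscShmCommGet()`):
.vb
    PetscMPIInt lrank;
    PetscCall(PetscShmCommGlobalToLocal(pshmcomm, 0, &lrank));
    if (lrank == MPI_PROC_NULL) { /* global rank 0 is not on this shared memory node */ }
.ve
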
143*811af0c4SBarry Smith .seealso: `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
1445f7487a0SJunchao Zhang @*/
1459371c9d4SSatish Balay PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm, PetscMPIInt grank, PetscMPIInt *lrank) {
1465f7487a0SJunchao Zhang   PetscMPIInt low, high, t, i;
1475f7487a0SJunchao Zhang   PetscBool   flg = PETSC_FALSE;
1485f7487a0SJunchao Zhang 
1495f7487a0SJunchao Zhang   PetscFunctionBegin;
1503ca90d2dSJacob Faibussowitsch   PetscValidPointer(pshmcomm, 1);
151dadcf809SJacob Faibussowitsch   PetscValidIntPointer(lrank, 3);
1525f7487a0SJunchao Zhang   *lrank = MPI_PROC_NULL;
1535f7487a0SJunchao Zhang   if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0);
1545f7487a0SJunchao Zhang   if (grank > pshmcomm->globranks[pshmcomm->shmsize - 1]) PetscFunctionReturn(0);
1559566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-noshared", &flg, NULL));
1565f7487a0SJunchao Zhang   if (flg) PetscFunctionReturn(0);
1575f7487a0SJunchao Zhang   low  = 0;
1585f7487a0SJunchao Zhang   high = pshmcomm->shmsize;
1595f7487a0SJunchao Zhang   while (high - low > 5) {
1605f7487a0SJunchao Zhang     t = (low + high) / 2;
1615f7487a0SJunchao Zhang     if (pshmcomm->globranks[t] > grank) high = t;
1625f7487a0SJunchao Zhang     else low = t;
1635f7487a0SJunchao Zhang   }
1645f7487a0SJunchao Zhang   for (i = low; i < high; i++) {
1655f7487a0SJunchao Zhang     if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0);
1665f7487a0SJunchao Zhang     if (pshmcomm->globranks[i] == grank) {
1675f7487a0SJunchao Zhang       *lrank = i;
1685f7487a0SJunchao Zhang       PetscFunctionReturn(0);
1695f7487a0SJunchao Zhang     }
1705f7487a0SJunchao Zhang   }
1715f7487a0SJunchao Zhang   PetscFunctionReturn(0);
1725f7487a0SJunchao Zhang }
1735f7487a0SJunchao Zhang 
1745f7487a0SJunchao Zhang /*@C
1755f7487a0SJunchao Zhang     PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator returns the global rank
1765f7487a0SJunchao Zhang 
1775f7487a0SJunchao Zhang     Input Parameters:
1785f7487a0SJunchao Zhang +   pshmcomm - the shared memory communicator object
1795f7487a0SJunchao Zhang -   lrank    - the local rank in the shared memory communicator
1805f7487a0SJunchao Zhang 
1815f7487a0SJunchao Zhang     Output Parameter:
1825f7487a0SJunchao Zhang .   grank - the global rank in the global communicator where the shared memory communicator is built
1835f7487a0SJunchao Zhang 
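    Example Usage:
    A sketch that translates every local rank back to its global rank (pshmcomm assumed to come from `PetscShmCommGet()`):
.vb
    MPI_Comm    shmcomm;
    PetscMPIInt i, shmsize, grank;
    PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shmcomm));
    PetscCallMPI(MPI_Comm_size(shmcomm, &shmsize));
    for (i = 0; i < shmsize; i++) PetscCall(PetscShmCommLocalToGlobal(pshmcomm, i, &grank));
.ve
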
1845f7487a0SJunchao Zhang     Level: developer
1855f7487a0SJunchao Zhang 
186*811af0c4SBarry Smith .seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommGetMpiShmComm()`
1875f7487a0SJunchao Zhang @*/
1889371c9d4SSatish Balay PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm, PetscMPIInt lrank, PetscMPIInt *grank) {
1895f7487a0SJunchao Zhang   PetscFunctionBegin;
1903ca90d2dSJacob Faibussowitsch   PetscValidPointer(pshmcomm, 1);
191dadcf809SJacob Faibussowitsch   PetscValidIntPointer(grank, 3);
1922c71b3e2SJacob Faibussowitsch   PetscCheck(lrank >= 0 && lrank < pshmcomm->shmsize, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "No rank %d in the shared memory communicator", lrank);
1935f7487a0SJunchao Zhang   *grank = pshmcomm->globranks[lrank];
1945f7487a0SJunchao Zhang   PetscFunctionReturn(0);
1955f7487a0SJunchao Zhang }
1965f7487a0SJunchao Zhang 
1975f7487a0SJunchao Zhang /*@C
1985f7487a0SJunchao Zhang     PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory
1995f7487a0SJunchao Zhang 
2005f7487a0SJunchao Zhang     Input Parameter:
2015f7487a0SJunchao Zhang .   pshmcomm - PetscShmComm object obtained with PetscShmCommGet()
2025f7487a0SJunchao Zhang 
2035f7487a0SJunchao Zhang     Output Parameter:
2045f7487a0SJunchao Zhang .   comm     - the MPI communicator
2055f7487a0SJunchao Zhang 
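    Example Usage:
    A sketch that retrieves the on-node communicator and this process's rank in it (assuming PETSc has been initialized on `PETSC_COMM_WORLD`):
.vb
    PetscShmComm pshmcomm;
    MPI_Comm     shmcomm;
    PetscMPIInt  shmrank;
    PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &pshmcomm));
    PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shmcomm));
    PetscCallMPI(MPI_Comm_rank(shmcomm, &shmrank));
.ve
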
2065f7487a0SJunchao Zhang     Level: developer
2075f7487a0SJunchao Zhang 
208*811af0c4SBarry Smith .seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`
2095f7487a0SJunchao Zhang @*/
2109371c9d4SSatish Balay PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm, MPI_Comm *comm) {
2115f7487a0SJunchao Zhang   PetscFunctionBegin;
2123ca90d2dSJacob Faibussowitsch   PetscValidPointer(pshmcomm, 1);
2133ca90d2dSJacob Faibussowitsch   PetscValidPointer(comm, 2);
2145f7487a0SJunchao Zhang   *comm = pshmcomm->shmcomm;
2155f7487a0SJunchao Zhang   PetscFunctionReturn(0);
2165f7487a0SJunchao Zhang }
2175f7487a0SJunchao Zhang 
21820b3346cSJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT)
219a32e93adSJunchao Zhang #include <pthread.h>
220a32e93adSJunchao Zhang #include <hwloc.h>
221a32e93adSJunchao Zhang #include <omp.h>
222a32e93adSJunchao Zhang 
223eff715bbSJunchao Zhang /* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
224eff715bbSJunchao Zhang    otherwise use MPI_Win_allocate_shared(). They should have the same effect, except that MPI-3 is much
2254df5c2c7SJunchao Zhang    simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
2264df5c2c7SJunchao Zhang    by 50%. Until the reason is understood, we use mmap() instead.
2274df5c2c7SJunchao Zhang */
2284df5c2c7SJunchao Zhang #define USE_MMAP_ALLOCATE_SHARED_MEMORY
2294df5c2c7SJunchao Zhang 
2304df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
2314df5c2c7SJunchao Zhang #include <sys/mman.h>
2324df5c2c7SJunchao Zhang #include <sys/types.h>
2334df5c2c7SJunchao Zhang #include <sys/stat.h>
2344df5c2c7SJunchao Zhang #include <fcntl.h>
2354df5c2c7SJunchao Zhang #endif
2364df5c2c7SJunchao Zhang 
237a32e93adSJunchao Zhang struct _n_PetscOmpCtrl {
238a32e93adSJunchao Zhang   MPI_Comm           omp_comm;        /* a shared memory communicator to spawn omp threads */
239a32e93adSJunchao Zhang   MPI_Comm           omp_master_comm; /* a communicator to give to third party libraries */
240a32e93adSJunchao Zhang   PetscMPIInt        omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
241a32e93adSJunchao Zhang   PetscBool          is_omp_master;   /* true on rank 0 of omp_comm */
242a32e93adSJunchao Zhang   MPI_Win            omp_win;         /* a shared memory window containing a barrier */
243a32e93adSJunchao Zhang   pthread_barrier_t *barrier;         /* pointer to the barrier */
244a32e93adSJunchao Zhang   hwloc_topology_t   topology;
245a32e93adSJunchao Zhang   hwloc_cpuset_t     cpuset;     /* cpu bindings of omp master */
246a32e93adSJunchao Zhang   hwloc_cpuset_t     omp_cpuset; /* union of cpu bindings of ranks in omp_comm */
247a32e93adSJunchao Zhang };
248a32e93adSJunchao Zhang 
249eff715bbSJunchao Zhang /* Allocate and initialize a pthread_barrier_t object in memory shared by processes in omp_comm
2508fcaa860SBarry Smith    contained by the controller.
251eff715bbSJunchao Zhang 
2528fcaa860SBarry Smith    PETSc OpenMP controller users do not call this function directly. This function exists
253eff715bbSJunchao Zhang    only because we want to separate shared memory allocation methods from other code.
254eff715bbSJunchao Zhang  */
2559371c9d4SSatish Balay static inline PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl) {
256a32e93adSJunchao Zhang   MPI_Aint              size;
257a32e93adSJunchao Zhang   void                 *baseptr;
258a32e93adSJunchao Zhang   pthread_barrierattr_t attr;
259a32e93adSJunchao Zhang 
2604df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
2614df5c2c7SJunchao Zhang   PetscInt  fd;
2624df5c2c7SJunchao Zhang   PetscChar pathname[PETSC_MAX_PATH_LEN];
2634df5c2c7SJunchao Zhang #else
2644df5c2c7SJunchao Zhang   PetscMPIInt disp_unit;
2654df5c2c7SJunchao Zhang #endif
2664df5c2c7SJunchao Zhang 
2674df5c2c7SJunchao Zhang   PetscFunctionBegin;
2684df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
2694df5c2c7SJunchao Zhang   size = sizeof(pthread_barrier_t);
2704df5c2c7SJunchao Zhang   if (ctrl->is_omp_master) {
271eff715bbSJunchao Zhang     /* use PETSC_COMM_SELF in PetscGetTmp(), since it is a collective call; using omp_comm here would bcast the partially populated pathname to the slaves */
2729566063dSJacob Faibussowitsch     PetscCall(PetscGetTmp(PETSC_COMM_SELF, pathname, PETSC_MAX_PATH_LEN));
2739566063dSJacob Faibussowitsch     PetscCall(PetscStrlcat(pathname, "/petsc-shm-XXXXXX", PETSC_MAX_PATH_LEN));
2744df5c2c7SJunchao Zhang     /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
2759371c9d4SSatish Balay     fd = mkstemp(pathname);
2769371c9d4SSatish Balay     PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not create tmp file %s with mkstemp", pathname);
2779566063dSJacob Faibussowitsch     PetscCall(ftruncate(fd, size));
2789371c9d4SSatish Balay     baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
2799371c9d4SSatish Balay     PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
2809566063dSJacob Faibussowitsch     PetscCall(close(fd));
2819566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
282eff715bbSJunchao Zhang     /* this MPI_Barrier waits for the slaves to open the file before the master unlinks it */
2839566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
2849566063dSJacob Faibussowitsch     PetscCall(unlink(pathname));
2854df5c2c7SJunchao Zhang   } else {
2869566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
2879371c9d4SSatish Balay     fd = open(pathname, O_RDWR);
2889371c9d4SSatish Balay     PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not open tmp file %s", pathname);
2899371c9d4SSatish Balay     baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
2909371c9d4SSatish Balay     PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
2919566063dSJacob Faibussowitsch     PetscCall(close(fd));
2929566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
2934df5c2c7SJunchao Zhang   }
2944df5c2c7SJunchao Zhang #else
295a32e93adSJunchao Zhang   size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
2969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Win_allocate_shared(size, 1, MPI_INFO_NULL, ctrl->omp_comm, &baseptr, &ctrl->omp_win));
2979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Win_shared_query(ctrl->omp_win, 0, &size, &disp_unit, &baseptr));
2984df5c2c7SJunchao Zhang #endif
299a32e93adSJunchao Zhang   ctrl->barrier = (pthread_barrier_t *)baseptr;
300a32e93adSJunchao Zhang 
301a32e93adSJunchao Zhang   /* omp master initializes the barrier */
302a32e93adSJunchao Zhang   if (ctrl->is_omp_master) {
3039566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(ctrl->omp_comm, &ctrl->omp_comm_size));
3049566063dSJacob Faibussowitsch     PetscCall(pthread_barrierattr_init(&attr));
3059566063dSJacob Faibussowitsch     PetscCall(pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); /* make the barrier also work for processes */
3069566063dSJacob Faibussowitsch     PetscCall(pthread_barrier_init(ctrl->barrier, &attr, (unsigned int)ctrl->omp_comm_size));
3079566063dSJacob Faibussowitsch     PetscCall(pthread_barrierattr_destroy(&attr));
308a32e93adSJunchao Zhang   }
309a32e93adSJunchao Zhang 
3104df5c2c7SJunchao Zhang   /* this MPI_Barrier is to make sure the omp barrier is initialized before slaves use it */
3119566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
312a32e93adSJunchao Zhang   PetscFunctionReturn(0);
313a32e93adSJunchao Zhang }
314a32e93adSJunchao Zhang 
3158fcaa860SBarry Smith /* Destroy the pthread barrier in the PETSc OpenMP controller */
3169371c9d4SSatish Balay static inline PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl) {
3174df5c2c7SJunchao Zhang   PetscFunctionBegin;
3184df5c2c7SJunchao Zhang   /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before master destroys it */
3199566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
3209566063dSJacob Faibussowitsch   if (ctrl->is_omp_master) PetscCall(pthread_barrier_destroy(ctrl->barrier));
3214df5c2c7SJunchao Zhang 
3224df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
3239566063dSJacob Faibussowitsch   PetscCall(munmap(ctrl->barrier, sizeof(pthread_barrier_t)));
3244df5c2c7SJunchao Zhang #else
3259566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Win_free(&ctrl->omp_win));
3264df5c2c7SJunchao Zhang #endif
327a32e93adSJunchao Zhang   PetscFunctionReturn(0);
328a32e93adSJunchao Zhang }
329a32e93adSJunchao Zhang 
330eff715bbSJunchao Zhang /*@C
331*811af0c4SBarry Smith     PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries that use OpenMP
332eff715bbSJunchao Zhang 
333d8d19677SJose E. Roman     Input Parameters:
334eff715bbSJunchao Zhang +   petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
335a2b725a8SWilliam Gropp -   nthreads   - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value
336eff715bbSJunchao Zhang 
337eff715bbSJunchao Zhang     Output Parameter:
3388fcaa860SBarry Smith .   pctrl      - a PETSc OpenMP controller
339eff715bbSJunchao Zhang 
340eff715bbSJunchao Zhang     Level: developer
341eff715bbSJunchao Zhang 
342*811af0c4SBarry Smith     Developer Note:
343*811af0c4SBarry Smith     Possibly use the variable `PetscNumOMPThreads` to determine the number of threads to use
3448fcaa860SBarry Smith 
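    Example Usage:
    A sketch of a controller's typical lifetime (assuming PETSc was configured with OpenMP support and has been initialized):
.vb
    PetscOmpCtrl ctrl;
    PetscCall(PetscOmpCtrlCreate(PETSC_COMM_WORLD, -1, &ctrl)); /* -1: let PETSc pick the number of threads */
    /* ... use PetscOmpCtrlGetOmpComms(), PetscOmpCtrlBarrier(), etc. ... */
    PetscCall(PetscOmpCtrlDestroy(&ctrl));
.ve
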
345*811af0c4SBarry Smith .seealso: `PetscOmpCtrlDestroy()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`,
346eff715bbSJunchao Zhang @*/
3479371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscOmpCtrl *pctrl) {
348a32e93adSJunchao Zhang   PetscOmpCtrl   ctrl;
349a32e93adSJunchao Zhang   unsigned long *cpu_ulongs = NULL;
350a32e93adSJunchao Zhang   PetscInt       i, nr_cpu_ulongs;
351a32e93adSJunchao Zhang   PetscShmComm   pshmcomm;
352a32e93adSJunchao Zhang   MPI_Comm       shm_comm;
353a32e93adSJunchao Zhang   PetscMPIInt    shm_rank, shm_comm_size, omp_rank, color;
3547c405c4aSJunchao Zhang   PetscInt       num_packages, num_cores;
355a32e93adSJunchao Zhang 
356a32e93adSJunchao Zhang   PetscFunctionBegin;
3579566063dSJacob Faibussowitsch   PetscCall(PetscNew(&ctrl));
358a32e93adSJunchao Zhang 
359a32e93adSJunchao Zhang   /*=================================================================================
3607c405c4aSJunchao Zhang     Init hwloc
3617c405c4aSJunchao Zhang    ==================================================================================*/
3629566063dSJacob Faibussowitsch   PetscCall(hwloc_topology_init(&ctrl->topology));
3637c405c4aSJunchao Zhang #if HWLOC_API_VERSION >= 0x00020000
3647c405c4aSJunchao Zhang   /* to filter out unneeded info and have faster hwloc_topology_load */
3659566063dSJacob Faibussowitsch   PetscCall(hwloc_topology_set_all_types_filter(ctrl->topology, HWLOC_TYPE_FILTER_KEEP_NONE));
3669566063dSJacob Faibussowitsch   PetscCall(hwloc_topology_set_type_filter(ctrl->topology, HWLOC_OBJ_CORE, HWLOC_TYPE_FILTER_KEEP_ALL));
3677c405c4aSJunchao Zhang #endif
3689566063dSJacob Faibussowitsch   PetscCall(hwloc_topology_load(ctrl->topology));
3697c405c4aSJunchao Zhang 
3707c405c4aSJunchao Zhang   /*=================================================================================
371a32e93adSJunchao Zhang     Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
372a32e93adSJunchao Zhang     physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
373a32e93adSJunchao Zhang     others are called slaves. OMP Masters make up a new comm called omp_master_comm,
374a32e93adSJunchao Zhang     which is usually passed to third party libraries.
375a32e93adSJunchao Zhang    ==================================================================================*/
376a32e93adSJunchao Zhang 
377a32e93adSJunchao Zhang   /* fetch the stored shared memory communicator */
3789566063dSJacob Faibussowitsch   PetscCall(PetscShmCommGet(petsc_comm, &pshmcomm));
3799566063dSJacob Faibussowitsch   PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shm_comm));
380a32e93adSJunchao Zhang 
3819566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(shm_comm, &shm_rank));
3829566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(shm_comm, &shm_comm_size));
383a32e93adSJunchao Zhang 
3847c405c4aSJunchao Zhang   /* PETSc decides nthreads, which is the smaller of shm_comm_size and the number of cores per package (socket) */
3857c405c4aSJunchao Zhang   if (nthreads == -1) {
386a312e481SBarry Smith     num_packages = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE);
387a312e481SBarry Smith     num_cores    = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE);
3887c405c4aSJunchao Zhang     nthreads     = num_cores / num_packages;
3897c405c4aSJunchao Zhang     if (nthreads > shm_comm_size) nthreads = shm_comm_size;
3907c405c4aSJunchao Zhang   }
3917c405c4aSJunchao Zhang 
3925f80ce2aSJacob Faibussowitsch   PetscCheck(nthreads >= 1 && nthreads <= shm_comm_size, petsc_comm, PETSC_ERR_ARG_OUTOFRANGE, "number of OpenMP threads %" PetscInt_FMT " can not be < 1 or > the MPI shared memory communicator size %d", nthreads, shm_comm_size);
3939566063dSJacob Faibussowitsch   if (shm_comm_size % nthreads) PetscCall(PetscPrintf(petsc_comm, "Warning: number of OpenMP threads %" PetscInt_FMT " is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n", nthreads, shm_comm_size));
394a32e93adSJunchao Zhang 
395a32e93adSJunchao Zhang   /* split shm_comm into a set of omp_comms, each of size nthreads. E.g., if
396a32e93adSJunchao Zhang      shm_comm_size=16, nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
397a32e93adSJunchao Zhang      color 1. They are put in two omp_comms. Note that petsc_ranks may or may not
398a32e93adSJunchao Zhang      be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
399a32e93adSJunchao Zhang      Use 0 as the key so that rank ordering won't change in the new comm.
400a32e93adSJunchao Zhang    */
401a32e93adSJunchao Zhang   color = shm_rank / nthreads;
4029566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_split(shm_comm, color, 0 /*key*/, &ctrl->omp_comm));
403a32e93adSJunchao Zhang 
404a32e93adSJunchao Zhang   /* put rank 0's in omp_comms (i.e., master ranks) into a new comm - omp_master_comm */
4059566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(ctrl->omp_comm, &omp_rank));
406a32e93adSJunchao Zhang   if (!omp_rank) {
407a32e93adSJunchao Zhang     ctrl->is_omp_master = PETSC_TRUE; /* master */
408a32e93adSJunchao Zhang     color               = 0;
409a32e93adSJunchao Zhang   } else {
410a32e93adSJunchao Zhang     ctrl->is_omp_master = PETSC_FALSE;   /* slave */
411a32e93adSJunchao Zhang     color               = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
412a32e93adSJunchao Zhang   }
4139566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_split(petsc_comm, color, 0 /*key*/, &ctrl->omp_master_comm));
414a32e93adSJunchao Zhang 
415a32e93adSJunchao Zhang   /*=================================================================================
416a32e93adSJunchao Zhang     Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
417a32e93adSJunchao Zhang     slave ranks to sleep and idle their CPUs, so that the master can fork OMP threads
418a32e93adSJunchao Zhang     and run them on the idle CPUs.
419a32e93adSJunchao Zhang    ==================================================================================*/
4209566063dSJacob Faibussowitsch   PetscCall(PetscOmpCtrlCreateBarrier(ctrl));
421a32e93adSJunchao Zhang 
422a32e93adSJunchao Zhang   /*=================================================================================
423a32e93adSJunchao Zhang     omp master logs its cpu binding (i.e., cpu set) and computes a new binding that
424a32e93adSJunchao Zhang     is the union of the bindings of all ranks in the omp_comm
425a32e93adSJunchao Zhang     =================================================================================*/
426a32e93adSJunchao Zhang 
4279371c9d4SSatish Balay   ctrl->cpuset = hwloc_bitmap_alloc();
4289371c9d4SSatish Balay   PetscCheck(ctrl->cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
4299566063dSJacob Faibussowitsch   PetscCall(hwloc_get_cpubind(ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS));
430a32e93adSJunchao Zhang 
4313ab56b82SJunchao Zhang   /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
432a32e93adSJunchao Zhang   nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology)) + sizeof(unsigned long) * 8) / sizeof(unsigned long) / 8;
4339566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nr_cpu_ulongs, &cpu_ulongs));
434a32e93adSJunchao Zhang   if (nr_cpu_ulongs == 1) {
435a32e93adSJunchao Zhang     cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
436a32e93adSJunchao Zhang   } else {
437a32e93adSJunchao Zhang     for (i = 0; i < nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset, (unsigned)i);
438a32e93adSJunchao Zhang   }
439a32e93adSJunchao Zhang 
4409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs, nr_cpu_ulongs, MPI_UNSIGNED_LONG, MPI_BOR, 0, ctrl->omp_comm));
441a32e93adSJunchao Zhang 
442a32e93adSJunchao Zhang   if (ctrl->is_omp_master) {
4439371c9d4SSatish Balay     ctrl->omp_cpuset = hwloc_bitmap_alloc();
4449371c9d4SSatish Balay     PetscCheck(ctrl->omp_cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
445a32e93adSJunchao Zhang     if (nr_cpu_ulongs == 1) {
4463ab56b82SJunchao Zhang #if HWLOC_API_VERSION >= 0x00020000
4479566063dSJacob Faibussowitsch       PetscCall(hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]));
4483ab56b82SJunchao Zhang #else
4493ab56b82SJunchao Zhang       hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]);
4503ab56b82SJunchao Zhang #endif
451a32e93adSJunchao Zhang     } else {
4523ab56b82SJunchao Zhang       for (i = 0; i < nr_cpu_ulongs; i++) {
4533ab56b82SJunchao Zhang #if HWLOC_API_VERSION >= 0x00020000
4549566063dSJacob Faibussowitsch         PetscCall(hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]));
4553ab56b82SJunchao Zhang #else
4563ab56b82SJunchao Zhang         hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]);
4573ab56b82SJunchao Zhang #endif
4583ab56b82SJunchao Zhang       }
459a32e93adSJunchao Zhang     }
460a32e93adSJunchao Zhang   }
4619566063dSJacob Faibussowitsch   PetscCall(PetscFree(cpu_ulongs));
462a32e93adSJunchao Zhang   *pctrl = ctrl;
463a32e93adSJunchao Zhang   PetscFunctionReturn(0);
464a32e93adSJunchao Zhang }
465a32e93adSJunchao Zhang 
466eff715bbSJunchao Zhang /*@C
46764f49babSJed Brown     PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller
468eff715bbSJunchao Zhang 
469eff715bbSJunchao Zhang     Input Parameter:
4708fcaa860SBarry Smith .   pctrl  - a PETSc OpenMP controller
471eff715bbSJunchao Zhang 
472eff715bbSJunchao Zhang     Level: developer
473eff715bbSJunchao Zhang 
474*811af0c4SBarry Smith .seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`,
475eff715bbSJunchao Zhang @*/
4769371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl) {
477a32e93adSJunchao Zhang   PetscOmpCtrl ctrl = *pctrl;
478a32e93adSJunchao Zhang 
479a32e93adSJunchao Zhang   PetscFunctionBegin;
480a32e93adSJunchao Zhang   hwloc_bitmap_free(ctrl->cpuset);
481a32e93adSJunchao Zhang   hwloc_topology_destroy(ctrl->topology);
482a32e93adSJunchao Zhang   PetscOmpCtrlDestroyBarrier(ctrl);
4839566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free(&ctrl->omp_comm));
484a32e93adSJunchao Zhang   if (ctrl->is_omp_master) {
485a32e93adSJunchao Zhang     hwloc_bitmap_free(ctrl->omp_cpuset);
4869566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_free(&ctrl->omp_master_comm));
487a32e93adSJunchao Zhang   }
4889566063dSJacob Faibussowitsch   PetscCall(PetscFree(ctrl));
489a32e93adSJunchao Zhang   PetscFunctionReturn(0);
490a32e93adSJunchao Zhang }
491a32e93adSJunchao Zhang 
492a32e93adSJunchao Zhang /*@C
4938fcaa860SBarry Smith     PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller
494a32e93adSJunchao Zhang 
495a32e93adSJunchao Zhang     Input Parameter:
4968fcaa860SBarry Smith .   ctrl - a PETSc OMP controller
497a32e93adSJunchao Zhang 
498d8d19677SJose E. Roman     Output Parameters:
499eff715bbSJunchao Zhang +   omp_comm         - a communicator that includes a master rank and slave ranks, where the master spawns threads
500a32e93adSJunchao Zhang .   omp_master_comm  - on master ranks, a communicator that includes the master ranks of each omp_comm;
501*811af0c4SBarry Smith                        on slave ranks, `MPI_COMM_NULL` is returned.
502a32e93adSJunchao Zhang -   is_omp_master    - true if the calling process is an OMP master rank.
503a32e93adSJunchao Zhang 
504*811af0c4SBarry Smith     Note:
505*811af0c4SBarry Smith     Any output parameter can be NULL; such a parameter is simply ignored.
506eff715bbSJunchao Zhang 
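    Example Usage:
    A sketch that hands the master communicator to a third party library only on master ranks (ctrl assumed to come from `PetscOmpCtrlCreate()`):
.vb
    MPI_Comm  master_comm;
    PetscBool is_master;
    PetscCall(PetscOmpCtrlGetOmpComms(ctrl, NULL, &master_comm, &is_master));
    if (is_master) { /* pass master_comm to the library that will spawn OpenMP threads */ }
.ve
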
507a32e93adSJunchao Zhang     Level: developer
508*811af0c4SBarry Smith 
509*811af0c4SBarry Smith .seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`,
510a32e93adSJunchao Zhang @*/
5119371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl, MPI_Comm *omp_comm, MPI_Comm *omp_master_comm, PetscBool *is_omp_master) {
512a32e93adSJunchao Zhang   PetscFunctionBegin;
513a32e93adSJunchao Zhang   if (omp_comm) *omp_comm = ctrl->omp_comm;
514a32e93adSJunchao Zhang   if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
515a32e93adSJunchao Zhang   if (is_omp_master) *is_omp_master = ctrl->is_omp_master;
516a32e93adSJunchao Zhang   PetscFunctionReturn(0);
517a32e93adSJunchao Zhang }
518a32e93adSJunchao Zhang 
519eff715bbSJunchao Zhang /*@C
5208fcaa860SBarry Smith     PetscOmpCtrlBarrier - Do a barrier on the MPI ranks in the omp_comm contained in the PETSc OMP controller (to let slave ranks free their CPUs)
521eff715bbSJunchao Zhang 
522eff715bbSJunchao Zhang     Input Parameter:
5238fcaa860SBarry Smith .   ctrl - a PETSc OMP controller
524eff715bbSJunchao Zhang 
525eff715bbSJunchao Zhang     Notes:
526*811af0c4SBarry Smith     This is a pthread barrier on MPI ranks. Using `MPI_Barrier()` instead would be conceptually correct, but the MPI standard does not
527*811af0c4SBarry Smith     require processes blocked by `MPI_Barrier()` to free their CPUs to let other processes progress. In practice, to minimize latency,
528*811af0c4SBarry Smith     MPI ranks stuck in `MPI_Barrier()` keep polling and do not free their CPUs. In contrast, a pthread barrier puts waiting processes to sleep and frees their CPUs.
529eff715bbSJunchao Zhang 
530*811af0c4SBarry Smith     Code using `PetscOmpCtrlBarrier()` typically looks like this:
531*811af0c4SBarry Smith .vb
532eff715bbSJunchao Zhang     if (is_omp_master) {
533eff715bbSJunchao Zhang       PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
534eff715bbSJunchao Zhang       Call the library using OpenMP
535eff715bbSJunchao Zhang       PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
536eff715bbSJunchao Zhang     }
537eff715bbSJunchao Zhang     PetscOmpCtrlBarrier(ctrl);
538*811af0c4SBarry Smith .ve
539eff715bbSJunchao Zhang 
540eff715bbSJunchao Zhang     Level: developer
541eff715bbSJunchao Zhang 
542*811af0c4SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`,
543eff715bbSJunchao Zhang @*/
5449371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl) {
5452da392ccSBarry Smith   int err;
546a32e93adSJunchao Zhang 
547a32e93adSJunchao Zhang   PetscFunctionBegin;
5482da392ccSBarry Smith   err = pthread_barrier_wait(ctrl->barrier);
54939619372SPierre Jolivet   PetscCheck(!err || err == PTHREAD_BARRIER_SERIAL_THREAD, PETSC_COMM_SELF, PETSC_ERR_LIB, "pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d", err);
550a32e93adSJunchao Zhang   PetscFunctionReturn(0);
551a32e93adSJunchao Zhang }
552a32e93adSJunchao Zhang 
553eff715bbSJunchao Zhang /*@C
554eff715bbSJunchao Zhang     PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks
555eff715bbSJunchao Zhang 
556eff715bbSJunchao Zhang     Input Parameter:
5578fcaa860SBarry Smith .   ctrl - a PETSc OMP controller
558eff715bbSJunchao Zhang 
559*811af0c4SBarry Smith     Note:
560*811af0c4SBarry Smith     Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank.
561eff715bbSJunchao Zhang     This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime.
562eff715bbSJunchao Zhang 
563eff715bbSJunchao Zhang     Level: developer
564eff715bbSJunchao Zhang 
565*811af0c4SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`
566eff715bbSJunchao Zhang @*/
5679371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl) {
568a32e93adSJunchao Zhang   PetscFunctionBegin;
5699566063dSJacob Faibussowitsch   PetscCall(hwloc_set_cpubind(ctrl->topology, ctrl->omp_cpuset, HWLOC_CPUBIND_PROCESS));
570eff715bbSJunchao Zhang   omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREAD env var */
571a32e93adSJunchao Zhang   PetscFunctionReturn(0);
572a32e93adSJunchao Zhang }
573a32e93adSJunchao Zhang 
574eff715bbSJunchao Zhang /*@C
575eff715bbSJunchao Zhang    PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks
576eff715bbSJunchao Zhang 
577eff715bbSJunchao Zhang    Input Parameter:
5788fcaa860SBarry Smith .  ctrl - a PETSc OMP controller
579eff715bbSJunchao Zhang 
580*811af0c4SBarry Smith    Note:
581*811af0c4SBarry Smith    Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank.
582eff715bbSJunchao Zhang    This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime to 1.
583eff715bbSJunchao Zhang 
584eff715bbSJunchao Zhang    Level: developer
585eff715bbSJunchao Zhang 
586*811af0c4SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`
587eff715bbSJunchao Zhang @*/
5889371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl) {
589a32e93adSJunchao Zhang   PetscFunctionBegin;
5909566063dSJacob Faibussowitsch   PetscCall(hwloc_set_cpubind(ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS));
591eff715bbSJunchao Zhang   omp_set_num_threads(1);
592a32e93adSJunchao Zhang   PetscFunctionReturn(0);
593a32e93adSJunchao Zhang }
594a32e93adSJunchao Zhang 
5954df5c2c7SJunchao Zhang #undef USE_MMAP_ALLOCATE_SHARED_MEMORY
59620b3346cSJunchao Zhang #endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */
597