xref: /petsc/src/sys/utils/mpishm.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
15f7487a0SJunchao Zhang #include <petscsys.h> /*I  "petscsys.h"  I*/
25f7487a0SJunchao Zhang #include <petsc/private/petscimpl.h>
35f7487a0SJunchao Zhang 
45f7487a0SJunchao Zhang struct _n_PetscShmComm {
55f7487a0SJunchao Zhang   PetscMPIInt *globranks;         /* global ranks of each rank in the shared memory communicator */
65f7487a0SJunchao Zhang   PetscMPIInt  shmsize;           /* size of the shared memory communicator */
75f7487a0SJunchao Zhang   MPI_Comm     globcomm, shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
85f7487a0SJunchao Zhang };
95f7487a0SJunchao Zhang 
105f7487a0SJunchao Zhang /*
1133779a13SJunchao Zhang    Private routine to delete internal shared memory communicator when a communicator is freed.
125f7487a0SJunchao Zhang 
135f7487a0SJunchao Zhang    This is called by MPI, not by users; MPI_Comm_free() invokes it when the communicator that has this data as an attribute is freed.
145f7487a0SJunchao Zhang 
155f7487a0SJunchao Zhang    Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
165f7487a0SJunchao Zhang 
175f7487a0SJunchao Zhang */
189371c9d4SSatish Balay PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *val, void *extra_state) {
195f7487a0SJunchao Zhang   PetscShmComm p = (PetscShmComm)val;
205f7487a0SJunchao Zhang 
215f7487a0SJunchao Zhang   PetscFunctionBegin;
229566063dSJacob Faibussowitsch   PetscCallMPI(PetscInfo(NULL, "Deleting shared memory subcommunicator in a MPI_Comm %ld\n", (long)comm));
239566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free(&p->shmcomm));
249566063dSJacob Faibussowitsch   PetscCallMPI(PetscFree(p->globranks));
259566063dSJacob Faibussowitsch   PetscCallMPI(PetscFree(val));
265f7487a0SJunchao Zhang   PetscFunctionReturn(MPI_SUCCESS);
275f7487a0SJunchao Zhang }
285f7487a0SJunchao Zhang 
29b48189acSJunchao Zhang #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
30b48189acSJunchao Zhang /* Data structures to support freeing comms created in PetscShmCommGet().
31b48189acSJunchao Zhang   Since we predict communicators passed to PetscShmCommGet() are very likely
32b48189acSJunchao Zhang   either a petsc inner communicator or an MPI communicator with a linked petsc
33b48189acSJunchao Zhang   inner communicator, we use a simple static array to store dupped communicators
34b48189acSJunchao Zhang   in the rare cases otherwise.
35b48189acSJunchao Zhang  */
36b48189acSJunchao Zhang #define MAX_SHMCOMM_DUPPED_COMMS 16
37b48189acSJunchao Zhang static PetscInt       num_dupped_comms = 0;
38b48189acSJunchao Zhang static MPI_Comm       shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS];
399371c9d4SSatish Balay static PetscErrorCode PetscShmCommDestroyDuppedComms(void) {
40b48189acSJunchao Zhang   PetscInt i;
41b48189acSJunchao Zhang   PetscFunctionBegin;
429566063dSJacob Faibussowitsch   for (i = 0; i < num_dupped_comms; i++) PetscCall(PetscCommDestroy(&shmcomm_dupped_comms[i]));
43b48189acSJunchao Zhang   num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */
44b48189acSJunchao Zhang   PetscFunctionReturn(0);
45b48189acSJunchao Zhang }
46b48189acSJunchao Zhang #endif
47b48189acSJunchao Zhang 
485f7487a0SJunchao Zhang /*@C
49b48189acSJunchao Zhang     PetscShmCommGet - Given a communicator, returns a sub-communicator of all ranks that share common memory
505f7487a0SJunchao Zhang 
51d083f849SBarry Smith     Collective.
525f7487a0SJunchao Zhang 
535f7487a0SJunchao Zhang     Input Parameter:
54b48189acSJunchao Zhang .   globcomm - MPI_Comm, which can be a user MPI_Comm or a PETSc inner MPI_Comm
555f7487a0SJunchao Zhang 
565f7487a0SJunchao Zhang     Output Parameter:
575f7487a0SJunchao Zhang .   pshmcomm - the PETSc shared memory communicator object
585f7487a0SJunchao Zhang 
595f7487a0SJunchao Zhang     Level: developer
605f7487a0SJunchao Zhang 
615f7487a0SJunchao Zhang     Notes:
625f7487a0SJunchao Zhang        When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis
635f7487a0SJunchao Zhang 
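       Example Usage (a minimal sketch; error handling of the surrounding program is omitted, and the PetscShmComm
       is stashed on the communicator as an attribute, so the caller must not free it):

         PetscShmComm shm;
         MPI_Comm     nodecomm;
         PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &shm));
         PetscCall(PetscShmCommGetMpiShmComm(shm, &nodecomm)); /* all ranks of nodecomm share memory with this rank */
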
645f7487a0SJunchao Zhang @*/
659371c9d4SSatish Balay PetscErrorCode PetscShmCommGet(MPI_Comm globcomm, PetscShmComm *pshmcomm) {
665f7487a0SJunchao Zhang #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
675f7487a0SJunchao Zhang   MPI_Group         globgroup, shmgroup;
685f7487a0SJunchao Zhang   PetscMPIInt      *shmranks, i, flg;
695f7487a0SJunchao Zhang   PetscCommCounter *counter;
705f7487a0SJunchao Zhang 
715f7487a0SJunchao Zhang   PetscFunctionBegin;
723ca90d2dSJacob Faibussowitsch   PetscValidPointer(pshmcomm, 2);
73b48189acSJunchao Zhang   /* Get a petsc inner comm, since we always want to stash pshmcomm on petsc inner comms */
749566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_Counter_keyval, &counter, &flg));
75b48189acSJunchao Zhang   if (!flg) { /* globcomm is not a petsc comm */
769371c9d4SSatish Balay     union
779371c9d4SSatish Balay     {
789371c9d4SSatish Balay       MPI_Comm comm;
799371c9d4SSatish Balay       void    *ptr;
809371c9d4SSatish Balay     } ucomm;
81b48189acSJunchao Zhang     /* check if globcomm already has a linked petsc inner comm */
829566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_InnerComm_keyval, &ucomm, &flg));
83b48189acSJunchao Zhang     if (!flg) {
84b48189acSJunchao Zhang       /* globcomm does not have a linked petsc inner comm, so we create one and replace globcomm with it */
8508401ef6SPierre Jolivet       PetscCheck(num_dupped_comms < MAX_SHMCOMM_DUPPED_COMMS, globcomm, PETSC_ERR_PLIB, "PetscShmCommGet() is trying to dup more than %d MPI_Comms", MAX_SHMCOMM_DUPPED_COMMS);
869566063dSJacob Faibussowitsch       PetscCall(PetscCommDuplicate(globcomm, &globcomm, NULL));
87b48189acSJunchao Zhang       /* Register a function to free the dupped petsc comms at PetscFinalize at the first time */
889566063dSJacob Faibussowitsch       if (num_dupped_comms == 0) PetscCall(PetscRegisterFinalize(PetscShmCommDestroyDuppedComms));
89b48189acSJunchao Zhang       shmcomm_dupped_comms[num_dupped_comms] = globcomm;
90b48189acSJunchao Zhang       num_dupped_comms++;
91b48189acSJunchao Zhang     } else {
92b48189acSJunchao Zhang       /* otherwise, we pull out the inner comm and use it as globcomm */
93b48189acSJunchao Zhang       globcomm = ucomm.comm;
94b48189acSJunchao Zhang     }
95b48189acSJunchao Zhang   }
965f7487a0SJunchao Zhang 
97b48189acSJunchao Zhang   /* Check if globcomm already has an attached pshmcomm. If no, create one */
989566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_ShmComm_keyval, pshmcomm, &flg));
995f7487a0SJunchao Zhang   if (flg) PetscFunctionReturn(0);
1005f7487a0SJunchao Zhang 
1019566063dSJacob Faibussowitsch   PetscCall(PetscNew(pshmcomm));
1025f7487a0SJunchao Zhang   (*pshmcomm)->globcomm = globcomm;
1035f7487a0SJunchao Zhang 
1049566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &(*pshmcomm)->shmcomm));
1055f7487a0SJunchao Zhang 
1069566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size((*pshmcomm)->shmcomm, &(*pshmcomm)->shmsize));
1079566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_group(globcomm, &globgroup));
1089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup));
1099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &shmranks));
1109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &(*pshmcomm)->globranks));
1115f7487a0SJunchao Zhang   for (i = 0; i < (*pshmcomm)->shmsize; i++) shmranks[i] = i;
1129566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks));
1139566063dSJacob Faibussowitsch   PetscCall(PetscFree(shmranks));
1149566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Group_free(&globgroup));
1159566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Group_free(&shmgroup));
1165f7487a0SJunchao Zhang 
117*48a46eb9SPierre Jolivet   for (i = 0; i < (*pshmcomm)->shmsize; i++) PetscCall(PetscInfo(NULL, "Shared memory rank %d global rank %d\n", i, (*pshmcomm)->globranks[i]));
1189566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_set_attr(globcomm, Petsc_ShmComm_keyval, *pshmcomm));
1195f7487a0SJunchao Zhang   PetscFunctionReturn(0);
1205f7487a0SJunchao Zhang #else
1215f7487a0SJunchao Zhang   SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
1225f7487a0SJunchao Zhang #endif
1235f7487a0SJunchao Zhang }
1245f7487a0SJunchao Zhang 
1255f7487a0SJunchao Zhang /*@C
1265f7487a0SJunchao Zhang     PetscShmCommGlobalToLocal - Given a global rank returns the local rank in the shared memory communicator
1275f7487a0SJunchao Zhang 
1285f7487a0SJunchao Zhang     Input Parameters:
1295f7487a0SJunchao Zhang +   pshmcomm - the shared memory communicator object
1305f7487a0SJunchao Zhang -   grank    - the global rank
1315f7487a0SJunchao Zhang 
1325f7487a0SJunchao Zhang     Output Parameter:
1335f7487a0SJunchao Zhang .   lrank - the local rank, or MPI_PROC_NULL if it does not exist
1345f7487a0SJunchao Zhang 
1355f7487a0SJunchao Zhang     Level: developer
1365f7487a0SJunchao Zhang 
1375f7487a0SJunchao Zhang     Developer Notes:
1385f7487a0SJunchao Zhang     Assumes the pshmcomm->globranks[] is sorted
1395f7487a0SJunchao Zhang 
1405f7487a0SJunchao Zhang     It may be better to rewrite this to map multiple global ranks to local in the same function call
1415f7487a0SJunchao Zhang 
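    Example Usage (a minimal sketch, assuming pshmcomm was obtained with PetscShmCommGet() on the global communicator containing grank):

       PetscMPIInt lrank;
       PetscCall(PetscShmCommGlobalToLocal(pshmcomm, grank, &lrank));
       if (lrank == MPI_PROC_NULL) { /* grank does not share memory with this process */ }
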
1425f7487a0SJunchao Zhang @*/
1439371c9d4SSatish Balay PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm, PetscMPIInt grank, PetscMPIInt *lrank) {
1445f7487a0SJunchao Zhang   PetscMPIInt low, high, t, i;
1455f7487a0SJunchao Zhang   PetscBool   flg = PETSC_FALSE;
1465f7487a0SJunchao Zhang 
1475f7487a0SJunchao Zhang   PetscFunctionBegin;
1483ca90d2dSJacob Faibussowitsch   PetscValidPointer(pshmcomm, 1);
149dadcf809SJacob Faibussowitsch   PetscValidIntPointer(lrank, 3);
1505f7487a0SJunchao Zhang   *lrank = MPI_PROC_NULL;
1515f7487a0SJunchao Zhang   if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0);
1525f7487a0SJunchao Zhang   if (grank > pshmcomm->globranks[pshmcomm->shmsize - 1]) PetscFunctionReturn(0);
1539566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-noshared", &flg, NULL));
1545f7487a0SJunchao Zhang   if (flg) PetscFunctionReturn(0);
1555f7487a0SJunchao Zhang   low  = 0;
1565f7487a0SJunchao Zhang   high = pshmcomm->shmsize;
1575f7487a0SJunchao Zhang   while (high - low > 5) {
1585f7487a0SJunchao Zhang     t = (low + high) / 2;
1595f7487a0SJunchao Zhang     if (pshmcomm->globranks[t] > grank) high = t;
1605f7487a0SJunchao Zhang     else low = t;
1615f7487a0SJunchao Zhang   }
1625f7487a0SJunchao Zhang   for (i = low; i < high; i++) {
1635f7487a0SJunchao Zhang     if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0);
1645f7487a0SJunchao Zhang     if (pshmcomm->globranks[i] == grank) {
1655f7487a0SJunchao Zhang       *lrank = i;
1665f7487a0SJunchao Zhang       PetscFunctionReturn(0);
1675f7487a0SJunchao Zhang     }
1685f7487a0SJunchao Zhang   }
1695f7487a0SJunchao Zhang   PetscFunctionReturn(0);
1705f7487a0SJunchao Zhang }
1715f7487a0SJunchao Zhang 
1725f7487a0SJunchao Zhang /*@C
1735f7487a0SJunchao Zhang     PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator returns the global rank
1745f7487a0SJunchao Zhang 
1755f7487a0SJunchao Zhang     Input Parameters:
1765f7487a0SJunchao Zhang +   pshmcomm - the shared memory communicator object
1775f7487a0SJunchao Zhang -   lrank    - the local rank in the shared memory communicator
1785f7487a0SJunchao Zhang 
1795f7487a0SJunchao Zhang     Output Parameter:
1805f7487a0SJunchao Zhang .   grank - the global rank in the global communicator where the shared memory communicator is built
1815f7487a0SJunchao Zhang 
1825f7487a0SJunchao Zhang     Level: developer
1835f7487a0SJunchao Zhang 
1845f7487a0SJunchao Zhang @*/
1859371c9d4SSatish Balay PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm, PetscMPIInt lrank, PetscMPIInt *grank) {
1865f7487a0SJunchao Zhang   PetscFunctionBegin;
1873ca90d2dSJacob Faibussowitsch   PetscValidPointer(pshmcomm, 1);
188dadcf809SJacob Faibussowitsch   PetscValidIntPointer(grank, 3);
1892c71b3e2SJacob Faibussowitsch   PetscCheck(lrank >= 0 && lrank < pshmcomm->shmsize, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "No rank %d in the shared memory communicator", lrank);
1905f7487a0SJunchao Zhang   *grank = pshmcomm->globranks[lrank];
1915f7487a0SJunchao Zhang   PetscFunctionReturn(0);
1925f7487a0SJunchao Zhang }
1935f7487a0SJunchao Zhang 
1945f7487a0SJunchao Zhang /*@C
1955f7487a0SJunchao Zhang     PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory
1965f7487a0SJunchao Zhang 
1975f7487a0SJunchao Zhang     Input Parameter:
1985f7487a0SJunchao Zhang .   pshmcomm - PetscShmComm object obtained with PetscShmCommGet()
1995f7487a0SJunchao Zhang 
2005f7487a0SJunchao Zhang     Output Parameter:
2015f7487a0SJunchao Zhang .   comm     - the MPI communicator
2025f7487a0SJunchao Zhang 
2035f7487a0SJunchao Zhang     Level: developer
2045f7487a0SJunchao Zhang 
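    Example Usage (a minimal sketch; the returned communicator is typically handed to MPI-3 shared memory routines such as MPI_Win_allocate_shared()):

       MPI_Comm shmcomm;
       MPI_Win  win;
       double  *base;
       PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shmcomm));
       PetscCallMPI(MPI_Win_allocate_shared(100 * sizeof(double), (PetscMPIInt)sizeof(double), MPI_INFO_NULL, shmcomm, &base, &win));
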
2055f7487a0SJunchao Zhang @*/
2069371c9d4SSatish Balay PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm, MPI_Comm *comm) {
2075f7487a0SJunchao Zhang   PetscFunctionBegin;
2083ca90d2dSJacob Faibussowitsch   PetscValidPointer(pshmcomm, 1);
2093ca90d2dSJacob Faibussowitsch   PetscValidPointer(comm, 2);
2105f7487a0SJunchao Zhang   *comm = pshmcomm->shmcomm;
2115f7487a0SJunchao Zhang   PetscFunctionReturn(0);
2125f7487a0SJunchao Zhang }
2135f7487a0SJunchao Zhang 
21420b3346cSJunchao Zhang #if defined(PETSC_HAVE_OPENMP_SUPPORT)
215a32e93adSJunchao Zhang #include <pthread.h>
216a32e93adSJunchao Zhang #include <hwloc.h>
217a32e93adSJunchao Zhang #include <omp.h>
218a32e93adSJunchao Zhang 
219eff715bbSJunchao Zhang /* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
220eff715bbSJunchao Zhang    otherwise use MPI_Win_allocate_shared. They should have the same effect except MPI-3 is much
2214df5c2c7SJunchao Zhang    simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
2224df5c2c7SJunchao Zhang    by 50%. Until the reason is understood, we use mmap() instead.
2234df5c2c7SJunchao Zhang */
2244df5c2c7SJunchao Zhang #define USE_MMAP_ALLOCATE_SHARED_MEMORY
2254df5c2c7SJunchao Zhang 
2264df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
2274df5c2c7SJunchao Zhang #include <sys/mman.h>
2284df5c2c7SJunchao Zhang #include <sys/types.h>
2294df5c2c7SJunchao Zhang #include <sys/stat.h>
2304df5c2c7SJunchao Zhang #include <fcntl.h>
2314df5c2c7SJunchao Zhang #endif
2324df5c2c7SJunchao Zhang 
233a32e93adSJunchao Zhang struct _n_PetscOmpCtrl {
234a32e93adSJunchao Zhang   MPI_Comm           omp_comm;        /* a shared memory communicator to spawn omp threads */
235a32e93adSJunchao Zhang   MPI_Comm           omp_master_comm; /* a communicator to give to third party libraries */
236a32e93adSJunchao Zhang   PetscMPIInt        omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
237a32e93adSJunchao Zhang   PetscBool          is_omp_master;   /* true if this rank is rank 0 of its omp_comm */
238a32e93adSJunchao Zhang   MPI_Win            omp_win;         /* a shared memory window containing a barrier */
239a32e93adSJunchao Zhang   pthread_barrier_t *barrier;         /* pointer to the barrier */
240a32e93adSJunchao Zhang   hwloc_topology_t   topology;
241a32e93adSJunchao Zhang   hwloc_cpuset_t     cpuset;     /* cpu bindings of omp master */
242a32e93adSJunchao Zhang   hwloc_cpuset_t     omp_cpuset; /* union of cpu bindings of ranks in omp_comm */
243a32e93adSJunchao Zhang };
244a32e93adSJunchao Zhang 
245eff715bbSJunchao Zhang /* Allocate and initialize a pthread_barrier_t object in memory shared by processes in omp_comm
2468fcaa860SBarry Smith    contained by the controller.
247eff715bbSJunchao Zhang 
2488fcaa860SBarry Smith    PETSc OpenMP controller users do not call this function directly. This function exists
249eff715bbSJunchao Zhang    only because we want to separate shared memory allocation methods from other code.
250eff715bbSJunchao Zhang  */
2519371c9d4SSatish Balay static inline PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl) {
252a32e93adSJunchao Zhang   MPI_Aint              size;
253a32e93adSJunchao Zhang   void                 *baseptr;
254a32e93adSJunchao Zhang   pthread_barrierattr_t attr;
255a32e93adSJunchao Zhang 
2564df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
2574df5c2c7SJunchao Zhang   PetscInt  fd;
2584df5c2c7SJunchao Zhang   PetscChar pathname[PETSC_MAX_PATH_LEN];
2594df5c2c7SJunchao Zhang #else
2604df5c2c7SJunchao Zhang   PetscMPIInt disp_unit;
2614df5c2c7SJunchao Zhang #endif
2624df5c2c7SJunchao Zhang 
2634df5c2c7SJunchao Zhang   PetscFunctionBegin;
2644df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
2654df5c2c7SJunchao Zhang   size = sizeof(pthread_barrier_t);
2664df5c2c7SJunchao Zhang   if (ctrl->is_omp_master) {
267eff715bbSJunchao Zhang     /* use PETSC_COMM_SELF in PetscGetTmp, since it is a collective call. Using omp_comm would otherwise bcast the partially populated pathname to slaves */
2689566063dSJacob Faibussowitsch     PetscCall(PetscGetTmp(PETSC_COMM_SELF, pathname, PETSC_MAX_PATH_LEN));
2699566063dSJacob Faibussowitsch     PetscCall(PetscStrlcat(pathname, "/petsc-shm-XXXXXX", PETSC_MAX_PATH_LEN));
2704df5c2c7SJunchao Zhang     /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
2719371c9d4SSatish Balay     fd = mkstemp(pathname);
2729371c9d4SSatish Balay     PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not create tmp file %s with mkstemp", pathname);
2739566063dSJacob Faibussowitsch     PetscCall(ftruncate(fd, size));
2749371c9d4SSatish Balay     baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
2759371c9d4SSatish Balay     PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
2769566063dSJacob Faibussowitsch     PetscCall(close(fd));
2779566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
278eff715bbSJunchao Zhang     /* this MPI_Barrier is to wait for slaves to open the file before the master unlinks it */
2799566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
2809566063dSJacob Faibussowitsch     PetscCall(unlink(pathname));
2814df5c2c7SJunchao Zhang   } else {
2829566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
2839371c9d4SSatish Balay     fd = open(pathname, O_RDWR);
2849371c9d4SSatish Balay     PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not open tmp file %s", pathname);
2859371c9d4SSatish Balay     baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
2869371c9d4SSatish Balay     PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
2879566063dSJacob Faibussowitsch     PetscCall(close(fd));
2889566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
2894df5c2c7SJunchao Zhang   }
2904df5c2c7SJunchao Zhang #else
291a32e93adSJunchao Zhang   size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
2929566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Win_allocate_shared(size, 1, MPI_INFO_NULL, ctrl->omp_comm, &baseptr, &ctrl->omp_win));
2939566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Win_shared_query(ctrl->omp_win, 0, &size, &disp_unit, &baseptr));
2944df5c2c7SJunchao Zhang #endif
295a32e93adSJunchao Zhang   ctrl->barrier = (pthread_barrier_t *)baseptr;
296a32e93adSJunchao Zhang 
297a32e93adSJunchao Zhang   /* omp master initializes the barrier */
298a32e93adSJunchao Zhang   if (ctrl->is_omp_master) {
2999566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(ctrl->omp_comm, &ctrl->omp_comm_size));
3009566063dSJacob Faibussowitsch     PetscCall(pthread_barrierattr_init(&attr));
3019566063dSJacob Faibussowitsch     PetscCall(pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); /* make the barrier also work for processes */
3029566063dSJacob Faibussowitsch     PetscCall(pthread_barrier_init(ctrl->barrier, &attr, (unsigned int)ctrl->omp_comm_size));
3039566063dSJacob Faibussowitsch     PetscCall(pthread_barrierattr_destroy(&attr));
304a32e93adSJunchao Zhang   }
305a32e93adSJunchao Zhang 
3064df5c2c7SJunchao Zhang   /* this MPI_Barrier is to make sure the omp barrier is initialized before slaves use it */
3079566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
308a32e93adSJunchao Zhang   PetscFunctionReturn(0);
309a32e93adSJunchao Zhang }
310a32e93adSJunchao Zhang 
3118fcaa860SBarry Smith /* Destroy the pthread barrier in the PETSc OpenMP controller */
3129371c9d4SSatish Balay static inline PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl) {
3134df5c2c7SJunchao Zhang   PetscFunctionBegin;
3144df5c2c7SJunchao Zhang   /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before master destroys it */
3159566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
3169566063dSJacob Faibussowitsch   if (ctrl->is_omp_master) PetscCall(pthread_barrier_destroy(ctrl->barrier));
3174df5c2c7SJunchao Zhang 
3184df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
3199566063dSJacob Faibussowitsch   PetscCall(munmap(ctrl->barrier, sizeof(pthread_barrier_t)));
3204df5c2c7SJunchao Zhang #else
3219566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Win_free(&ctrl->omp_win));
3224df5c2c7SJunchao Zhang #endif
323a32e93adSJunchao Zhang   PetscFunctionReturn(0);
324a32e93adSJunchao Zhang }
325a32e93adSJunchao Zhang 
326eff715bbSJunchao Zhang /*@C
3278fcaa860SBarry Smith     PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries using OpenMP
328eff715bbSJunchao Zhang 
329d8d19677SJose E. Roman     Input Parameters:
330eff715bbSJunchao Zhang +   petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
331a2b725a8SWilliam Gropp -   nthreads   - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value
332eff715bbSJunchao Zhang 
333eff715bbSJunchao Zhang     Output Parameter:
3348fcaa860SBarry Smith .   pctrl      - a PETSc OpenMP controller
335eff715bbSJunchao Zhang 
336eff715bbSJunchao Zhang     Level: developer
337eff715bbSJunchao Zhang 
3388fcaa860SBarry Smith     TODO: Possibly use the variable PetscNumOMPThreads to determine the number of threads to use
3398fcaa860SBarry Smith 
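    Example Usage (a minimal sketch; ThirdPartySolve() is a hypothetical OpenMP-threaded library call, not a PETSc routine):

       PetscOmpCtrl ctrl;
       MPI_Comm     master_comm;
       PetscBool    is_master;
       PetscCall(PetscOmpCtrlCreate(PETSC_COMM_WORLD, -1, &ctrl));
       PetscCall(PetscOmpCtrlGetOmpComms(ctrl, NULL, &master_comm, &is_master));
       if (is_master) {
         PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(ctrl)); /* bind the master to the union of CPUs and set the OpenMP thread count */
         /* ThirdPartySolve(master_comm); */
         PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(ctrl));   /* restore the master's binding and set the thread count back to 1 */
       }
       PetscCall(PetscOmpCtrlBarrier(ctrl)); /* all ranks in omp_comm meet here; slaves sleep in the barrier while the master runs OpenMP */
       PetscCall(PetscOmpCtrlDestroy(&ctrl));
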
340db781477SPatrick Sanan .seealso: `PetscOmpCtrlDestroy()`
341eff715bbSJunchao Zhang @*/
3429371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscOmpCtrl *pctrl) {
343a32e93adSJunchao Zhang   PetscOmpCtrl   ctrl;
344a32e93adSJunchao Zhang   unsigned long *cpu_ulongs = NULL;
345a32e93adSJunchao Zhang   PetscInt       i, nr_cpu_ulongs;
346a32e93adSJunchao Zhang   PetscShmComm   pshmcomm;
347a32e93adSJunchao Zhang   MPI_Comm       shm_comm;
348a32e93adSJunchao Zhang   PetscMPIInt    shm_rank, shm_comm_size, omp_rank, color;
3497c405c4aSJunchao Zhang   PetscInt       num_packages, num_cores;
350a32e93adSJunchao Zhang 
351a32e93adSJunchao Zhang   PetscFunctionBegin;
3529566063dSJacob Faibussowitsch   PetscCall(PetscNew(&ctrl));
353a32e93adSJunchao Zhang 
354a32e93adSJunchao Zhang   /*=================================================================================
3557c405c4aSJunchao Zhang     Init hwloc
3567c405c4aSJunchao Zhang    ==================================================================================*/
3579566063dSJacob Faibussowitsch   PetscCall(hwloc_topology_init(&ctrl->topology));
3587c405c4aSJunchao Zhang #if HWLOC_API_VERSION >= 0x00020000
3597c405c4aSJunchao Zhang   /* to filter out unneeded info and have faster hwloc_topology_load */
3609566063dSJacob Faibussowitsch   PetscCall(hwloc_topology_set_all_types_filter(ctrl->topology, HWLOC_TYPE_FILTER_KEEP_NONE));
3619566063dSJacob Faibussowitsch   PetscCall(hwloc_topology_set_type_filter(ctrl->topology, HWLOC_OBJ_CORE, HWLOC_TYPE_FILTER_KEEP_ALL));
3627c405c4aSJunchao Zhang #endif
3639566063dSJacob Faibussowitsch   PetscCall(hwloc_topology_load(ctrl->topology));
3647c405c4aSJunchao Zhang 
3657c405c4aSJunchao Zhang   /*=================================================================================
366a32e93adSJunchao Zhang     Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
367a32e93adSJunchao Zhang     physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
368a32e93adSJunchao Zhang     others are called slaves. OMP Masters make up a new comm called omp_master_comm,
369a32e93adSJunchao Zhang     which is usually passed to third party libraries.
370a32e93adSJunchao Zhang    ==================================================================================*/
371a32e93adSJunchao Zhang 
372a32e93adSJunchao Zhang   /* fetch the stored shared memory communicator */
3739566063dSJacob Faibussowitsch   PetscCall(PetscShmCommGet(petsc_comm, &pshmcomm));
3749566063dSJacob Faibussowitsch   PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shm_comm));
375a32e93adSJunchao Zhang 
3769566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(shm_comm, &shm_rank));
3779566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(shm_comm, &shm_comm_size));
378a32e93adSJunchao Zhang 
3797c405c4aSJunchao Zhang   /* PETSc decides nthreads, which is the smaller of shm_comm_size and the number of cores per package (socket) */
3807c405c4aSJunchao Zhang   if (nthreads == -1) {
381a312e481SBarry Smith     num_packages = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE);
382a312e481SBarry Smith     num_cores    = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE);
3837c405c4aSJunchao Zhang     nthreads     = num_cores / num_packages;
3847c405c4aSJunchao Zhang     if (nthreads > shm_comm_size) nthreads = shm_comm_size;
3857c405c4aSJunchao Zhang   }
3867c405c4aSJunchao Zhang 
3875f80ce2aSJacob Faibussowitsch   PetscCheck(nthreads >= 1 && nthreads <= shm_comm_size, petsc_comm, PETSC_ERR_ARG_OUTOFRANGE, "number of OpenMP threads %" PetscInt_FMT " can not be < 1 or > the MPI shared memory communicator size %d", nthreads, shm_comm_size);
3889566063dSJacob Faibussowitsch   if (shm_comm_size % nthreads) PetscCall(PetscPrintf(petsc_comm, "Warning: number of OpenMP threads %" PetscInt_FMT " is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n", nthreads, shm_comm_size));
389a32e93adSJunchao Zhang 
390a32e93adSJunchao Zhang   /* split shm_comm into a set of omp_comms, each of size nthreads. For example, if
391a32e93adSJunchao Zhang      shm_comm_size=16, nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
392a32e93adSJunchao Zhang      color 1. They are put in two omp_comms. Note that petsc_ranks may or may not
393a32e93adSJunchao Zhang      be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
394a32e93adSJunchao Zhang      Use 0 as the key so that rank ordering won't change in the new comm.
395a32e93adSJunchao Zhang    */
396a32e93adSJunchao Zhang   color = shm_rank / nthreads;
3979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_split(shm_comm, color, 0 /*key*/, &ctrl->omp_comm));
398a32e93adSJunchao Zhang 
399a32e93adSJunchao Zhang   /* put rank 0's in omp_comms (i.e., master ranks) into a new comm - omp_master_comm */
4009566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(ctrl->omp_comm, &omp_rank));
401a32e93adSJunchao Zhang   if (!omp_rank) {
402a32e93adSJunchao Zhang     ctrl->is_omp_master = PETSC_TRUE; /* master */
403a32e93adSJunchao Zhang     color               = 0;
404a32e93adSJunchao Zhang   } else {
405a32e93adSJunchao Zhang     ctrl->is_omp_master = PETSC_FALSE;   /* slave */
406a32e93adSJunchao Zhang     color               = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
407a32e93adSJunchao Zhang   }
4089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_split(petsc_comm, color, 0 /*key*/, &ctrl->omp_master_comm));
409a32e93adSJunchao Zhang 
410a32e93adSJunchao Zhang   /*=================================================================================
411a32e93adSJunchao Zhang     Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
412a32e93adSJunchao Zhang     slave ranks to sleep and idle their CPUs, so that the master can fork OMP threads
413a32e93adSJunchao Zhang     and run them on the idle CPUs.
414a32e93adSJunchao Zhang    ==================================================================================*/
4159566063dSJacob Faibussowitsch   PetscCall(PetscOmpCtrlCreateBarrier(ctrl));
416a32e93adSJunchao Zhang 
417a32e93adSJunchao Zhang   /*=================================================================================
418a32e93adSJunchao Zhang     omp master logs its cpu binding (i.e., cpu set) and computes a new binding that
419a32e93adSJunchao Zhang     is the union of the bindings of all ranks in the omp_comm
420a32e93adSJunchao Zhang     =================================================================================*/
421a32e93adSJunchao Zhang 
4229371c9d4SSatish Balay   ctrl->cpuset = hwloc_bitmap_alloc();
4239371c9d4SSatish Balay   PetscCheck(ctrl->cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
4249566063dSJacob Faibussowitsch   PetscCall(hwloc_get_cpubind(ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS));
425a32e93adSJunchao Zhang 
4263ab56b82SJunchao Zhang   /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
427a32e93adSJunchao Zhang   nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology)) + sizeof(unsigned long) * 8) / sizeof(unsigned long) / 8;
4289566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nr_cpu_ulongs, &cpu_ulongs));
429a32e93adSJunchao Zhang   if (nr_cpu_ulongs == 1) {
430a32e93adSJunchao Zhang     cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
431a32e93adSJunchao Zhang   } else {
432a32e93adSJunchao Zhang     for (i = 0; i < nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset, (unsigned)i);
433a32e93adSJunchao Zhang   }
434a32e93adSJunchao Zhang 
4359566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs, nr_cpu_ulongs, MPI_UNSIGNED_LONG, MPI_BOR, 0, ctrl->omp_comm));
436a32e93adSJunchao Zhang 
437a32e93adSJunchao Zhang   if (ctrl->is_omp_master) {
4389371c9d4SSatish Balay     ctrl->omp_cpuset = hwloc_bitmap_alloc();
4399371c9d4SSatish Balay     PetscCheck(ctrl->omp_cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
440a32e93adSJunchao Zhang     if (nr_cpu_ulongs == 1) {
4413ab56b82SJunchao Zhang #if HWLOC_API_VERSION >= 0x00020000
4429566063dSJacob Faibussowitsch       PetscCall(hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]));
4433ab56b82SJunchao Zhang #else
4443ab56b82SJunchao Zhang       hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]);
4453ab56b82SJunchao Zhang #endif
446a32e93adSJunchao Zhang     } else {
4473ab56b82SJunchao Zhang       for (i = 0; i < nr_cpu_ulongs; i++) {
4483ab56b82SJunchao Zhang #if HWLOC_API_VERSION >= 0x00020000
4499566063dSJacob Faibussowitsch         PetscCall(hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]));
4503ab56b82SJunchao Zhang #else
4513ab56b82SJunchao Zhang         hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]);
4523ab56b82SJunchao Zhang #endif
4533ab56b82SJunchao Zhang       }
454a32e93adSJunchao Zhang     }
455a32e93adSJunchao Zhang   }
456a32e93adSJunchao Zhang 
4579566063dSJacob Faibussowitsch   PetscCall(PetscFree(cpu_ulongs));
458a32e93adSJunchao Zhang   *pctrl = ctrl;
459a32e93adSJunchao Zhang   PetscFunctionReturn(0);
460a32e93adSJunchao Zhang }
461a32e93adSJunchao Zhang 
462eff715bbSJunchao Zhang /*@C
46364f49babSJed Brown     PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller
464eff715bbSJunchao Zhang 
465eff715bbSJunchao Zhang     Input Parameter:
4668fcaa860SBarry Smith .   pctrl  - a PETSc OpenMP controller
467eff715bbSJunchao Zhang 
468eff715bbSJunchao Zhang     Level: developer
469eff715bbSJunchao Zhang 
470db781477SPatrick Sanan .seealso: `PetscOmpCtrlCreate()`
471eff715bbSJunchao Zhang @*/
4729371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl) {
473a32e93adSJunchao Zhang   PetscOmpCtrl ctrl = *pctrl;
474a32e93adSJunchao Zhang 
475a32e93adSJunchao Zhang   PetscFunctionBegin;
476a32e93adSJunchao Zhang   hwloc_bitmap_free(ctrl->cpuset);
477a32e93adSJunchao Zhang   hwloc_topology_destroy(ctrl->topology);
478a32e93adSJunchao Zhang   PetscOmpCtrlDestroyBarrier(ctrl);
4799566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free(&ctrl->omp_comm));
480a32e93adSJunchao Zhang   if (ctrl->is_omp_master) {
481a32e93adSJunchao Zhang     hwloc_bitmap_free(ctrl->omp_cpuset);
4829566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_free(&ctrl->omp_master_comm));
483a32e93adSJunchao Zhang   }
4849566063dSJacob Faibussowitsch   PetscCall(PetscFree(ctrl));
485a32e93adSJunchao Zhang   PetscFunctionReturn(0);
486a32e93adSJunchao Zhang }
487a32e93adSJunchao Zhang 
488a32e93adSJunchao Zhang /*@C
4898fcaa860SBarry Smith     PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller
490a32e93adSJunchao Zhang 
491a32e93adSJunchao Zhang     Input Parameter:
4928fcaa860SBarry Smith .   ctrl - a PETSc OMP controller
493a32e93adSJunchao Zhang 
494d8d19677SJose E. Roman     Output Parameters:
495eff715bbSJunchao Zhang +   omp_comm         - a communicator that includes a master rank and slave ranks where master spawns threads
496a32e93adSJunchao Zhang .   omp_master_comm  - on master ranks, returns a communicator that includes the master ranks of all omp_comms;
497a32e93adSJunchao Zhang                        on slave ranks, MPI_COMM_NULL is returned.
498a32e93adSJunchao Zhang -   is_omp_master    - true if the calling process is an OMP master rank.
499a32e93adSJunchao Zhang 
500eff715bbSJunchao Zhang     Notes: any output parameter can be NULL, in which case it is simply ignored.
501eff715bbSJunchao Zhang 
502a32e93adSJunchao Zhang     Level: developer
503a32e93adSJunchao Zhang @*/
5049371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl, MPI_Comm *omp_comm, MPI_Comm *omp_master_comm, PetscBool *is_omp_master) {
505a32e93adSJunchao Zhang   PetscFunctionBegin;
506a32e93adSJunchao Zhang   if (omp_comm) *omp_comm = ctrl->omp_comm;
507a32e93adSJunchao Zhang   if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
508a32e93adSJunchao Zhang   if (is_omp_master) *is_omp_master = ctrl->is_omp_master;
509a32e93adSJunchao Zhang   PetscFunctionReturn(0);
510a32e93adSJunchao Zhang }
511a32e93adSJunchao Zhang 
512eff715bbSJunchao Zhang /*@C
5138fcaa860SBarry Smith     PetscOmpCtrlBarrier - Do barrier on MPI ranks in omp_comm contained by the PETSc OMP controller (to let slave ranks free their CPU)
514eff715bbSJunchao Zhang 
515eff715bbSJunchao Zhang     Input Parameter:
5168fcaa860SBarry Smith .   ctrl - a PETSc OMP controller
517eff715bbSJunchao Zhang 
518eff715bbSJunchao Zhang     Notes:
519eff715bbSJunchao Zhang     this is a pthread barrier on MPI processes. Using MPI_Barrier instead would be conceptually correct, but the MPI standard does not
520eff715bbSJunchao Zhang     require that processes blocked by MPI_Barrier free their CPUs to let other processes progress. In practice, to minimize latency,
521eff715bbSJunchao Zhang     MPI processes stuck in MPI_Barrier keep polling and do not free their CPUs. In contrast, pthread barriers let waiting processes sleep and free their CPUs.
522eff715bbSJunchao Zhang 
523eff715bbSJunchao Zhang     Code using PetscOmpCtrlBarrier() would look like this:
524eff715bbSJunchao Zhang 
525eff715bbSJunchao Zhang     if (is_omp_master) {
526eff715bbSJunchao Zhang       PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
527eff715bbSJunchao Zhang       Call the library using OpenMP
528eff715bbSJunchao Zhang       PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
529eff715bbSJunchao Zhang     }
530eff715bbSJunchao Zhang     PetscOmpCtrlBarrier(ctrl);
531eff715bbSJunchao Zhang 
532eff715bbSJunchao Zhang     Level: developer
533eff715bbSJunchao Zhang 
534db781477SPatrick Sanan .seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`
535eff715bbSJunchao Zhang @*/
5369371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl) {
5372da392ccSBarry Smith   int err;
538a32e93adSJunchao Zhang 
539a32e93adSJunchao Zhang   PetscFunctionBegin;
5402da392ccSBarry Smith   err = pthread_barrier_wait(ctrl->barrier);
54139619372SPierre Jolivet   PetscCheck(!err || err == PTHREAD_BARRIER_SERIAL_THREAD, PETSC_COMM_SELF, PETSC_ERR_LIB, "pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d", err);
542a32e93adSJunchao Zhang   PetscFunctionReturn(0);
543a32e93adSJunchao Zhang }
544a32e93adSJunchao Zhang 
545eff715bbSJunchao Zhang /*@C
546eff715bbSJunchao Zhang     PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks
547eff715bbSJunchao Zhang 
548eff715bbSJunchao Zhang     Input Parameter:
5498fcaa860SBarry Smith .   ctrl - a PETSc OMP controller
550eff715bbSJunchao Zhang 
551eff715bbSJunchao Zhang     Notes:
5528fcaa860SBarry Smith     Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
553eff715bbSJunchao Zhang     This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime
554eff715bbSJunchao Zhang 
555eff715bbSJunchao Zhang     Level: developer
556eff715bbSJunchao Zhang 
557db781477SPatrick Sanan .seealso: `PetscOmpCtrlOmpRegionOnMasterEnd()`
558eff715bbSJunchao Zhang @*/
5599371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl) {
560a32e93adSJunchao Zhang   PetscFunctionBegin;
5619566063dSJacob Faibussowitsch   PetscCall(hwloc_set_cpubind(ctrl->topology, ctrl->omp_cpuset, HWLOC_CPUBIND_PROCESS));
562eff715bbSJunchao Zhang   omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREAD env var */
563a32e93adSJunchao Zhang   PetscFunctionReturn(0);
564a32e93adSJunchao Zhang }
565a32e93adSJunchao Zhang 
566eff715bbSJunchao Zhang /*@C
567eff715bbSJunchao Zhang    PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks
568eff715bbSJunchao Zhang 
569eff715bbSJunchao Zhang    Input Parameter:
5708fcaa860SBarry Smith .  ctrl - a PETSc OMP controller
571eff715bbSJunchao Zhang 
572eff715bbSJunchao Zhang    Notes:
5738fcaa860SBarry Smith    Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
574eff715bbSJunchao Zhang    This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime to 1.
575eff715bbSJunchao Zhang 
576eff715bbSJunchao Zhang    Level: developer
577eff715bbSJunchao Zhang 
578db781477SPatrick Sanan .seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`
579eff715bbSJunchao Zhang @*/
5809371c9d4SSatish Balay PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl) {
581a32e93adSJunchao Zhang   PetscFunctionBegin;
5829566063dSJacob Faibussowitsch   PetscCall(hwloc_set_cpubind(ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS));
583eff715bbSJunchao Zhang   omp_set_num_threads(1);
584a32e93adSJunchao Zhang   PetscFunctionReturn(0);
585a32e93adSJunchao Zhang }
586a32e93adSJunchao Zhang 
5874df5c2c7SJunchao Zhang #undef USE_MMAP_ALLOCATE_SHARED_MEMORY
58820b3346cSJunchao Zhang #endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */
589