xref: /petsc/src/sys/utils/mpishm.c (revision eff715bbf1a92b57a39590b123ea1f6a46dc3c94)
#include <petscsys.h>        /*I  "petscsys.h"  I*/
#include <petsc/private/petscimpl.h>

struct _n_PetscShmComm {
  PetscMPIInt *globranks;       /* global ranks of each rank in the shared memory communicator */
  PetscMPIInt shmsize;          /* size of the shared memory communicator */
  MPI_Comm    globcomm,shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
};

/*
   Private routine to delete the internal shared memory communicator attribute when a communicator is freed.

   This is called by MPI_Comm_free() when the communicator that has this data as an attribute is freed; it is not called directly by users.

   Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()

*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelComm_Shm(MPI_Comm comm,PetscMPIInt keyval,void *val,void *extra_state)
{
  PetscErrorCode ierr;
  PetscShmComm   p = (PetscShmComm)val;

  PetscFunctionBegin;
  ierr = PetscInfo1(0,"Deleting shared memory subcommunicator in an MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
  ierr = MPI_Comm_free(&p->shmcomm);CHKERRMPI(ierr);
  ierr = PetscFree(p->globranks);CHKERRMPI(ierr);
  ierr = PetscFree(val);CHKERRMPI(ierr);
  PetscFunctionReturn(MPI_SUCCESS);
}
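
/* A minimal sketch of how a delete callback like the one above is typically hooked up
   with MPI_Comm_create_keyval(); PETSc performs the equivalent registration during
   initialization. The variable name my_keyval is hypothetical, used here only for
   illustration.

     static PetscMPIInt my_keyval = MPI_KEYVAL_INVALID;

     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelComm_Shm,&my_keyval,NULL);CHKERRQ(ierr);
     // Once a PetscShmComm is attached with this keyval, MPI calls Petsc_DelComm_Shm()
     // automatically when the owning communicator is freed:
     ierr = MPI_Comm_set_attr(comm,my_keyval,(void*)pshmcomm);CHKERRQ(ierr);
*/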

/*@C
    PetscShmCommGet - Given a PETSc communicator, returns a communicator of all ranks that share a common memory

    Collective on globcomm.

    Input Parameter:
.   globcomm - MPI_Comm

    Output Parameter:
.   pshmcomm - the PETSc shared memory communicator object

    Level: developer

    Notes:
    This should be called only with a communicator obtained with PetscCommDuplicate()

    When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis

    Concepts: MPI subcomm^numbering

@*/
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm,PetscShmComm *pshmcomm)
{
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  PetscErrorCode   ierr;
  MPI_Group        globgroup,shmgroup;
  PetscMPIInt      *shmranks,i,flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  ierr = MPI_Comm_get_attr(globcomm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(globcomm,PETSC_ERR_ARG_CORRUPT,"Bad MPI communicator supplied; must be a PETSc communicator");

  ierr = MPI_Comm_get_attr(globcomm,Petsc_ShmComm_keyval,pshmcomm,&flg);CHKERRQ(ierr);
  if (flg) PetscFunctionReturn(0);

  ierr = PetscNew(pshmcomm);CHKERRQ(ierr);
  (*pshmcomm)->globcomm = globcomm;

  ierr = MPI_Comm_split_type(globcomm,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,&(*pshmcomm)->shmcomm);CHKERRQ(ierr);

  ierr = MPI_Comm_size((*pshmcomm)->shmcomm,&(*pshmcomm)->shmsize);CHKERRQ(ierr);
  ierr = MPI_Comm_group(globcomm,&globgroup);CHKERRQ(ierr);
  ierr = MPI_Comm_group((*pshmcomm)->shmcomm,&shmgroup);CHKERRQ(ierr);
  ierr = PetscMalloc1((*pshmcomm)->shmsize,&shmranks);CHKERRQ(ierr);
  ierr = PetscMalloc1((*pshmcomm)->shmsize,&(*pshmcomm)->globranks);CHKERRQ(ierr);
  for (i=0; i<(*pshmcomm)->shmsize; i++) shmranks[i] = i;
  ierr = MPI_Group_translate_ranks(shmgroup,(*pshmcomm)->shmsize,shmranks,globgroup,(*pshmcomm)->globranks);CHKERRQ(ierr);
  ierr = PetscFree(shmranks);CHKERRQ(ierr);
  ierr = MPI_Group_free(&globgroup);CHKERRQ(ierr);
  ierr = MPI_Group_free(&shmgroup);CHKERRQ(ierr);

  for (i=0; i<(*pshmcomm)->shmsize; i++) {
    ierr = PetscInfo2(NULL,"Shared memory rank %d global rank %d\n",i,(*pshmcomm)->globranks[i]);CHKERRQ(ierr);
  }
  ierr = MPI_Comm_set_attr(globcomm,Petsc_ShmComm_keyval,*pshmcomm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  SETERRQ(globcomm,PETSC_ERR_SUP,"Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}
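
/* A minimal usage sketch (hypothetical caller): fetch the shared memory communicator
   attached to a PETSc communicator and get the raw MPI communicator of the on-node
   ranks. Assumes comm was obtained through PetscCommDuplicate(), as the Notes above
   require.

     PetscShmComm pshmcomm;
     MPI_Comm     shmcomm;

     ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
     ierr = PetscShmCommGetMpiShmComm(pshmcomm,&shmcomm);CHKERRQ(ierr);
     // shmcomm now contains exactly the ranks of comm that share memory with this process
*/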

/*@C
    PetscShmCommGlobalToLocal - Given a global rank, returns the local rank in the shared memory communicator

    Input Parameters:
+   pshmcomm - the shared memory communicator object
-   grank    - the global rank

    Output Parameter:
.   lrank - the local rank, or MPI_PROC_NULL if it does not exist

    Level: developer

    Developer Notes:
    Assumes pshmcomm->globranks[] is sorted in increasing order

    It may be better to rewrite this to map multiple global ranks to local ranks in the same function call

    Concepts: MPI subcomm^numbering

@*/
PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm,PetscMPIInt grank,PetscMPIInt *lrank)
{
  PetscMPIInt    low,high,t,i;
  PetscBool      flg = PETSC_FALSE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *lrank = MPI_PROC_NULL;
  if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0);
  if (grank > pshmcomm->globranks[pshmcomm->shmsize-1]) PetscFunctionReturn(0);
  ierr = PetscOptionsGetBool(NULL,NULL,"-noshared",&flg,NULL);CHKERRQ(ierr);
  if (flg) PetscFunctionReturn(0);
  /* binary search to narrow the range, then finish with a short linear scan */
  low  = 0;
  high = pshmcomm->shmsize;
  while (high-low > 5) {
    t = (low+high)/2;
    if (pshmcomm->globranks[t] > grank) high = t;
    else low = t;
  }
  for (i=low; i<high; i++) {
    if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0);
    if (pshmcomm->globranks[i] == grank) {
      *lrank = i;
      PetscFunctionReturn(0);
    }
  }
  PetscFunctionReturn(0);
}
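
/* A small usage sketch (hypothetical caller): translate a global rank into its rank
   within the node-local shared memory communicator, handling off-node ranks.

     PetscMPIInt lrank;

     ierr = PetscShmCommGlobalToLocal(pshmcomm,grank,&lrank);CHKERRQ(ierr);
     if (lrank == MPI_PROC_NULL) {
       // grank does not share memory with this process; fall back to plain MPI
     }
*/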

/*@C
    PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator, returns the global rank

    Input Parameters:
+   pshmcomm - the shared memory communicator object
-   lrank    - the local rank in the shared memory communicator

    Output Parameter:
.   grank - the global rank in the global communicator where the shared memory communicator is built

    Level: developer

    Concepts: MPI subcomm^numbering
@*/
PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm,PetscMPIInt lrank,PetscMPIInt *grank)
{
  PetscFunctionBegin;
#ifdef PETSC_USE_DEBUG
  if (lrank < 0 || lrank >= pshmcomm->shmsize) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"No rank %d in the shared memory communicator",lrank);
#endif
  *grank = pshmcomm->globranks[lrank];
  PetscFunctionReturn(0);
}

/*@C
    PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory

    Input Parameter:
.   pshmcomm - PetscShmComm object obtained with PetscShmCommGet()

    Output Parameter:
.   comm     - the MPI communicator

    Level: developer

@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm,MPI_Comm *comm)
{
  PetscFunctionBegin;
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_OPENMP) && defined(PETSC_HAVE_PTHREAD) && defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) && defined(PETSC_HAVE_HWLOC)
#include <pthread.h>
#include <hwloc.h>
#include <omp.h>

/* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
   otherwise use MPI_Win_allocate_shared(). They should have the same effect, except that MPI-3 is much
   simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
   by 50%. Until the reason is found out, we use mmap() instead.
*/
#define USE_MMAP_ALLOCATE_SHARED_MEMORY

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif

struct _n_PetscOmpCtrl {
  MPI_Comm          omp_comm;        /* a shared memory communicator to spawn omp threads */
  MPI_Comm          omp_master_comm; /* a communicator to give to third party libraries */
  PetscMPIInt       omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
  PetscBool         is_omp_master;   /* true on rank 0 of omp_comm (the master) */
  MPI_Win           omp_win;         /* a shared memory window containing a barrier */
  pthread_barrier_t *barrier;        /* pointer to the barrier */
  hwloc_topology_t  topology;
  hwloc_cpuset_t    cpuset;          /* cpu bindings of omp master */
  hwloc_cpuset_t    omp_cpuset;      /* union of cpu bindings of ranks in omp_comm */
};

/* Allocate and initialize a pthread_barrier_t object in memory shared by processes in the omp_comm
   contained in the controller.

   PETSc OpenMP controller users do not call this function directly. This function exists
   only because we want to separate shared memory allocation methods from other code.
 */
PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode        ierr;
  MPI_Aint              size;
  void                  *baseptr;
  pthread_barrierattr_t attr;

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  int                   fd; /* mkstemp() and open() return a plain int file descriptor */
  char                  pathname[PETSC_MAX_PATH_LEN];
#else
  PetscMPIInt           disp_unit;
#endif

  PetscFunctionBegin;
#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  size = sizeof(pthread_barrier_t);
  if (ctrl->is_omp_master) {
    /* use PETSC_COMM_SELF in PetscGetTmp(), since it is a collective call. Using omp_comm would otherwise bcast the partially populated pathname to slaves */
    ierr    = PetscGetTmp(PETSC_COMM_SELF,pathname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
    ierr    = PetscStrlcat(pathname,"/petsc-shm-XXXXXX",PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
    /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
    fd      = mkstemp(pathname); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not create tmp file %s with mkstemp\n",pathname);
    ierr    = ftruncate(fd,size);CHKERRQ(ierr);
    baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE,MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
    ierr    = close(fd);CHKERRQ(ierr);
    ierr    = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRQ(ierr);
    /* this MPI_Barrier is to wait for slaves to open the file before the master unlinks it */
    ierr    = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
    ierr    = unlink(pathname);CHKERRQ(ierr);
  } else {
    ierr    = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRQ(ierr);
    fd      = open(pathname,O_RDWR); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not open tmp file %s\n",pathname);
    baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE,MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
    ierr    = close(fd);CHKERRQ(ierr);
    ierr    = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  }
#else
  size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
  ierr = MPI_Win_allocate_shared(size,1,MPI_INFO_NULL,ctrl->omp_comm,&baseptr,&ctrl->omp_win);CHKERRQ(ierr);
  ierr = MPI_Win_shared_query(ctrl->omp_win,0,&size,&disp_unit,&baseptr);CHKERRQ(ierr);
#endif
  ctrl->barrier = (pthread_barrier_t*)baseptr;

  /* omp master initializes the barrier */
  if (ctrl->is_omp_master) {
    ierr = MPI_Comm_size(ctrl->omp_comm,&ctrl->omp_comm_size);CHKERRQ(ierr);
    ierr = pthread_barrierattr_init(&attr);CHKERRQ(ierr);
    ierr = pthread_barrierattr_setpshared(&attr,PTHREAD_PROCESS_SHARED);CHKERRQ(ierr); /* make the barrier also work across processes */
    ierr = pthread_barrier_init(ctrl->barrier,&attr,(unsigned int)ctrl->omp_comm_size);CHKERRQ(ierr);
    ierr = pthread_barrierattr_destroy(&attr);CHKERRQ(ierr);
  }

  /* this MPI_Barrier is to make sure the omp barrier is initialized before slaves use it */
  ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy the pthread barrier in the PETSc OpenMP controller */
PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before the master destroys it */
  ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  if (ctrl->is_omp_master) { ierr = pthread_barrier_destroy(ctrl->barrier);CHKERRQ(ierr); }

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  ierr = munmap(ctrl->barrier,sizeof(pthread_barrier_t));CHKERRQ(ierr);
#else
  ierr = MPI_Win_free(&ctrl->omp_win);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries that use OpenMP

    Input Parameters:
+   petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
-   nthreads   - number of threads per MPI rank to spawn in a library using OpenMP

    Output Parameter:
.   pctrl      - a PETSc OpenMP controller

    Level: developer

.seealso: PetscOmpCtrlDestroy()
@*/
PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm,PetscInt nthreads,PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl;
  unsigned long  *cpu_ulongs=NULL;
  PetscInt       i,nr_cpu_ulongs;
  PetscShmComm   pshmcomm;
  MPI_Comm       shm_comm;
  PetscMPIInt    shm_rank,shm_comm_size,omp_rank,color;

  PetscFunctionBegin;
  ierr = PetscNew(&ctrl);CHKERRQ(ierr);

  /*=================================================================================
    Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
    physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
    the others are called slaves. OMP masters make up a new comm called omp_master_comm,
    which is usually passed to third party libraries.
   ==================================================================================*/

  /* fetch the stored shared memory communicator */
  ierr = PetscShmCommGet(petsc_comm,&pshmcomm);CHKERRQ(ierr);
  ierr = PetscShmCommGetMpiShmComm(pshmcomm,&shm_comm);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(shm_comm,&shm_rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(shm_comm,&shm_comm_size);CHKERRQ(ierr);

  if (nthreads < 1 || nthreads > shm_comm_size) SETERRQ2(petsc_comm,PETSC_ERR_ARG_OUTOFRANGE,"number of OpenMP threads %D can not be < 1 or > the MPI shared memory communicator size %d\n",nthreads,shm_comm_size);
  if (shm_comm_size % nthreads) { ierr = PetscPrintf(petsc_comm,"Warning: number of OpenMP threads %D is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n",nthreads,shm_comm_size);CHKERRQ(ierr); }

  /* split shm_comm into a set of omp_comms, each of size nthreads. For example, if
     shm_comm_size=16 and nthreads=8, then ranks 0-7 get color 0, ranks 8-15 get
     color 1, and they are put in two omp_comms. Note that petsc_ranks may or may not
     be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
     Use 0 as the key so that rank ordering won't change in the new comm.
   */
  color = shm_rank / nthreads;
  ierr  = MPI_Comm_split(shm_comm,color,0/*key*/,&ctrl->omp_comm);CHKERRQ(ierr);

  /* put rank 0 of each omp_comm (i.e., the master ranks) into a new comm - omp_master_comm */
  ierr = MPI_Comm_rank(ctrl->omp_comm,&omp_rank);CHKERRQ(ierr);
  if (!omp_rank) {
    ctrl->is_omp_master = PETSC_TRUE;  /* master */
    color = 0;
  } else {
    ctrl->is_omp_master = PETSC_FALSE; /* slave */
    color = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
  }
  ierr = MPI_Comm_split(petsc_comm,color,0/*key*/,&ctrl->omp_master_comm);CHKERRQ(ierr); /* rank 0 in omp_master_comm is rank 0 in petsc_comm */

  /*=================================================================================
    Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
    slave ranks to sleep and idle their CPUs, so that the master can fork OMP threads
    and run them on the idle CPUs.
   ==================================================================================*/
  ierr = PetscOmpCtrlCreateBarrier(ctrl);CHKERRQ(ierr);

  /*=================================================================================
    The omp master logs its cpu binding (i.e., cpu set) and computes a new binding that
    is the union of the bindings of all ranks in the omp_comm
    =================================================================================*/
  ierr = hwloc_topology_init(&ctrl->topology);CHKERRQ(ierr);
#if HWLOC_API_VERSION >= 0x00020000
  /* to filter out unneeded info and have a faster hwloc_topology_load */
  ierr = hwloc_topology_set_all_types_filter(ctrl->topology,HWLOC_TYPE_FILTER_KEEP_NONE);CHKERRQ(ierr);
  ierr = hwloc_topology_set_type_filter(ctrl->topology,HWLOC_OBJ_CORE,HWLOC_TYPE_FILTER_KEEP_ALL);CHKERRQ(ierr);
#endif
  ierr = hwloc_topology_load(ctrl->topology);CHKERRQ(ierr);

  ctrl->cpuset = hwloc_bitmap_alloc(); if (!ctrl->cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
  ierr = hwloc_get_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);

  /* hwloc's main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
  nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology))+sizeof(unsigned long)*8)/sizeof(unsigned long)/8;
  ierr = PetscMalloc1(nr_cpu_ulongs,&cpu_ulongs);CHKERRQ(ierr);
  if (nr_cpu_ulongs == 1) {
    cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
  } else {
    for (i=0; i<nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset,(unsigned)i);
  }

  ierr = MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs,cpu_ulongs,nr_cpu_ulongs,MPI_UNSIGNED_LONG,MPI_BOR,0,ctrl->omp_comm);CHKERRQ(ierr);

  if (ctrl->is_omp_master) {
    ctrl->omp_cpuset = hwloc_bitmap_alloc(); if (!ctrl->omp_cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
    if (nr_cpu_ulongs == 1) {
#if HWLOC_API_VERSION >= 0x00020000
      ierr = hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);CHKERRQ(ierr);
#else
      hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);
#endif
    } else {
      for (i=0; i<nr_cpu_ulongs; i++)  {
#if HWLOC_API_VERSION >= 0x00020000
        ierr = hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);CHKERRQ(ierr);
#else
        hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);
#endif
      }
    }
  }

  ierr = PetscFree(cpu_ulongs);CHKERRQ(ierr);
  *pctrl = ctrl;
  PetscFunctionReturn(0);
}
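
/* An end-to-end usage sketch (hypothetical caller; nthreads and the library call are
   placeholders): create a controller, let masters run an OpenMP library while slaves
   sleep in the barrier, then tear everything down.

     PetscOmpCtrl ctrl;
     MPI_Comm     omp_master_comm;
     PetscBool    is_master;

     ierr = PetscOmpCtrlCreate(petsc_comm,nthreads,&ctrl);CHKERRQ(ierr);
     ierr = PetscOmpCtrlGetOmpComms(ctrl,NULL,&omp_master_comm,&is_master);CHKERRQ(ierr);
     if (is_master) {
       ierr = PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);CHKERRQ(ierr);
       // ... call the third party OpenMP library on omp_master_comm ...
       ierr = PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);CHKERRQ(ierr);
     }
     // slaves reach the barrier immediately and sleep; the master releases them when done
     ierr = PetscOmpCtrlBarrier(ctrl);CHKERRQ(ierr);
     ierr = PetscOmpCtrlDestroy(&ctrl);CHKERRQ(ierr);
*/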

/*@C
    PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller

    Input Parameter:
.   pctrl  - a PETSc OpenMP controller

    Level: developer

.seealso: PetscOmpCtrlCreate()
@*/
PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl = *pctrl;

  PetscFunctionBegin;
  hwloc_bitmap_free(ctrl->cpuset);
  hwloc_topology_destroy(ctrl->topology);
  ierr = PetscOmpCtrlDestroyBarrier(ctrl);CHKERRQ(ierr);
  ierr = MPI_Comm_free(&ctrl->omp_comm);CHKERRQ(ierr);
  if (ctrl->is_omp_master) {
    hwloc_bitmap_free(ctrl->omp_cpuset);
    ierr = MPI_Comm_free(&ctrl->omp_master_comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(*pctrl);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller

    Input Parameter:
.   ctrl - a PETSc OMP controller

    Output Parameters:
+   omp_comm         - a communicator that includes a master rank and slave ranks where the master spawns threads
.   omp_master_comm  - on master ranks, a communicator that includes the master ranks of all omp_comms;
                       on slave ranks, MPI_COMM_NULL is returned
-   is_omp_master    - true if the calling process is an OMP master rank

    Notes:
    Any output parameter may be NULL, in which case it is ignored.

    Level: developer
@*/
PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl,MPI_Comm *omp_comm,MPI_Comm *omp_master_comm,PetscBool *is_omp_master)
{
  PetscFunctionBegin;
  if (omp_comm)        *omp_comm        = ctrl->omp_comm;
  if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
  if (is_omp_master)   *is_omp_master   = ctrl->is_omp_master;
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlBarrier - Do a barrier on the MPI ranks in the omp_comm contained in the PETSc OMP controller (to let slave ranks free their CPUs)

    Input Parameter:
.   ctrl - a PETSc OMP controller

    Notes:
    This is a pthread barrier on MPI processes. Using MPI_Barrier() instead would be conceptually correct, but the MPI standard does not
    require processes blocked in MPI_Barrier() to free their CPUs to let other processes progress. In practice, to minimize latency,
    MPI processes stuck in MPI_Barrier() keep polling and do not free their CPUs. In contrast, a process waiting in a pthread barrier
    goes to sleep and frees its CPU.

    A code using PetscOmpCtrlBarrier() would look like this,
.vb
    if (is_omp_master) {
      PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
      Call the library using OpenMP
      PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
    }
    PetscOmpCtrlBarrier(ctrl);
.ve

    Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterBegin(), PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = pthread_barrier_wait(ctrl->barrier);
  if (ierr && ierr != PTHREAD_BARRIER_SERIAL_THREAD) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d\n",ierr);
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks

    Input Parameter:
.   ctrl - a PETSc OMP controller

    Notes:
    Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to find out if this is a master rank.
    This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime

    Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->omp_cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks

   Input Parameter:
.  ctrl - a PETSc OMP controller

   Notes:
   Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to find out if this is a master rank.
   This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime to 1.

   Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterBegin()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(1);
  PetscFunctionReturn(0);
}

#undef USE_MMAP_ALLOCATE_SHARED_MEMORY
#endif /* defined(PETSC_HAVE_OPENMP) && defined(PETSC_HAVE_PTHREAD) && .. */