1ce78bad3SBarry Smith #include <petscsys.h> /*I "petscsys.h" I*/ 2ce78bad3SBarry Smith #include <petsc/private/petscimpl.h> 3ce78bad3SBarry Smith #include <pthread.h> 4ce78bad3SBarry Smith #include <hwloc.h> 5ce78bad3SBarry Smith #include <omp.h> 6ce78bad3SBarry Smith 7ce78bad3SBarry Smith /* Use mmap() to allocate shared mmeory (for the pthread_barrier_t object) if it is available, 8ce78bad3SBarry Smith otherwise use MPI_Win_allocate_shared. They should have the same effect except MPI-3 is much 9ce78bad3SBarry Smith simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance 10ce78bad3SBarry Smith by 50%. Until the reason is found out, we use mmap() instead. 11ce78bad3SBarry Smith */ 12ce78bad3SBarry Smith #define USE_MMAP_ALLOCATE_SHARED_MEMORY 13ce78bad3SBarry Smith 14ce78bad3SBarry Smith #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 15ce78bad3SBarry Smith #include <sys/mman.h> 16ce78bad3SBarry Smith #include <sys/types.h> 17ce78bad3SBarry Smith #include <sys/stat.h> 18ce78bad3SBarry Smith #include <fcntl.h> 19ce78bad3SBarry Smith #endif 20ce78bad3SBarry Smith 21ce78bad3SBarry Smith struct _n_PetscOmpCtrl { 22ce78bad3SBarry Smith MPI_Comm omp_comm; /* a shared memory communicator to spawn omp threads */ 23ce78bad3SBarry Smith MPI_Comm omp_master_comm; /* a communicator to give to third party libraries */ 24ce78bad3SBarry Smith PetscMPIInt omp_comm_size; /* size of omp_comm, a kind of OMP_NUM_THREADS */ 25ce78bad3SBarry Smith PetscBool is_omp_master; /* rank 0's in omp_comm */ 26ce78bad3SBarry Smith MPI_Win omp_win; /* a shared memory window containing a barrier */ 27ce78bad3SBarry Smith pthread_barrier_t *barrier; /* pointer to the barrier */ 28ce78bad3SBarry Smith hwloc_topology_t topology; 29ce78bad3SBarry Smith hwloc_cpuset_t cpuset; /* cpu bindings of omp master */ 30ce78bad3SBarry Smith hwloc_cpuset_t omp_cpuset; /* union of cpu bindings of ranks in omp_comm */ 31ce78bad3SBarry Smith }; 32ce78bad3SBarry Smith 33ce78bad3SBarry Smith /* Allocate and initialize a pthread_barrier_t object in memory shared by processes in omp_comm 34ce78bad3SBarry Smith contained by the controller. 35ce78bad3SBarry Smith 36ce78bad3SBarry Smith PETSc OpenMP controller users do not call this function directly. This function exists 37ce78bad3SBarry Smith only because we want to separate shared memory allocation methods from other code. 38ce78bad3SBarry Smith */ 39ce78bad3SBarry Smith static inline PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl) 40ce78bad3SBarry Smith { 41ce78bad3SBarry Smith MPI_Aint size; 42ce78bad3SBarry Smith void *baseptr; 43ce78bad3SBarry Smith pthread_barrierattr_t attr; 44ce78bad3SBarry Smith 45ce78bad3SBarry Smith #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 46ce78bad3SBarry Smith int fd; 47ce78bad3SBarry Smith char pathname[PETSC_MAX_PATH_LEN]; 48ce78bad3SBarry Smith #else 49ce78bad3SBarry Smith PetscMPIInt disp_unit; 50ce78bad3SBarry Smith #endif 51ce78bad3SBarry Smith 52ce78bad3SBarry Smith PetscFunctionBegin; 53ce78bad3SBarry Smith #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 54ce78bad3SBarry Smith size = sizeof(pthread_barrier_t); 55ce78bad3SBarry Smith if (ctrl->is_omp_master) { 56ce78bad3SBarry Smith /* use PETSC_COMM_SELF in PetscGetTmp, since it is a collective call. Using omp_comm would otherwise bcast the partially populated pathname to slaves */ 57ce78bad3SBarry Smith PetscCall(PetscGetTmp(PETSC_COMM_SELF, pathname, PETSC_MAX_PATH_LEN)); 58ce78bad3SBarry Smith PetscCall(PetscStrlcat(pathname, "/petsc-shm-XXXXXX", PETSC_MAX_PATH_LEN)); 59ce78bad3SBarry Smith /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */ 60ce78bad3SBarry Smith fd = mkstemp(pathname); 61ce78bad3SBarry Smith PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not create tmp file %s with mkstemp", pathname); 62ce78bad3SBarry Smith PetscCallExternal(ftruncate, fd, size); 63ce78bad3SBarry Smith baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 64ce78bad3SBarry Smith PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed"); 65ce78bad3SBarry Smith PetscCallExternal(close, fd); 66ce78bad3SBarry Smith PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm)); 67ce78bad3SBarry Smith /* this MPI_Barrier is to wait slaves to open the file before master unlinks it */ 68ce78bad3SBarry Smith PetscCallMPI(MPI_Barrier(ctrl->omp_comm)); 69ce78bad3SBarry Smith PetscCallExternal(unlink, pathname); 70ce78bad3SBarry Smith } else { 71ce78bad3SBarry Smith PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm)); 72ce78bad3SBarry Smith fd = open(pathname, O_RDWR); 73ce78bad3SBarry Smith PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not open tmp file %s", pathname); 74ce78bad3SBarry Smith baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 75ce78bad3SBarry Smith PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed"); 76ce78bad3SBarry Smith PetscCallExternal(close, fd); 77ce78bad3SBarry Smith PetscCallMPI(MPI_Barrier(ctrl->omp_comm)); 78ce78bad3SBarry Smith } 79ce78bad3SBarry Smith #else 80ce78bad3SBarry Smith size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0; 81ce78bad3SBarry Smith PetscCallMPI(MPI_Win_allocate_shared(size, 1, MPI_INFO_NULL, ctrl->omp_comm, &baseptr, &ctrl->omp_win)); 82ce78bad3SBarry Smith PetscCallMPI(MPI_Win_shared_query(ctrl->omp_win, 0, &size, &disp_unit, &baseptr)); 83ce78bad3SBarry Smith #endif 84ce78bad3SBarry Smith ctrl->barrier = (pthread_barrier_t *)baseptr; 85ce78bad3SBarry Smith 86ce78bad3SBarry Smith /* omp master initializes the barrier */ 87ce78bad3SBarry Smith if (ctrl->is_omp_master) { 88ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_size(ctrl->omp_comm, &ctrl->omp_comm_size)); 89ce78bad3SBarry Smith PetscCallExternal(pthread_barrierattr_init, &attr); 90ce78bad3SBarry Smith PetscCallExternal(pthread_barrierattr_setpshared, &attr, PTHREAD_PROCESS_SHARED); /* make the barrier also work for processes */ 91ce78bad3SBarry Smith PetscCallExternal(pthread_barrier_init, ctrl->barrier, &attr, (unsigned int)ctrl->omp_comm_size); 92ce78bad3SBarry Smith PetscCallExternal(pthread_barrierattr_destroy, &attr); 93ce78bad3SBarry Smith } 94ce78bad3SBarry Smith 95ce78bad3SBarry Smith /* this MPI_Barrier is to make sure the omp barrier is initialized before slaves use it */ 96ce78bad3SBarry Smith PetscCallMPI(MPI_Barrier(ctrl->omp_comm)); 97ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 98ce78bad3SBarry Smith } 99ce78bad3SBarry Smith 100ce78bad3SBarry Smith /* Destroy the pthread barrier in the PETSc OpenMP controller */ 101ce78bad3SBarry Smith static inline PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl) 102ce78bad3SBarry Smith { 103ce78bad3SBarry Smith PetscFunctionBegin; 104ce78bad3SBarry Smith /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before master destroys it */ 105ce78bad3SBarry Smith PetscCallMPI(MPI_Barrier(ctrl->omp_comm)); 106ce78bad3SBarry Smith if (ctrl->is_omp_master) PetscCallExternal(pthread_barrier_destroy, ctrl->barrier); 107ce78bad3SBarry Smith 108ce78bad3SBarry Smith #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 109ce78bad3SBarry Smith PetscCallExternal(munmap, ctrl->barrier, sizeof(pthread_barrier_t)); 110ce78bad3SBarry Smith #else 111ce78bad3SBarry Smith PetscCallMPI(MPI_Win_free(&ctrl->omp_win)); 112ce78bad3SBarry Smith #endif 113ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 114ce78bad3SBarry Smith } 115ce78bad3SBarry Smith 116ce78bad3SBarry Smith /*@C 117ce78bad3SBarry Smith PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries that use OpenMP 118ce78bad3SBarry Smith 119ce78bad3SBarry Smith Input Parameters: 120ce78bad3SBarry Smith + petsc_comm - a communicator some PETSc object (for example, a matrix) lives in 121ce78bad3SBarry Smith - nthreads - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value 122ce78bad3SBarry Smith 123ce78bad3SBarry Smith Output Parameter: 124ce78bad3SBarry Smith . pctrl - a PETSc OpenMP controller 125ce78bad3SBarry Smith 126ce78bad3SBarry Smith Level: developer 127ce78bad3SBarry Smith 128ce78bad3SBarry Smith Developer Note: 129ce78bad3SBarry Smith Possibly use the variable `PetscNumOMPThreads` to determine the number for threads to use 130ce78bad3SBarry Smith 131ce78bad3SBarry Smith .seealso: `PetscOmpCtrlDestroy()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, 132ce78bad3SBarry Smith @*/ 133ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscOmpCtrl *pctrl) 134ce78bad3SBarry Smith { 135ce78bad3SBarry Smith PetscOmpCtrl ctrl; 136ce78bad3SBarry Smith unsigned long *cpu_ulongs = NULL; 137ce78bad3SBarry Smith PetscShmComm pshmcomm; 138ce78bad3SBarry Smith MPI_Comm shm_comm; 139*d83f41f0SPierre Jolivet PetscMPIInt shm_rank, shm_comm_size, omp_rank, color, nr_cpu_ulongs; 140ce78bad3SBarry Smith PetscInt num_packages, num_cores; 141ce78bad3SBarry Smith 142ce78bad3SBarry Smith PetscFunctionBegin; 143ce78bad3SBarry Smith PetscCall(PetscNew(&ctrl)); 144ce78bad3SBarry Smith 145ce78bad3SBarry Smith /*================================================================================= 146ce78bad3SBarry Smith Init hwloc 147ce78bad3SBarry Smith ==================================================================================*/ 148ce78bad3SBarry Smith PetscCallExternal(hwloc_topology_init, &ctrl->topology); 149ce78bad3SBarry Smith #if HWLOC_API_VERSION >= 0x00020000 150ce78bad3SBarry Smith /* to filter out unneeded info and have faster hwloc_topology_load */ 151ce78bad3SBarry Smith PetscCallExternal(hwloc_topology_set_all_types_filter, ctrl->topology, HWLOC_TYPE_FILTER_KEEP_NONE); 152ce78bad3SBarry Smith PetscCallExternal(hwloc_topology_set_type_filter, ctrl->topology, HWLOC_OBJ_CORE, HWLOC_TYPE_FILTER_KEEP_ALL); 153ce78bad3SBarry Smith #endif 154ce78bad3SBarry Smith PetscCallExternal(hwloc_topology_load, ctrl->topology); 155ce78bad3SBarry Smith 156ce78bad3SBarry Smith /*================================================================================= 157ce78bad3SBarry Smith Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to 158ce78bad3SBarry Smith physically shared memory. Rank 0 of each omp_comm is called an OMP master, and 159ce78bad3SBarry Smith others are called slaves. OMP Masters make up a new comm called omp_master_comm, 160ce78bad3SBarry Smith which is usually passed to third party libraries. 161ce78bad3SBarry Smith ==================================================================================*/ 162ce78bad3SBarry Smith 163ce78bad3SBarry Smith /* fetch the stored shared memory communicator */ 164ce78bad3SBarry Smith PetscCall(PetscShmCommGet(petsc_comm, &pshmcomm)); 165ce78bad3SBarry Smith PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shm_comm)); 166ce78bad3SBarry Smith 167ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_rank(shm_comm, &shm_rank)); 168ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_size(shm_comm, &shm_comm_size)); 169ce78bad3SBarry Smith 170ce78bad3SBarry Smith /* PETSc decides nthreads, which is the smaller of shm_comm_size or cores per package(socket) */ 171ce78bad3SBarry Smith if (nthreads == -1) { 172ce78bad3SBarry Smith num_packages = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE); 173ce78bad3SBarry Smith num_cores = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE); 174ce78bad3SBarry Smith nthreads = num_cores / num_packages; 175ce78bad3SBarry Smith if (nthreads > shm_comm_size) nthreads = shm_comm_size; 176ce78bad3SBarry Smith } 177ce78bad3SBarry Smith 178ce78bad3SBarry Smith PetscCheck(nthreads >= 1 && nthreads <= shm_comm_size, petsc_comm, PETSC_ERR_ARG_OUTOFRANGE, "number of OpenMP threads %" PetscInt_FMT " can not be < 1 or > the MPI shared memory communicator size %d", nthreads, shm_comm_size); 179ce78bad3SBarry Smith if (shm_comm_size % nthreads) PetscCall(PetscPrintf(petsc_comm, "Warning: number of OpenMP threads %" PetscInt_FMT " is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n", nthreads, shm_comm_size)); 180ce78bad3SBarry Smith 181ce78bad3SBarry Smith /* split shm_comm into a set of omp_comms with each of size nthreads. Ex., if 182ce78bad3SBarry Smith shm_comm_size=16, nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get 183ce78bad3SBarry Smith color 1. They are put in two omp_comms. Note that petsc_ranks may or may not 184ce78bad3SBarry Smith be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1. 185ce78bad3SBarry Smith Use 0 as key so that rank ordering wont change in new comm. 186ce78bad3SBarry Smith */ 187ce78bad3SBarry Smith color = shm_rank / nthreads; 188ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_split(shm_comm, color, 0 /*key*/, &ctrl->omp_comm)); 189ce78bad3SBarry Smith 190ce78bad3SBarry Smith /* put rank 0's in omp_comms (i.e., master ranks) into a new comm - omp_master_comm */ 191ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_rank(ctrl->omp_comm, &omp_rank)); 192ce78bad3SBarry Smith if (!omp_rank) { 193ce78bad3SBarry Smith ctrl->is_omp_master = PETSC_TRUE; /* master */ 194ce78bad3SBarry Smith color = 0; 195ce78bad3SBarry Smith } else { 196ce78bad3SBarry Smith ctrl->is_omp_master = PETSC_FALSE; /* slave */ 197ce78bad3SBarry Smith color = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */ 198ce78bad3SBarry Smith } 199ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_split(petsc_comm, color, 0 /*key*/, &ctrl->omp_master_comm)); 200ce78bad3SBarry Smith 201ce78bad3SBarry Smith /*================================================================================= 202ce78bad3SBarry Smith Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put 203ce78bad3SBarry Smith slave ranks in sleep and idle their CPU, so that the master can fork OMP threads 204ce78bad3SBarry Smith and run them on the idle CPUs. 205ce78bad3SBarry Smith ==================================================================================*/ 206ce78bad3SBarry Smith PetscCall(PetscOmpCtrlCreateBarrier(ctrl)); 207ce78bad3SBarry Smith 208ce78bad3SBarry Smith /*================================================================================= 209ce78bad3SBarry Smith omp master logs its cpu binding (i.e., cpu set) and computes a new binding that 210ce78bad3SBarry Smith is the union of the bindings of all ranks in the omp_comm 211ce78bad3SBarry Smith =================================================================================*/ 212ce78bad3SBarry Smith 213ce78bad3SBarry Smith ctrl->cpuset = hwloc_bitmap_alloc(); 214ce78bad3SBarry Smith PetscCheck(ctrl->cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed"); 215ce78bad3SBarry Smith PetscCallExternal(hwloc_get_cpubind, ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS); 216ce78bad3SBarry Smith 217ce78bad3SBarry Smith /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */ 218ce78bad3SBarry Smith nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology)) + sizeof(unsigned long) * 8) / sizeof(unsigned long) / 8; 219ce78bad3SBarry Smith PetscCall(PetscMalloc1(nr_cpu_ulongs, &cpu_ulongs)); 220ce78bad3SBarry Smith if (nr_cpu_ulongs == 1) { 221ce78bad3SBarry Smith cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset); 222ce78bad3SBarry Smith } else { 223*d83f41f0SPierre Jolivet for (PetscMPIInt i = 0; i < nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset, (unsigned)i); 224ce78bad3SBarry Smith } 225ce78bad3SBarry Smith 226ce78bad3SBarry Smith PetscCallMPI(MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs, nr_cpu_ulongs, MPI_UNSIGNED_LONG, MPI_BOR, 0, ctrl->omp_comm)); 227ce78bad3SBarry Smith 228ce78bad3SBarry Smith if (ctrl->is_omp_master) { 229ce78bad3SBarry Smith ctrl->omp_cpuset = hwloc_bitmap_alloc(); 230ce78bad3SBarry Smith PetscCheck(ctrl->omp_cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed"); 231ce78bad3SBarry Smith if (nr_cpu_ulongs == 1) { 232ce78bad3SBarry Smith #if HWLOC_API_VERSION >= 0x00020000 233ce78bad3SBarry Smith PetscCallExternal(hwloc_bitmap_from_ulong, ctrl->omp_cpuset, cpu_ulongs[0]); 234ce78bad3SBarry Smith #else 235ce78bad3SBarry Smith hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]); 236ce78bad3SBarry Smith #endif 237ce78bad3SBarry Smith } else { 238*d83f41f0SPierre Jolivet for (PetscMPIInt i = 0; i < nr_cpu_ulongs; i++) { 239ce78bad3SBarry Smith #if HWLOC_API_VERSION >= 0x00020000 240ce78bad3SBarry Smith PetscCallExternal(hwloc_bitmap_set_ith_ulong, ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]); 241ce78bad3SBarry Smith #else 242ce78bad3SBarry Smith hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]); 243ce78bad3SBarry Smith #endif 244ce78bad3SBarry Smith } 245ce78bad3SBarry Smith } 246ce78bad3SBarry Smith } 247ce78bad3SBarry Smith PetscCall(PetscFree(cpu_ulongs)); 248ce78bad3SBarry Smith *pctrl = ctrl; 249ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 250ce78bad3SBarry Smith } 251ce78bad3SBarry Smith 252ce78bad3SBarry Smith /*@C 253ce78bad3SBarry Smith PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller 254ce78bad3SBarry Smith 255ce78bad3SBarry Smith Input Parameter: 256ce78bad3SBarry Smith . pctrl - a PETSc OpenMP controller 257ce78bad3SBarry Smith 258ce78bad3SBarry Smith Level: developer 259ce78bad3SBarry Smith 260ce78bad3SBarry Smith .seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, 261ce78bad3SBarry Smith @*/ 262ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl) 263ce78bad3SBarry Smith { 264ce78bad3SBarry Smith PetscOmpCtrl ctrl = *pctrl; 265ce78bad3SBarry Smith 266ce78bad3SBarry Smith PetscFunctionBegin; 267ce78bad3SBarry Smith hwloc_bitmap_free(ctrl->cpuset); 268ce78bad3SBarry Smith hwloc_topology_destroy(ctrl->topology); 269ce78bad3SBarry Smith PetscCall(PetscOmpCtrlDestroyBarrier(ctrl)); 270ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_free(&ctrl->omp_comm)); 271ce78bad3SBarry Smith if (ctrl->is_omp_master) { 272ce78bad3SBarry Smith hwloc_bitmap_free(ctrl->omp_cpuset); 273ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_free(&ctrl->omp_master_comm)); 274ce78bad3SBarry Smith } 275ce78bad3SBarry Smith PetscCall(PetscFree(ctrl)); 276ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 277ce78bad3SBarry Smith } 278ce78bad3SBarry Smith 279ce78bad3SBarry Smith /*@C 280ce78bad3SBarry Smith PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller 281ce78bad3SBarry Smith 282ce78bad3SBarry Smith Input Parameter: 283ce78bad3SBarry Smith . ctrl - a PETSc OMP controller 284ce78bad3SBarry Smith 285ce78bad3SBarry Smith Output Parameters: 286ce78bad3SBarry Smith + omp_comm - a communicator that includes a master rank and slave ranks where master spawns threads 287ce78bad3SBarry Smith . omp_master_comm - on master ranks, return a communicator that include master ranks of each omp_comm; 288ce78bad3SBarry Smith on slave ranks, `MPI_COMM_NULL` will be return in reality. 289ce78bad3SBarry Smith - is_omp_master - true if the calling process is an OMP master rank. 290ce78bad3SBarry Smith 291ce78bad3SBarry Smith Note: 292ce78bad3SBarry Smith Any output parameter can be `NULL`. The parameter is just ignored. 293ce78bad3SBarry Smith 294ce78bad3SBarry Smith Level: developer 295ce78bad3SBarry Smith 296ce78bad3SBarry Smith .seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, 297ce78bad3SBarry Smith @*/ 298ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl, MPI_Comm *omp_comm, MPI_Comm *omp_master_comm, PetscBool *is_omp_master) 299ce78bad3SBarry Smith { 300ce78bad3SBarry Smith PetscFunctionBegin; 301ce78bad3SBarry Smith if (omp_comm) *omp_comm = ctrl->omp_comm; 302ce78bad3SBarry Smith if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm; 303ce78bad3SBarry Smith if (is_omp_master) *is_omp_master = ctrl->is_omp_master; 304ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 305ce78bad3SBarry Smith } 306ce78bad3SBarry Smith 307ce78bad3SBarry Smith /*@C 308ce78bad3SBarry Smith PetscOmpCtrlBarrier - Do barrier on MPI ranks in omp_comm contained by the PETSc OMP controller (to let slave ranks free their CPU) 309ce78bad3SBarry Smith 310ce78bad3SBarry Smith Input Parameter: 311ce78bad3SBarry Smith . ctrl - a PETSc OMP controller 312ce78bad3SBarry Smith 313ce78bad3SBarry Smith Notes: 314ce78bad3SBarry Smith This is a pthread barrier on MPI ranks. Using `MPI_Barrier()` instead is conceptually correct. But MPI standard does not 315ce78bad3SBarry Smith require processes blocked by `MPI_Barrier()` free their CPUs to let other processes progress. In practice, to minilize latency, 316ce78bad3SBarry Smith MPI ranks stuck in `MPI_Barrier()` keep polling and do not free CPUs. In contrast, pthread_barrier has this requirement. 317ce78bad3SBarry Smith 318ce78bad3SBarry Smith A code using `PetscOmpCtrlBarrier()` would be like this, 319ce78bad3SBarry Smith .vb 320ce78bad3SBarry Smith if (is_omp_master) { 321ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterBegin(ctrl); 322ce78bad3SBarry Smith Call the library using OpenMP 323ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterEnd(ctrl); 324ce78bad3SBarry Smith } 325ce78bad3SBarry Smith PetscOmpCtrlBarrier(ctrl); 326ce78bad3SBarry Smith .ve 327ce78bad3SBarry Smith 328ce78bad3SBarry Smith Level: developer 329ce78bad3SBarry Smith 330ce78bad3SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, 331ce78bad3SBarry Smith @*/ 332ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl) 333ce78bad3SBarry Smith { 334ce78bad3SBarry Smith int err; 335ce78bad3SBarry Smith 336ce78bad3SBarry Smith PetscFunctionBegin; 337ce78bad3SBarry Smith err = pthread_barrier_wait(ctrl->barrier); 338ce78bad3SBarry Smith PetscCheck(!err || err == PTHREAD_BARRIER_SERIAL_THREAD, PETSC_COMM_SELF, PETSC_ERR_LIB, "pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d", err); 339ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 340ce78bad3SBarry Smith } 341ce78bad3SBarry Smith 342ce78bad3SBarry Smith /*@C 343ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks 344ce78bad3SBarry Smith 345ce78bad3SBarry Smith Input Parameter: 346ce78bad3SBarry Smith . ctrl - a PETSc OMP controller 347ce78bad3SBarry Smith 348ce78bad3SBarry Smith Note: 349ce78bad3SBarry Smith Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank. 350ce78bad3SBarry Smith This function changes CPU binding of master ranks and nthreads-var of OpenMP runtime 351ce78bad3SBarry Smith 352ce78bad3SBarry Smith Level: developer 353ce78bad3SBarry Smith 354ce78bad3SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()` 355ce78bad3SBarry Smith @*/ 356ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl) 357ce78bad3SBarry Smith { 358ce78bad3SBarry Smith PetscFunctionBegin; 359ce78bad3SBarry Smith PetscCallExternal(hwloc_set_cpubind, ctrl->topology, ctrl->omp_cpuset, HWLOC_CPUBIND_PROCESS); 360ce78bad3SBarry Smith omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREAD env var */ 361ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 362ce78bad3SBarry Smith } 363ce78bad3SBarry Smith 364ce78bad3SBarry Smith /*@C 365ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks 366ce78bad3SBarry Smith 367ce78bad3SBarry Smith Input Parameter: 368ce78bad3SBarry Smith . ctrl - a PETSc OMP controller 369ce78bad3SBarry Smith 370ce78bad3SBarry Smith Note: 371ce78bad3SBarry Smith Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank. 372ce78bad3SBarry Smith This function restores the CPU binding of master ranks and set and nthreads-var of OpenMP runtime to 1. 373ce78bad3SBarry Smith 374ce78bad3SBarry Smith Level: developer 375ce78bad3SBarry Smith 376ce78bad3SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()` 377ce78bad3SBarry Smith @*/ 378ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl) 379ce78bad3SBarry Smith { 380ce78bad3SBarry Smith PetscFunctionBegin; 381ce78bad3SBarry Smith PetscCallExternal(hwloc_set_cpubind, ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS); 382ce78bad3SBarry Smith omp_set_num_threads(1); 383ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 384ce78bad3SBarry Smith } 385ce78bad3SBarry Smith 386ce78bad3SBarry Smith #undef USE_MMAP_ALLOCATE_SHARED_MEMORY 387