1*ce78bad3SBarry Smith #include <petscsys.h> /*I "petscsys.h" I*/ 2*ce78bad3SBarry Smith #include <petsc/private/petscimpl.h> 3*ce78bad3SBarry Smith #include <pthread.h> 4*ce78bad3SBarry Smith #include <hwloc.h> 5*ce78bad3SBarry Smith #include <omp.h> 6*ce78bad3SBarry Smith 7*ce78bad3SBarry Smith /* Use mmap() to allocate shared mmeory (for the pthread_barrier_t object) if it is available, 8*ce78bad3SBarry Smith otherwise use MPI_Win_allocate_shared. They should have the same effect except MPI-3 is much 9*ce78bad3SBarry Smith simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance 10*ce78bad3SBarry Smith by 50%. Until the reason is found out, we use mmap() instead. 11*ce78bad3SBarry Smith */ 12*ce78bad3SBarry Smith #define USE_MMAP_ALLOCATE_SHARED_MEMORY 13*ce78bad3SBarry Smith 14*ce78bad3SBarry Smith #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 15*ce78bad3SBarry Smith #include <sys/mman.h> 16*ce78bad3SBarry Smith #include <sys/types.h> 17*ce78bad3SBarry Smith #include <sys/stat.h> 18*ce78bad3SBarry Smith #include <fcntl.h> 19*ce78bad3SBarry Smith #endif 20*ce78bad3SBarry Smith 21*ce78bad3SBarry Smith struct _n_PetscOmpCtrl { 22*ce78bad3SBarry Smith MPI_Comm omp_comm; /* a shared memory communicator to spawn omp threads */ 23*ce78bad3SBarry Smith MPI_Comm omp_master_comm; /* a communicator to give to third party libraries */ 24*ce78bad3SBarry Smith PetscMPIInt omp_comm_size; /* size of omp_comm, a kind of OMP_NUM_THREADS */ 25*ce78bad3SBarry Smith PetscBool is_omp_master; /* rank 0's in omp_comm */ 26*ce78bad3SBarry Smith MPI_Win omp_win; /* a shared memory window containing a barrier */ 27*ce78bad3SBarry Smith pthread_barrier_t *barrier; /* pointer to the barrier */ 28*ce78bad3SBarry Smith hwloc_topology_t topology; 29*ce78bad3SBarry Smith hwloc_cpuset_t cpuset; /* cpu bindings of omp master */ 30*ce78bad3SBarry Smith hwloc_cpuset_t omp_cpuset; /* union of cpu bindings of ranks in omp_comm */ 31*ce78bad3SBarry Smith }; 32*ce78bad3SBarry Smith 33*ce78bad3SBarry Smith /* Allocate and initialize a pthread_barrier_t object in memory shared by processes in omp_comm 34*ce78bad3SBarry Smith contained by the controller. 35*ce78bad3SBarry Smith 36*ce78bad3SBarry Smith PETSc OpenMP controller users do not call this function directly. This function exists 37*ce78bad3SBarry Smith only because we want to separate shared memory allocation methods from other code. 38*ce78bad3SBarry Smith */ 39*ce78bad3SBarry Smith static inline PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl) 40*ce78bad3SBarry Smith { 41*ce78bad3SBarry Smith MPI_Aint size; 42*ce78bad3SBarry Smith void *baseptr; 43*ce78bad3SBarry Smith pthread_barrierattr_t attr; 44*ce78bad3SBarry Smith 45*ce78bad3SBarry Smith #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 46*ce78bad3SBarry Smith int fd; 47*ce78bad3SBarry Smith char pathname[PETSC_MAX_PATH_LEN]; 48*ce78bad3SBarry Smith #else 49*ce78bad3SBarry Smith PetscMPIInt disp_unit; 50*ce78bad3SBarry Smith #endif 51*ce78bad3SBarry Smith 52*ce78bad3SBarry Smith PetscFunctionBegin; 53*ce78bad3SBarry Smith #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 54*ce78bad3SBarry Smith size = sizeof(pthread_barrier_t); 55*ce78bad3SBarry Smith if (ctrl->is_omp_master) { 56*ce78bad3SBarry Smith /* use PETSC_COMM_SELF in PetscGetTmp, since it is a collective call. Using omp_comm would otherwise bcast the partially populated pathname to slaves */ 57*ce78bad3SBarry Smith PetscCall(PetscGetTmp(PETSC_COMM_SELF, pathname, PETSC_MAX_PATH_LEN)); 58*ce78bad3SBarry Smith PetscCall(PetscStrlcat(pathname, "/petsc-shm-XXXXXX", PETSC_MAX_PATH_LEN)); 59*ce78bad3SBarry Smith /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */ 60*ce78bad3SBarry Smith fd = mkstemp(pathname); 61*ce78bad3SBarry Smith PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not create tmp file %s with mkstemp", pathname); 62*ce78bad3SBarry Smith PetscCallExternal(ftruncate, fd, size); 63*ce78bad3SBarry Smith baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 64*ce78bad3SBarry Smith PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed"); 65*ce78bad3SBarry Smith PetscCallExternal(close, fd); 66*ce78bad3SBarry Smith PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm)); 67*ce78bad3SBarry Smith /* this MPI_Barrier is to wait slaves to open the file before master unlinks it */ 68*ce78bad3SBarry Smith PetscCallMPI(MPI_Barrier(ctrl->omp_comm)); 69*ce78bad3SBarry Smith PetscCallExternal(unlink, pathname); 70*ce78bad3SBarry Smith } else { 71*ce78bad3SBarry Smith PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm)); 72*ce78bad3SBarry Smith fd = open(pathname, O_RDWR); 73*ce78bad3SBarry Smith PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not open tmp file %s", pathname); 74*ce78bad3SBarry Smith baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 75*ce78bad3SBarry Smith PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed"); 76*ce78bad3SBarry Smith PetscCallExternal(close, fd); 77*ce78bad3SBarry Smith PetscCallMPI(MPI_Barrier(ctrl->omp_comm)); 78*ce78bad3SBarry Smith } 79*ce78bad3SBarry Smith #else 80*ce78bad3SBarry Smith size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0; 81*ce78bad3SBarry Smith PetscCallMPI(MPI_Win_allocate_shared(size, 1, MPI_INFO_NULL, ctrl->omp_comm, &baseptr, &ctrl->omp_win)); 82*ce78bad3SBarry Smith PetscCallMPI(MPI_Win_shared_query(ctrl->omp_win, 0, &size, &disp_unit, &baseptr)); 83*ce78bad3SBarry Smith #endif 84*ce78bad3SBarry Smith ctrl->barrier = (pthread_barrier_t *)baseptr; 85*ce78bad3SBarry Smith 86*ce78bad3SBarry Smith /* omp master initializes the barrier */ 87*ce78bad3SBarry Smith if (ctrl->is_omp_master) { 88*ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_size(ctrl->omp_comm, &ctrl->omp_comm_size)); 89*ce78bad3SBarry Smith PetscCallExternal(pthread_barrierattr_init, &attr); 90*ce78bad3SBarry Smith PetscCallExternal(pthread_barrierattr_setpshared, &attr, PTHREAD_PROCESS_SHARED); /* make the barrier also work for processes */ 91*ce78bad3SBarry Smith PetscCallExternal(pthread_barrier_init, ctrl->barrier, &attr, (unsigned int)ctrl->omp_comm_size); 92*ce78bad3SBarry Smith PetscCallExternal(pthread_barrierattr_destroy, &attr); 93*ce78bad3SBarry Smith } 94*ce78bad3SBarry Smith 95*ce78bad3SBarry Smith /* this MPI_Barrier is to make sure the omp barrier is initialized before slaves use it */ 96*ce78bad3SBarry Smith PetscCallMPI(MPI_Barrier(ctrl->omp_comm)); 97*ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 98*ce78bad3SBarry Smith } 99*ce78bad3SBarry Smith 100*ce78bad3SBarry Smith /* Destroy the pthread barrier in the PETSc OpenMP controller */ 101*ce78bad3SBarry Smith static inline PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl) 102*ce78bad3SBarry Smith { 103*ce78bad3SBarry Smith PetscFunctionBegin; 104*ce78bad3SBarry Smith /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before master destroys it */ 105*ce78bad3SBarry Smith PetscCallMPI(MPI_Barrier(ctrl->omp_comm)); 106*ce78bad3SBarry Smith if (ctrl->is_omp_master) PetscCallExternal(pthread_barrier_destroy, ctrl->barrier); 107*ce78bad3SBarry Smith 108*ce78bad3SBarry Smith #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 109*ce78bad3SBarry Smith PetscCallExternal(munmap, ctrl->barrier, sizeof(pthread_barrier_t)); 110*ce78bad3SBarry Smith #else 111*ce78bad3SBarry Smith PetscCallMPI(MPI_Win_free(&ctrl->omp_win)); 112*ce78bad3SBarry Smith #endif 113*ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 114*ce78bad3SBarry Smith } 115*ce78bad3SBarry Smith 116*ce78bad3SBarry Smith /*@C 117*ce78bad3SBarry Smith PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries that use OpenMP 118*ce78bad3SBarry Smith 119*ce78bad3SBarry Smith Input Parameters: 120*ce78bad3SBarry Smith + petsc_comm - a communicator some PETSc object (for example, a matrix) lives in 121*ce78bad3SBarry Smith - nthreads - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value 122*ce78bad3SBarry Smith 123*ce78bad3SBarry Smith Output Parameter: 124*ce78bad3SBarry Smith . pctrl - a PETSc OpenMP controller 125*ce78bad3SBarry Smith 126*ce78bad3SBarry Smith Level: developer 127*ce78bad3SBarry Smith 128*ce78bad3SBarry Smith Developer Note: 129*ce78bad3SBarry Smith Possibly use the variable `PetscNumOMPThreads` to determine the number for threads to use 130*ce78bad3SBarry Smith 131*ce78bad3SBarry Smith .seealso: `PetscOmpCtrlDestroy()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, 132*ce78bad3SBarry Smith @*/ 133*ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscOmpCtrl *pctrl) 134*ce78bad3SBarry Smith { 135*ce78bad3SBarry Smith PetscOmpCtrl ctrl; 136*ce78bad3SBarry Smith unsigned long *cpu_ulongs = NULL; 137*ce78bad3SBarry Smith PetscInt i, nr_cpu_ulongs; 138*ce78bad3SBarry Smith PetscShmComm pshmcomm; 139*ce78bad3SBarry Smith MPI_Comm shm_comm; 140*ce78bad3SBarry Smith PetscMPIInt shm_rank, shm_comm_size, omp_rank, color; 141*ce78bad3SBarry Smith PetscInt num_packages, num_cores; 142*ce78bad3SBarry Smith 143*ce78bad3SBarry Smith PetscFunctionBegin; 144*ce78bad3SBarry Smith PetscCall(PetscNew(&ctrl)); 145*ce78bad3SBarry Smith 146*ce78bad3SBarry Smith /*================================================================================= 147*ce78bad3SBarry Smith Init hwloc 148*ce78bad3SBarry Smith ==================================================================================*/ 149*ce78bad3SBarry Smith PetscCallExternal(hwloc_topology_init, &ctrl->topology); 150*ce78bad3SBarry Smith #if HWLOC_API_VERSION >= 0x00020000 151*ce78bad3SBarry Smith /* to filter out unneeded info and have faster hwloc_topology_load */ 152*ce78bad3SBarry Smith PetscCallExternal(hwloc_topology_set_all_types_filter, ctrl->topology, HWLOC_TYPE_FILTER_KEEP_NONE); 153*ce78bad3SBarry Smith PetscCallExternal(hwloc_topology_set_type_filter, ctrl->topology, HWLOC_OBJ_CORE, HWLOC_TYPE_FILTER_KEEP_ALL); 154*ce78bad3SBarry Smith #endif 155*ce78bad3SBarry Smith PetscCallExternal(hwloc_topology_load, ctrl->topology); 156*ce78bad3SBarry Smith 157*ce78bad3SBarry Smith /*================================================================================= 158*ce78bad3SBarry Smith Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to 159*ce78bad3SBarry Smith physically shared memory. Rank 0 of each omp_comm is called an OMP master, and 160*ce78bad3SBarry Smith others are called slaves. OMP Masters make up a new comm called omp_master_comm, 161*ce78bad3SBarry Smith which is usually passed to third party libraries. 162*ce78bad3SBarry Smith ==================================================================================*/ 163*ce78bad3SBarry Smith 164*ce78bad3SBarry Smith /* fetch the stored shared memory communicator */ 165*ce78bad3SBarry Smith PetscCall(PetscShmCommGet(petsc_comm, &pshmcomm)); 166*ce78bad3SBarry Smith PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shm_comm)); 167*ce78bad3SBarry Smith 168*ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_rank(shm_comm, &shm_rank)); 169*ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_size(shm_comm, &shm_comm_size)); 170*ce78bad3SBarry Smith 171*ce78bad3SBarry Smith /* PETSc decides nthreads, which is the smaller of shm_comm_size or cores per package(socket) */ 172*ce78bad3SBarry Smith if (nthreads == -1) { 173*ce78bad3SBarry Smith num_packages = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE); 174*ce78bad3SBarry Smith num_cores = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE); 175*ce78bad3SBarry Smith nthreads = num_cores / num_packages; 176*ce78bad3SBarry Smith if (nthreads > shm_comm_size) nthreads = shm_comm_size; 177*ce78bad3SBarry Smith } 178*ce78bad3SBarry Smith 179*ce78bad3SBarry Smith PetscCheck(nthreads >= 1 && nthreads <= shm_comm_size, petsc_comm, PETSC_ERR_ARG_OUTOFRANGE, "number of OpenMP threads %" PetscInt_FMT " can not be < 1 or > the MPI shared memory communicator size %d", nthreads, shm_comm_size); 180*ce78bad3SBarry Smith if (shm_comm_size % nthreads) PetscCall(PetscPrintf(petsc_comm, "Warning: number of OpenMP threads %" PetscInt_FMT " is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n", nthreads, shm_comm_size)); 181*ce78bad3SBarry Smith 182*ce78bad3SBarry Smith /* split shm_comm into a set of omp_comms with each of size nthreads. Ex., if 183*ce78bad3SBarry Smith shm_comm_size=16, nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get 184*ce78bad3SBarry Smith color 1. They are put in two omp_comms. Note that petsc_ranks may or may not 185*ce78bad3SBarry Smith be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1. 186*ce78bad3SBarry Smith Use 0 as key so that rank ordering wont change in new comm. 187*ce78bad3SBarry Smith */ 188*ce78bad3SBarry Smith color = shm_rank / nthreads; 189*ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_split(shm_comm, color, 0 /*key*/, &ctrl->omp_comm)); 190*ce78bad3SBarry Smith 191*ce78bad3SBarry Smith /* put rank 0's in omp_comms (i.e., master ranks) into a new comm - omp_master_comm */ 192*ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_rank(ctrl->omp_comm, &omp_rank)); 193*ce78bad3SBarry Smith if (!omp_rank) { 194*ce78bad3SBarry Smith ctrl->is_omp_master = PETSC_TRUE; /* master */ 195*ce78bad3SBarry Smith color = 0; 196*ce78bad3SBarry Smith } else { 197*ce78bad3SBarry Smith ctrl->is_omp_master = PETSC_FALSE; /* slave */ 198*ce78bad3SBarry Smith color = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */ 199*ce78bad3SBarry Smith } 200*ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_split(petsc_comm, color, 0 /*key*/, &ctrl->omp_master_comm)); 201*ce78bad3SBarry Smith 202*ce78bad3SBarry Smith /*================================================================================= 203*ce78bad3SBarry Smith Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put 204*ce78bad3SBarry Smith slave ranks in sleep and idle their CPU, so that the master can fork OMP threads 205*ce78bad3SBarry Smith and run them on the idle CPUs. 206*ce78bad3SBarry Smith ==================================================================================*/ 207*ce78bad3SBarry Smith PetscCall(PetscOmpCtrlCreateBarrier(ctrl)); 208*ce78bad3SBarry Smith 209*ce78bad3SBarry Smith /*================================================================================= 210*ce78bad3SBarry Smith omp master logs its cpu binding (i.e., cpu set) and computes a new binding that 211*ce78bad3SBarry Smith is the union of the bindings of all ranks in the omp_comm 212*ce78bad3SBarry Smith =================================================================================*/ 213*ce78bad3SBarry Smith 214*ce78bad3SBarry Smith ctrl->cpuset = hwloc_bitmap_alloc(); 215*ce78bad3SBarry Smith PetscCheck(ctrl->cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed"); 216*ce78bad3SBarry Smith PetscCallExternal(hwloc_get_cpubind, ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS); 217*ce78bad3SBarry Smith 218*ce78bad3SBarry Smith /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */ 219*ce78bad3SBarry Smith nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology)) + sizeof(unsigned long) * 8) / sizeof(unsigned long) / 8; 220*ce78bad3SBarry Smith PetscCall(PetscMalloc1(nr_cpu_ulongs, &cpu_ulongs)); 221*ce78bad3SBarry Smith if (nr_cpu_ulongs == 1) { 222*ce78bad3SBarry Smith cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset); 223*ce78bad3SBarry Smith } else { 224*ce78bad3SBarry Smith for (i = 0; i < nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset, (unsigned)i); 225*ce78bad3SBarry Smith } 226*ce78bad3SBarry Smith 227*ce78bad3SBarry Smith PetscCallMPI(MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs, nr_cpu_ulongs, MPI_UNSIGNED_LONG, MPI_BOR, 0, ctrl->omp_comm)); 228*ce78bad3SBarry Smith 229*ce78bad3SBarry Smith if (ctrl->is_omp_master) { 230*ce78bad3SBarry Smith ctrl->omp_cpuset = hwloc_bitmap_alloc(); 231*ce78bad3SBarry Smith PetscCheck(ctrl->omp_cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed"); 232*ce78bad3SBarry Smith if (nr_cpu_ulongs == 1) { 233*ce78bad3SBarry Smith #if HWLOC_API_VERSION >= 0x00020000 234*ce78bad3SBarry Smith PetscCallExternal(hwloc_bitmap_from_ulong, ctrl->omp_cpuset, cpu_ulongs[0]); 235*ce78bad3SBarry Smith #else 236*ce78bad3SBarry Smith hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]); 237*ce78bad3SBarry Smith #endif 238*ce78bad3SBarry Smith } else { 239*ce78bad3SBarry Smith for (i = 0; i < nr_cpu_ulongs; i++) { 240*ce78bad3SBarry Smith #if HWLOC_API_VERSION >= 0x00020000 241*ce78bad3SBarry Smith PetscCallExternal(hwloc_bitmap_set_ith_ulong, ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]); 242*ce78bad3SBarry Smith #else 243*ce78bad3SBarry Smith hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]); 244*ce78bad3SBarry Smith #endif 245*ce78bad3SBarry Smith } 246*ce78bad3SBarry Smith } 247*ce78bad3SBarry Smith } 248*ce78bad3SBarry Smith PetscCall(PetscFree(cpu_ulongs)); 249*ce78bad3SBarry Smith *pctrl = ctrl; 250*ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 251*ce78bad3SBarry Smith } 252*ce78bad3SBarry Smith 253*ce78bad3SBarry Smith /*@C 254*ce78bad3SBarry Smith PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller 255*ce78bad3SBarry Smith 256*ce78bad3SBarry Smith Input Parameter: 257*ce78bad3SBarry Smith . pctrl - a PETSc OpenMP controller 258*ce78bad3SBarry Smith 259*ce78bad3SBarry Smith Level: developer 260*ce78bad3SBarry Smith 261*ce78bad3SBarry Smith .seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, 262*ce78bad3SBarry Smith @*/ 263*ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl) 264*ce78bad3SBarry Smith { 265*ce78bad3SBarry Smith PetscOmpCtrl ctrl = *pctrl; 266*ce78bad3SBarry Smith 267*ce78bad3SBarry Smith PetscFunctionBegin; 268*ce78bad3SBarry Smith hwloc_bitmap_free(ctrl->cpuset); 269*ce78bad3SBarry Smith hwloc_topology_destroy(ctrl->topology); 270*ce78bad3SBarry Smith PetscCall(PetscOmpCtrlDestroyBarrier(ctrl)); 271*ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_free(&ctrl->omp_comm)); 272*ce78bad3SBarry Smith if (ctrl->is_omp_master) { 273*ce78bad3SBarry Smith hwloc_bitmap_free(ctrl->omp_cpuset); 274*ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_free(&ctrl->omp_master_comm)); 275*ce78bad3SBarry Smith } 276*ce78bad3SBarry Smith PetscCall(PetscFree(ctrl)); 277*ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 278*ce78bad3SBarry Smith } 279*ce78bad3SBarry Smith 280*ce78bad3SBarry Smith /*@C 281*ce78bad3SBarry Smith PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller 282*ce78bad3SBarry Smith 283*ce78bad3SBarry Smith Input Parameter: 284*ce78bad3SBarry Smith . ctrl - a PETSc OMP controller 285*ce78bad3SBarry Smith 286*ce78bad3SBarry Smith Output Parameters: 287*ce78bad3SBarry Smith + omp_comm - a communicator that includes a master rank and slave ranks where master spawns threads 288*ce78bad3SBarry Smith . omp_master_comm - on master ranks, return a communicator that include master ranks of each omp_comm; 289*ce78bad3SBarry Smith on slave ranks, `MPI_COMM_NULL` will be return in reality. 290*ce78bad3SBarry Smith - is_omp_master - true if the calling process is an OMP master rank. 291*ce78bad3SBarry Smith 292*ce78bad3SBarry Smith Note: 293*ce78bad3SBarry Smith Any output parameter can be `NULL`. The parameter is just ignored. 294*ce78bad3SBarry Smith 295*ce78bad3SBarry Smith Level: developer 296*ce78bad3SBarry Smith 297*ce78bad3SBarry Smith .seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, 298*ce78bad3SBarry Smith @*/ 299*ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl, MPI_Comm *omp_comm, MPI_Comm *omp_master_comm, PetscBool *is_omp_master) 300*ce78bad3SBarry Smith { 301*ce78bad3SBarry Smith PetscFunctionBegin; 302*ce78bad3SBarry Smith if (omp_comm) *omp_comm = ctrl->omp_comm; 303*ce78bad3SBarry Smith if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm; 304*ce78bad3SBarry Smith if (is_omp_master) *is_omp_master = ctrl->is_omp_master; 305*ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 306*ce78bad3SBarry Smith } 307*ce78bad3SBarry Smith 308*ce78bad3SBarry Smith /*@C 309*ce78bad3SBarry Smith PetscOmpCtrlBarrier - Do barrier on MPI ranks in omp_comm contained by the PETSc OMP controller (to let slave ranks free their CPU) 310*ce78bad3SBarry Smith 311*ce78bad3SBarry Smith Input Parameter: 312*ce78bad3SBarry Smith . ctrl - a PETSc OMP controller 313*ce78bad3SBarry Smith 314*ce78bad3SBarry Smith Notes: 315*ce78bad3SBarry Smith This is a pthread barrier on MPI ranks. Using `MPI_Barrier()` instead is conceptually correct. But MPI standard does not 316*ce78bad3SBarry Smith require processes blocked by `MPI_Barrier()` free their CPUs to let other processes progress. In practice, to minilize latency, 317*ce78bad3SBarry Smith MPI ranks stuck in `MPI_Barrier()` keep polling and do not free CPUs. In contrast, pthread_barrier has this requirement. 318*ce78bad3SBarry Smith 319*ce78bad3SBarry Smith A code using `PetscOmpCtrlBarrier()` would be like this, 320*ce78bad3SBarry Smith .vb 321*ce78bad3SBarry Smith if (is_omp_master) { 322*ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterBegin(ctrl); 323*ce78bad3SBarry Smith Call the library using OpenMP 324*ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterEnd(ctrl); 325*ce78bad3SBarry Smith } 326*ce78bad3SBarry Smith PetscOmpCtrlBarrier(ctrl); 327*ce78bad3SBarry Smith .ve 328*ce78bad3SBarry Smith 329*ce78bad3SBarry Smith Level: developer 330*ce78bad3SBarry Smith 331*ce78bad3SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, 332*ce78bad3SBarry Smith @*/ 333*ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl) 334*ce78bad3SBarry Smith { 335*ce78bad3SBarry Smith int err; 336*ce78bad3SBarry Smith 337*ce78bad3SBarry Smith PetscFunctionBegin; 338*ce78bad3SBarry Smith err = pthread_barrier_wait(ctrl->barrier); 339*ce78bad3SBarry Smith PetscCheck(!err || err == PTHREAD_BARRIER_SERIAL_THREAD, PETSC_COMM_SELF, PETSC_ERR_LIB, "pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d", err); 340*ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 341*ce78bad3SBarry Smith } 342*ce78bad3SBarry Smith 343*ce78bad3SBarry Smith /*@C 344*ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks 345*ce78bad3SBarry Smith 346*ce78bad3SBarry Smith Input Parameter: 347*ce78bad3SBarry Smith . ctrl - a PETSc OMP controller 348*ce78bad3SBarry Smith 349*ce78bad3SBarry Smith Note: 350*ce78bad3SBarry Smith Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank. 351*ce78bad3SBarry Smith This function changes CPU binding of master ranks and nthreads-var of OpenMP runtime 352*ce78bad3SBarry Smith 353*ce78bad3SBarry Smith Level: developer 354*ce78bad3SBarry Smith 355*ce78bad3SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()` 356*ce78bad3SBarry Smith @*/ 357*ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl) 358*ce78bad3SBarry Smith { 359*ce78bad3SBarry Smith PetscFunctionBegin; 360*ce78bad3SBarry Smith PetscCallExternal(hwloc_set_cpubind, ctrl->topology, ctrl->omp_cpuset, HWLOC_CPUBIND_PROCESS); 361*ce78bad3SBarry Smith omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREAD env var */ 362*ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 363*ce78bad3SBarry Smith } 364*ce78bad3SBarry Smith 365*ce78bad3SBarry Smith /*@C 366*ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks 367*ce78bad3SBarry Smith 368*ce78bad3SBarry Smith Input Parameter: 369*ce78bad3SBarry Smith . ctrl - a PETSc OMP controller 370*ce78bad3SBarry Smith 371*ce78bad3SBarry Smith Note: 372*ce78bad3SBarry Smith Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank. 373*ce78bad3SBarry Smith This function restores the CPU binding of master ranks and set and nthreads-var of OpenMP runtime to 1. 374*ce78bad3SBarry Smith 375*ce78bad3SBarry Smith Level: developer 376*ce78bad3SBarry Smith 377*ce78bad3SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()` 378*ce78bad3SBarry Smith @*/ 379*ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl) 380*ce78bad3SBarry Smith { 381*ce78bad3SBarry Smith PetscFunctionBegin; 382*ce78bad3SBarry Smith PetscCallExternal(hwloc_set_cpubind, ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS); 383*ce78bad3SBarry Smith omp_set_num_threads(1); 384*ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 385*ce78bad3SBarry Smith } 386*ce78bad3SBarry Smith 387*ce78bad3SBarry Smith #undef USE_MMAP_ALLOCATE_SHARED_MEMORY 388