#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/petscimpl.h>

struct _n_PetscShmComm {
  PetscMPIInt *globranks;         /* global ranks of each rank in the shared memory communicator */
  PetscMPIInt  shmsize;           /* size of the shared memory communicator */
  MPI_Comm     globcomm, shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
};

/*
   Private routine to delete the internal shared memory communicator when a communicator is freed.

   This is called by MPI, not by users; MPI_Comm_free() invokes it when the communicator that carries this data as an attribute is freed.

   Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *val, void *extra_state) {
  PetscShmComm p = (PetscShmComm)val;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Deleting shared memory subcommunicator in an MPI_Comm %ld\n", (long)comm));
  PetscCallMPI(MPI_Comm_free(&p->shmcomm));
  PetscCall(PetscFree(p->globranks));
  PetscCall(PetscFree(val));
  PetscFunctionReturn(MPI_SUCCESS);
}

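/* Illustrative sketch (not part of this file): the delete callback above only takes effect once it has
   been registered with an attribute keyval. In PETSc, Petsc_ShmComm_keyval is created elsewhere during
   initialization; the code below merely shows the generic MPI-3 registration pattern with a hypothetical
   helper name, assuming no copy callback is wanted.

     static PetscErrorCode RegisterShmCommKeyval(void)
     {
       PetscFunctionBegin;
       if (Petsc_ShmComm_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_ShmComm_Attr_Delete_Fn, &Petsc_ShmComm_keyval, NULL));
       PetscFunctionReturn(0);
     }
*/
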
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
/* Data structures to support freeing comms created in PetscShmCommGet().
   Since communicators passed to PetscShmCommGet() are very likely either a PETSc inner
   communicator or an MPI communicator with a linked PETSc inner communicator, a simple
   static array suffices to store the duplicated communicators in the rare remaining cases.
*/
#define MAX_SHMCOMM_DUPPED_COMMS 16
static PetscInt num_dupped_comms = 0;
static MPI_Comm shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS];
static PetscErrorCode PetscShmCommDestroyDuppedComms(void) {
  PetscInt i;
  PetscFunctionBegin;
  for (i = 0; i < num_dupped_comms; i++) PetscCall(PetscCommDestroy(&shmcomm_dupped_comms[i]));
  num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */
  PetscFunctionReturn(0);
}
#endif

/*@C
  PetscShmCommGet - Returns a sub-communicator of all ranks that share common memory

  Collective

  Input Parameter:
. globcomm - `MPI_Comm`, which can be a user `MPI_Comm` or a PETSc inner `MPI_Comm`

  Output Parameter:
. pshmcomm - the PETSc shared memory communicator object

  Level: developer

  Note:
  When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis

.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm, PetscShmComm *pshmcomm) {
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  MPI_Group         globgroup, shmgroup;
  PetscMPIInt      *shmranks, i, flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 2);
  /* Get a petsc inner comm, since we always want to stash pshmcomm on petsc inner comms */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* globcomm is not a petsc comm */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if globcomm already has a linked petsc inner comm */
    PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* globcomm does not have a linked petsc inner comm, so we create one and replace globcomm with it */
      PetscCheck(num_dupped_comms < MAX_SHMCOMM_DUPPED_COMMS, globcomm, PETSC_ERR_PLIB, "PetscShmCommGet() is trying to dup more than %d MPI_Comms", MAX_SHMCOMM_DUPPED_COMMS);
      PetscCall(PetscCommDuplicate(globcomm, &globcomm, NULL));
      /* the first time through, register a function to free the dupped petsc comms at PetscFinalize() */
      if (num_dupped_comms == 0) PetscCall(PetscRegisterFinalize(PetscShmCommDestroyDuppedComms));
      shmcomm_dupped_comms[num_dupped_comms] = globcomm;
      num_dupped_comms++;
    } else {
      /* otherwise, we pull out the inner comm and use it as globcomm */
      globcomm = ucomm.comm;
    }
  }

  /* Check if globcomm already has an attached pshmcomm. If not, create one */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_ShmComm_keyval, pshmcomm, &flg));
  if (flg) PetscFunctionReturn(0);

  PetscCall(PetscNew(pshmcomm));
  (*pshmcomm)->globcomm = globcomm;

  PetscCallMPI(MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &(*pshmcomm)->shmcomm));

  PetscCallMPI(MPI_Comm_size((*pshmcomm)->shmcomm, &(*pshmcomm)->shmsize));
  PetscCallMPI(MPI_Comm_group(globcomm, &globgroup));
  PetscCallMPI(MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup));
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &shmranks));
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &(*pshmcomm)->globranks));
  for (i = 0; i < (*pshmcomm)->shmsize; i++) shmranks[i] = i;
  PetscCallMPI(MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks));
  PetscCall(PetscFree(shmranks));
  PetscCallMPI(MPI_Group_free(&globgroup));
  PetscCallMPI(MPI_Group_free(&shmgroup));

  for (i = 0; i < (*pshmcomm)->shmsize; i++) PetscCall(PetscInfo(NULL, "Shared memory rank %d global rank %d\n", i, (*pshmcomm)->globranks[i]));
  PetscCallMPI(MPI_Comm_set_attr(globcomm, Petsc_ShmComm_keyval, *pshmcomm));
  PetscFunctionReturn(0);
#else
  SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}

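/* Illustrative usage sketch (not part of this file; assumes it runs after PetscInitialize()): obtain the
   shared memory communicator object for PETSC_COMM_WORLD and query the node-local MPI communicator.

     PetscShmComm shmcomm_obj;
     MPI_Comm     shmcomm;
     PetscMPIInt  shmrank;

     PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &shmcomm_obj));
     PetscCall(PetscShmCommGetMpiShmComm(shmcomm_obj, &shmcomm));
     PetscCallMPI(MPI_Comm_rank(shmcomm, &shmrank)); // rank within the node-local communicator

   The PetscShmComm is cached as an attribute on the (inner) communicator and is freed by
   Petsc_ShmComm_Attr_Delete_Fn(), so the caller must not free it.
*/
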
/*@C
  PetscShmCommGlobalToLocal - Given a global rank, returns the local rank in the shared memory communicator

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- grank - the global rank

  Output Parameter:
. lrank - the local rank, or `MPI_PROC_NULL` if it does not exist

  Level: developer

  Developer Notes:
  Assumes pshmcomm->globranks[] is sorted

  It may be better to rewrite this to map multiple global ranks to local ranks in a single call

.seealso: `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm, PetscMPIInt grank, PetscMPIInt *lrank) {
  PetscMPIInt low, high, t, i;
  PetscBool   flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 1);
  PetscValidIntPointer(lrank, 3);
  *lrank = MPI_PROC_NULL;
  if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0);
  if (grank > pshmcomm->globranks[pshmcomm->shmsize - 1]) PetscFunctionReturn(0);
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-noshared", &flg, NULL));
  if (flg) PetscFunctionReturn(0);
  low  = 0;
  high = pshmcomm->shmsize;
  while (high - low > 5) {
    t = (low + high) / 2;
    if (pshmcomm->globranks[t] > grank) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0);
    if (pshmcomm->globranks[i] == grank) {
      *lrank = i;
      PetscFunctionReturn(0);
    }
  }
  PetscFunctionReturn(0);
}

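/* Illustrative sketch of the two translation directions (not part of this file; shmcomm_obj is assumed
   to come from PetscShmCommGet() on PETSC_COMM_WORLD as in the sketch above):

     PetscMPIInt grank, lrank;

     PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &grank));
     PetscCall(PetscShmCommGlobalToLocal(shmcomm_obj, grank, &lrank)); // MPI_PROC_NULL if grank lives on another node
     if (lrank != MPI_PROC_NULL) PetscCall(PetscShmCommLocalToGlobal(shmcomm_obj, lrank, &grank)); // recovers the original grank
*/
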
/*@C
  PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator, returns the global rank

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- lrank - the local rank in the shared memory communicator

  Output Parameter:
. grank - the global rank in the global communicator from which the shared memory communicator was built

  Level: developer

.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm, PetscMPIInt lrank, PetscMPIInt *grank) {
  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 1);
  PetscValidIntPointer(grank, 3);
  PetscCheck(lrank >= 0 && lrank < pshmcomm->shmsize, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "No rank %d in the shared memory communicator", lrank);
  *grank = pshmcomm->globranks[lrank];
  PetscFunctionReturn(0);
}

/*@C
  PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory

  Input Parameter:
. pshmcomm - `PetscShmComm` object obtained with `PetscShmCommGet()`

  Output Parameter:
. comm - the MPI communicator

  Level: developer

.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`
@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm, MPI_Comm *comm) {
  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 1);
  PetscValidPointer(comm, 2);
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#include <pthread.h>
#include <hwloc.h>
#include <omp.h>

/* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
   otherwise use MPI_Win_allocate_shared(). They should have the same effect except that MPI-3 is much
   simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
   by 50%. Until the reason is found out, we use mmap() instead.
*/
#define USE_MMAP_ALLOCATE_SHARED_MEMORY

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif

struct _n_PetscOmpCtrl {
  MPI_Comm           omp_comm;        /* a shared memory communicator to spawn omp threads */
  MPI_Comm           omp_master_comm; /* a communicator to give to third party libraries */
  PetscMPIInt        omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
  PetscBool          is_omp_master;   /* true on rank 0 of omp_comm */
  MPI_Win            omp_win;         /* a shared memory window containing a barrier */
  pthread_barrier_t *barrier;         /* pointer to the barrier */
  hwloc_topology_t   topology;
  hwloc_cpuset_t     cpuset;          /* cpu bindings of the omp master */
  hwloc_cpuset_t     omp_cpuset;      /* union of cpu bindings of ranks in omp_comm */
};

/* Allocate and initialize a pthread_barrier_t object in memory shared by the processes in the omp_comm
   contained by the controller.

   PETSc OpenMP controller users do not call this function directly. This function exists
   only because we want to separate shared memory allocation methods from other code.
*/
static inline PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl) {
  MPI_Aint              size;
  void                 *baseptr;
  pthread_barrierattr_t attr;

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  PetscInt  fd;
  PetscChar pathname[PETSC_MAX_PATH_LEN];
#else
  PetscMPIInt disp_unit;
#endif

  PetscFunctionBegin;
#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  size = sizeof(pthread_barrier_t);
  if (ctrl->is_omp_master) {
    /* use PETSC_COMM_SELF in PetscGetTmp(), since it is a collective call;
       using omp_comm would otherwise bcast the partially populated pathname to the slaves */
    PetscCall(PetscGetTmp(PETSC_COMM_SELF, pathname, PETSC_MAX_PATH_LEN));
    PetscCall(PetscStrlcat(pathname, "/petsc-shm-XXXXXX", PETSC_MAX_PATH_LEN));
    /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
    fd = mkstemp(pathname);
    PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not create tmp file %s with mkstemp", pathname);
    PetscCall(ftruncate(fd, size));
    baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
    PetscCall(close(fd));
    PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
    /* this MPI_Barrier is to wait for the slaves to open the file before the master unlinks it */
    PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
    PetscCall(unlink(pathname));
  } else {
    PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
    fd = open(pathname, O_RDWR);
    PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not open tmp file %s", pathname);
    baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
    PetscCall(close(fd));
    PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  }
#else
  size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
  PetscCallMPI(MPI_Win_allocate_shared(size, 1, MPI_INFO_NULL, ctrl->omp_comm, &baseptr, &ctrl->omp_win));
  PetscCallMPI(MPI_Win_shared_query(ctrl->omp_win, 0, &size, &disp_unit, &baseptr));
#endif
  ctrl->barrier = (pthread_barrier_t *)baseptr;

  /* omp master initializes the barrier */
  if (ctrl->is_omp_master) {
    PetscCallMPI(MPI_Comm_size(ctrl->omp_comm, &ctrl->omp_comm_size));
    PetscCall(pthread_barrierattr_init(&attr));
    PetscCall(pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); /* make the barrier also work for processes */
    PetscCall(pthread_barrier_init(ctrl->barrier, &attr, (unsigned int)ctrl->omp_comm_size));
    PetscCall(pthread_barrierattr_destroy(&attr));
  }

  /* this MPI_Barrier is to make sure the omp barrier is initialized before the slaves use it */
  PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  PetscFunctionReturn(0);
}

/* Destroy the pthread barrier in the PETSc OpenMP controller */
static inline PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl) {
  PetscFunctionBegin;
  /* this MPI_Barrier is to make sure the slaves have finished using the omp barrier before the master destroys it */
  PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  if (ctrl->is_omp_master) PetscCall(pthread_barrier_destroy(ctrl->barrier));

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  PetscCall(munmap(ctrl->barrier, sizeof(pthread_barrier_t)));
#else
  PetscCallMPI(MPI_Win_free(&ctrl->omp_win));
#endif
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third-party libraries that use OpenMP

  Input Parameters:
+ petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
- nthreads - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value

  Output Parameter:
. pctrl - a PETSc OpenMP controller

  Level: developer

  Developer Note:
  Possibly use the variable `PetscNumOMPThreads` to determine the number of threads to use

.seealso: `PetscOmpCtrlDestroy()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`
@*/
PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscOmpCtrl *pctrl) {
  PetscOmpCtrl   ctrl;
  unsigned long *cpu_ulongs = NULL;
  PetscInt       i, nr_cpu_ulongs;
  PetscShmComm   pshmcomm;
  MPI_Comm       shm_comm;
  PetscMPIInt    shm_rank, shm_comm_size, omp_rank, color;
  PetscInt       num_packages, num_cores;

  PetscFunctionBegin;
  PetscCall(PetscNew(&ctrl));

  /*=================================================================================
    Init hwloc
    ==================================================================================*/
  PetscCall(hwloc_topology_init(&ctrl->topology));
#if HWLOC_API_VERSION >= 0x00020000
  /* to filter out unneeded info and have a faster hwloc_topology_load() */
  PetscCall(hwloc_topology_set_all_types_filter(ctrl->topology, HWLOC_TYPE_FILTER_KEEP_NONE));
  PetscCall(hwloc_topology_set_type_filter(ctrl->topology, HWLOC_OBJ_CORE, HWLOC_TYPE_FILTER_KEEP_ALL));
#endif
  PetscCall(hwloc_topology_load(ctrl->topology));

  /*=================================================================================
    Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
    physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
    the others are called slaves. OMP masters make up a new comm called omp_master_comm,
    which is usually passed to third party libraries.
    ==================================================================================*/

  /* fetch the stored shared memory communicator */
  PetscCall(PetscShmCommGet(petsc_comm, &pshmcomm));
  PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shm_comm));

  PetscCallMPI(MPI_Comm_rank(shm_comm, &shm_rank));
  PetscCallMPI(MPI_Comm_size(shm_comm, &shm_comm_size));

  /* PETSc decides nthreads, which is the smaller of shm_comm_size and the number of cores per package (socket) */
  if (nthreads == -1) {
    num_packages = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE);
    num_cores = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE);
    nthreads  = num_cores / num_packages;
    if (nthreads > shm_comm_size) nthreads = shm_comm_size;
  }

  PetscCheck(nthreads >= 1 && nthreads <= shm_comm_size, petsc_comm, PETSC_ERR_ARG_OUTOFRANGE, "number of OpenMP threads %" PetscInt_FMT " cannot be < 1 or > the MPI shared memory communicator size %d", nthreads, shm_comm_size);
  if (shm_comm_size % nthreads) PetscCall(PetscPrintf(petsc_comm, "Warning: number of OpenMP threads %" PetscInt_FMT " is not a factor of the MPI shared memory communicator size %d, which may cause load imbalance!\n", nthreads, shm_comm_size));

  /* split shm_comm into a set of omp_comms, each of size nthreads. For example, if
     shm_comm_size=16 and nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
     color 1, and they are put in two omp_comms. Note that petsc_ranks may or may not
     be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
     Use 0 as the key so that the rank ordering won't change in the new comm.
   */
  color = shm_rank / nthreads;
  PetscCallMPI(MPI_Comm_split(shm_comm, color, 0 /*key*/, &ctrl->omp_comm));

  /* put rank 0's of the omp_comms (i.e., master ranks) into a new comm - omp_master_comm */
  PetscCallMPI(MPI_Comm_rank(ctrl->omp_comm, &omp_rank));
  if (!omp_rank) {
    ctrl->is_omp_master = PETSC_TRUE; /* master */
    color               = 0;
  } else {
    ctrl->is_omp_master = PETSC_FALSE;    /* slave */
    color               = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
  }
  PetscCallMPI(MPI_Comm_split(petsc_comm, color, 0 /*key*/, &ctrl->omp_master_comm));

  /*=================================================================================
    Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
    slave ranks to sleep and idle their CPUs, so that the master can fork OMP threads
    and run them on the idle CPUs.
    ==================================================================================*/
  PetscCall(PetscOmpCtrlCreateBarrier(ctrl));

  /*=================================================================================
    omp master logs its cpu binding (i.e., cpu set) and computes a new binding that
    is the union of the bindings of all ranks in the omp_comm
    =================================================================================*/

  ctrl->cpuset = hwloc_bitmap_alloc();
  PetscCheck(ctrl->cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
  PetscCall(hwloc_get_cpubind(ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS));

  /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
  nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology)) + sizeof(unsigned long) * 8) / sizeof(unsigned long) / 8;
  PetscCall(PetscMalloc1(nr_cpu_ulongs, &cpu_ulongs));
  if (nr_cpu_ulongs == 1) {
    cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
  } else {
    for (i = 0; i < nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset, (unsigned)i);
  }

  PetscCallMPI(MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs, nr_cpu_ulongs, MPI_UNSIGNED_LONG, MPI_BOR, 0, ctrl->omp_comm));

  if (ctrl->is_omp_master) {
    ctrl->omp_cpuset = hwloc_bitmap_alloc();
    PetscCheck(ctrl->omp_cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
    if (nr_cpu_ulongs == 1) {
#if HWLOC_API_VERSION >= 0x00020000
      PetscCall(hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]));
#else
      hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]);
#endif
    } else {
      for (i = 0; i < nr_cpu_ulongs; i++) {
#if HWLOC_API_VERSION >= 0x00020000
        PetscCall(hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]));
#else
        hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]);
#endif
      }
    }
  }
  PetscCall(PetscFree(cpu_ulongs));
  *pctrl = ctrl;
  PetscFunctionReturn(0);
}

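/* Illustrative end-to-end sketch of the controller's intended use (not part of this file; the
   OpenMP-enabled third-party call is a placeholder):

     PetscOmpCtrl ctrl;
     MPI_Comm     omp_comm, omp_master_comm;
     PetscBool    is_master;

     PetscCall(PetscOmpCtrlCreate(PETSC_COMM_WORLD, -1, &ctrl)); // -1: let PETSc pick the thread count
     PetscCall(PetscOmpCtrlGetOmpComms(ctrl, &omp_comm, &omp_master_comm, &is_master));
     if (is_master) {
       PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(ctrl));
       // ... call an OpenMP-enabled library here, giving it omp_master_comm ...
       PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(ctrl));
     }
     PetscCall(PetscOmpCtrlBarrier(ctrl)); // slaves sleep here while the master runs the OpenMP region
     PetscCall(PetscOmpCtrlDestroy(&ctrl));
*/
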
/*@C
  PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller

  Input Parameter:
. pctrl - a PETSc OpenMP controller

  Level: developer

.seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`
@*/
PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl) {
  PetscOmpCtrl ctrl = *pctrl;

  PetscFunctionBegin;
  hwloc_bitmap_free(ctrl->cpuset);
  hwloc_topology_destroy(ctrl->topology);
  PetscCall(PetscOmpCtrlDestroyBarrier(ctrl));
  PetscCallMPI(MPI_Comm_free(&ctrl->omp_comm));
  if (ctrl->is_omp_master) {
    hwloc_bitmap_free(ctrl->omp_cpuset);
    PetscCallMPI(MPI_Comm_free(&ctrl->omp_master_comm));
  }
  PetscCall(PetscFree(ctrl));
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller

  Input Parameter:
. ctrl - a PETSc OMP controller

  Output Parameters:
+ omp_comm - a communicator that includes a master rank and slave ranks where the master spawns threads
. omp_master_comm - on master ranks, a communicator that includes the master ranks of each omp_comm;
                    on slave ranks, `MPI_COMM_NULL` is returned
- is_omp_master - true if the calling process is an OMP master rank

  Note:
  Any output parameter can be NULL; such a parameter is simply ignored.

  Level: developer

.seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`
@*/
PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl, MPI_Comm *omp_comm, MPI_Comm *omp_master_comm, PetscBool *is_omp_master) {
  PetscFunctionBegin;
  if (omp_comm) *omp_comm = ctrl->omp_comm;
  if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
  if (is_omp_master) *is_omp_master = ctrl->is_omp_master;
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlBarrier - Do a barrier on the MPI ranks in the omp_comm contained by the PETSc OMP controller (to let slave ranks free their CPUs)

  Input Parameter:
. ctrl - a PETSc OMP controller

  Notes:
  This is a pthread barrier over MPI ranks. Using `MPI_Barrier()` instead would be conceptually correct, but the MPI standard does not
  require processes blocked in `MPI_Barrier()` to free their CPUs to let other processes progress. In practice, to minimize latency,
  MPI ranks stuck in `MPI_Barrier()` keep polling and do not free their CPUs. In contrast, a process-shared pthread barrier does put
  waiting ranks to sleep.

  A code using `PetscOmpCtrlBarrier()` would look like this,
.vb
    if (is_omp_master) {
      PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
      /* call the library using OpenMP */
      PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
    }
    PetscOmpCtrlBarrier(ctrl);
.ve

  Level: developer

.seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`
@*/
PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl) {
  int err;

  PetscFunctionBegin;
  err = pthread_barrier_wait(ctrl->barrier);
  PetscCheck(!err || err == PTHREAD_BARRIER_SERIAL_THREAD, PETSC_COMM_SELF, PETSC_ERR_LIB, "pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d", err);
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks

  Input Parameter:
. ctrl - a PETSc OMP controller

  Note:
  Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank.
  This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime

  Level: developer

.seealso: `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl) {
  PetscFunctionBegin;
  PetscCall(hwloc_set_cpubind(ctrl->topology, ctrl->omp_cpuset, HWLOC_CPUBIND_PROCESS));
  omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks

  Input Parameter:
. ctrl - a PETSc OMP controller

  Note:
  Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank.
  This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime to 1.

  Level: developer

.seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl) {
  PetscFunctionBegin;
  PetscCall(hwloc_set_cpubind(ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS));
  omp_set_num_threads(1);
  PetscFunctionReturn(0);
}

#undef USE_MMAP_ALLOCATE_SHARED_MEMORY
#endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */