#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/petscimpl.h>

struct _n_PetscShmComm {
  PetscMPIInt *globranks;         /* global ranks of each rank in the shared memory communicator */
  PetscMPIInt  shmsize;           /* size of the shared memory communicator */
  MPI_Comm     globcomm, shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
};

/*
   Private routine to delete the internal shared memory communicator when a communicator is freed.

   This is called by MPI, not by users. It is called by MPI_Comm_free() when the communicator that has this data as an attribute is freed.

   Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *val, void *extra_state) {
  PetscShmComm p = (PetscShmComm)val;

  PetscFunctionBegin;
  PetscCallMPI(PetscInfo(NULL, "Deleting shared memory subcommunicator in a MPI_Comm %ld\n", (long)comm));
  PetscCallMPI(MPI_Comm_free(&p->shmcomm));
  PetscCallMPI(PetscFree(p->globranks));
  PetscCallMPI(PetscFree(val));
  PetscFunctionReturn(MPI_SUCCESS);
}

#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
/* Data structures to support freeing comms created in PetscShmCommGet().
   Since we expect that communicators passed to PetscShmCommGet() are very likely
   either a petsc inner communicator or an MPI communicator with a linked petsc
   inner communicator, we use a simple static array to store dupped communicators
   in the rare remaining cases.
*/
#define MAX_SHMCOMM_DUPPED_COMMS 16
static PetscInt num_dupped_comms = 0;
static MPI_Comm shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS];
static PetscErrorCode PetscShmCommDestroyDuppedComms(void) {
  PetscInt i;
  PetscFunctionBegin;
  for (i = 0; i < num_dupped_comms; i++) PetscCall(PetscCommDestroy(&shmcomm_dupped_comms[i]));
  num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */
  PetscFunctionReturn(0);
}
#endif

/*@C
  PetscShmCommGet - Given a communicator, returns a sub-communicator of all ranks that share common memory

  Collective.

  Input Parameter:
. globcomm - MPI_Comm, which can be a user MPI_Comm or a PETSc inner MPI_Comm

  Output Parameter:
. pshmcomm - the PETSc shared memory communicator object

  Level: developer

  Notes:
  When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis
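
  Example Usage:
  A minimal sketch (variable names are illustrative); the returned object is cached on the communicator by PETSc and must not be freed by the caller:
.vb
  PetscShmComm shmcomm;
  MPI_Comm     nodecomm;

  PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &shmcomm));
  PetscCall(PetscShmCommGetMpiShmComm(shmcomm, &nodecomm)); /* all ranks of PETSC_COMM_WORLD sharing memory with this rank */
.ve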

@*/
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm, PetscShmComm *pshmcomm) {
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  MPI_Group         globgroup, shmgroup;
  PetscMPIInt      *shmranks, i, flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 2);
  /* Get a petsc inner comm, since we always want to stash pshmcomm on petsc inner comms */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* globcomm is not a petsc comm */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if globcomm already has a linked petsc inner comm */
    PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* globcomm does not have a linked petsc inner comm, so we create one and replace globcomm with it */
      PetscCheck(num_dupped_comms < MAX_SHMCOMM_DUPPED_COMMS, globcomm, PETSC_ERR_PLIB, "PetscShmCommGet() is trying to dup more than %d MPI_Comms", MAX_SHMCOMM_DUPPED_COMMS);
      PetscCall(PetscCommDuplicate(globcomm, &globcomm, NULL));
      /* the first time through, register a function to free the dupped petsc comms at PetscFinalize() */
      if (num_dupped_comms == 0) PetscCall(PetscRegisterFinalize(PetscShmCommDestroyDuppedComms));
      shmcomm_dupped_comms[num_dupped_comms] = globcomm;
      num_dupped_comms++;
    } else {
      /* otherwise, we pull out the inner comm and use it as globcomm */
      globcomm = ucomm.comm;
    }
  }
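
  /* From here on globcomm is a petsc inner communicator (either found above or just created), so the
     PetscShmComm built below can be stashed on it with Petsc_ShmComm_keyval and reused by later calls */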

  /* Check if globcomm already has an attached pshmcomm; if not, create one */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_ShmComm_keyval, pshmcomm, &flg));
  if (flg) PetscFunctionReturn(0);

  PetscCall(PetscNew(pshmcomm));
  (*pshmcomm)->globcomm = globcomm;

  PetscCallMPI(MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &(*pshmcomm)->shmcomm));

  PetscCallMPI(MPI_Comm_size((*pshmcomm)->shmcomm, &(*pshmcomm)->shmsize));
  PetscCallMPI(MPI_Comm_group(globcomm, &globgroup));
  PetscCallMPI(MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup));
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &shmranks));
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &(*pshmcomm)->globranks));
  for (i = 0; i < (*pshmcomm)->shmsize; i++) shmranks[i] = i;
  PetscCallMPI(MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks));
  PetscCall(PetscFree(shmranks));
  PetscCallMPI(MPI_Group_free(&globgroup));
  PetscCallMPI(MPI_Group_free(&shmgroup));

  for (i = 0; i < (*pshmcomm)->shmsize; i++) PetscCall(PetscInfo(NULL, "Shared memory rank %d global rank %d\n", i, (*pshmcomm)->globranks[i]));
  PetscCallMPI(MPI_Comm_set_attr(globcomm, Petsc_ShmComm_keyval, *pshmcomm));
  PetscFunctionReturn(0);
#else
  SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}

/*@C
  PetscShmCommGlobalToLocal - Given a global rank, returns the local rank in the shared memory communicator

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- grank    - the global rank

  Output Parameter:
. lrank - the local rank, or MPI_PROC_NULL if it does not exist

  Level: developer

  Developer Notes:
  Assumes pshmcomm->globranks[] is sorted

  It may be better to rewrite this to map multiple global ranks to local ranks in the same function call
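
  Example Usage:
  A sketch (pshmcomm and the global rank grank of interest are assumed to come from the caller):
.vb
  PetscMPIInt lrank;

  PetscCall(PetscShmCommGlobalToLocal(pshmcomm, grank, &lrank));
  if (lrank != MPI_PROC_NULL) { /* grank shares memory with this process */ }
.ve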

@*/
PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm, PetscMPIInt grank, PetscMPIInt *lrank) {
  PetscMPIInt low, high, t, i;
  PetscBool   flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 1);
  PetscValidIntPointer(lrank, 3);
  *lrank = MPI_PROC_NULL;
  if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0);
  if (grank > pshmcomm->globranks[pshmcomm->shmsize - 1]) PetscFunctionReturn(0);
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-noshared", &flg, NULL));
  if (flg) PetscFunctionReturn(0);
  low  = 0;
  high = pshmcomm->shmsize;
  while (high - low > 5) {
    t = (low + high) / 2;
    if (pshmcomm->globranks[t] > grank) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0);
    if (pshmcomm->globranks[i] == grank) {
      *lrank = i;
      PetscFunctionReturn(0);
    }
  }
  PetscFunctionReturn(0);
}

/*@C
  PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator, returns the global rank

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- lrank    - the local rank in the shared memory communicator

  Output Parameter:
. grank - the global rank in the global communicator where the shared memory communicator is built

  Level: developer
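
  Example Usage:
  A sketch that finds the global rank of the first process (local rank 0) on this shared memory node:
.vb
  PetscMPIInt leader;

  PetscCall(PetscShmCommLocalToGlobal(pshmcomm, 0, &leader));
.ve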

@*/
PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm, PetscMPIInt lrank, PetscMPIInt *grank) {
  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 1);
  PetscValidIntPointer(grank, 3);
  PetscCheck(lrank >= 0 && lrank < pshmcomm->shmsize, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "No rank %d in the shared memory communicator", lrank);
  *grank = pshmcomm->globranks[lrank];
  PetscFunctionReturn(0);
}

/*@C
  PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory

  Input Parameter:
. pshmcomm - PetscShmComm object obtained with PetscShmCommGet()

  Output Parameter:
. comm - the MPI communicator

  Level: developer

@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm, MPI_Comm *comm) {
  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 1);
  PetscValidPointer(comm, 2);
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#include <pthread.h>
#include <hwloc.h>
#include <omp.h>

/* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
   otherwise use MPI_Win_allocate_shared. They should have the same effect, except that MPI-3 is much
   simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
   by 50%. Until the reason is found out, we use mmap() instead.
*/
#define USE_MMAP_ALLOCATE_SHARED_MEMORY

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif

struct _n_PetscOmpCtrl {
  MPI_Comm           omp_comm;        /* a shared memory communicator to spawn omp threads */
  MPI_Comm           omp_master_comm; /* a communicator to give to third party libraries */
  PetscMPIInt        omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
  PetscBool          is_omp_master;   /* true on rank 0 of omp_comm */
  MPI_Win            omp_win;         /* a shared memory window containing a barrier */
  pthread_barrier_t *barrier;         /* pointer to the barrier */
  hwloc_topology_t   topology;
  hwloc_cpuset_t     cpuset;          /* cpu bindings of the omp master */
  hwloc_cpuset_t     omp_cpuset;      /* union of the cpu bindings of all ranks in omp_comm */
};

/* Allocate and initialize a pthread_barrier_t object in memory shared by the processes in the omp_comm
   contained by the controller.

   PETSc OpenMP controller users do not call this function directly. This function exists
   only because we want to separate the shared memory allocation methods from the other code.
 */
static inline PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl) {
  MPI_Aint              size;
  void                 *baseptr;
  pthread_barrierattr_t attr;

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  PetscInt  fd;
  PetscChar pathname[PETSC_MAX_PATH_LEN];
#else
  PetscMPIInt disp_unit;
#endif

  PetscFunctionBegin;
#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  size = sizeof(pthread_barrier_t);
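  /* File-backed shared memory handshake: the master creates and sizes a unique scratch file, mmap()s it,
     and broadcasts its path; slaves open and mmap() the same file; the barrier below guarantees all slaves
     have opened the file before the master unlink()s it */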
  if (ctrl->is_omp_master) {
    /* use PETSC_COMM_SELF in PetscGetTmp(), since it is a collective call; using omp_comm would
       otherwise bcast the partially populated pathname to the slaves */
    PetscCall(PetscGetTmp(PETSC_COMM_SELF, pathname, PETSC_MAX_PATH_LEN));
    PetscCall(PetscStrlcat(pathname, "/petsc-shm-XXXXXX", PETSC_MAX_PATH_LEN));
    /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
    fd = mkstemp(pathname);
    PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not create tmp file %s with mkstemp", pathname);
    PetscCall(ftruncate(fd, size));
    baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
    PetscCall(close(fd));
    PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
    /* this MPI_Barrier waits for the slaves to open the file before the master unlinks it */
    PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
    PetscCall(unlink(pathname));
  } else {
    PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
    fd = open(pathname, O_RDWR);
    PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not open tmp file %s", pathname);
    baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
    PetscCall(close(fd));
    PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  }
#else
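  /* MPI-3 allocation: only the master contributes the bytes for the barrier; slaves allocate a zero-size
     segment and then locate the master's memory with MPI_Win_shared_query() */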
  size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
  PetscCallMPI(MPI_Win_allocate_shared(size, 1, MPI_INFO_NULL, ctrl->omp_comm, &baseptr, &ctrl->omp_win));
  PetscCallMPI(MPI_Win_shared_query(ctrl->omp_win, 0, &size, &disp_unit, &baseptr));
#endif
  ctrl->barrier = (pthread_barrier_t *)baseptr;

  /* omp master initializes the barrier */
  if (ctrl->is_omp_master) {
    PetscCallMPI(MPI_Comm_size(ctrl->omp_comm, &ctrl->omp_comm_size));
    PetscCall(pthread_barrierattr_init(&attr));
    PetscCall(pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); /* make the barrier also work for processes */
    PetscCall(pthread_barrier_init(ctrl->barrier, &attr, (unsigned int)ctrl->omp_comm_size));
    PetscCall(pthread_barrierattr_destroy(&attr));
  }

  /* this MPI_Barrier makes sure the omp barrier is initialized before the slaves use it */
  PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  PetscFunctionReturn(0);
}

/* Destroy the pthread barrier in the PETSc OpenMP controller */
static inline PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl) {
  PetscFunctionBegin;
  /* this MPI_Barrier makes sure the slaves have finished using the omp barrier before the master destroys it */
  PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  if (ctrl->is_omp_master) PetscCall(pthread_barrier_destroy(ctrl->barrier));

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  PetscCall(munmap(ctrl->barrier, sizeof(pthread_barrier_t)));
#else
  PetscCallMPI(MPI_Win_free(&ctrl->omp_win));
#endif
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries using OpenMP

    Input Parameters:
+   petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
-   nthreads   - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value

    Output Parameter:
.   pctrl - a PETSc OpenMP controller

    Level: developer

    TODO: Possibly use the variable PetscNumOMPThreads to determine the number of threads to use
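
    Example Usage:
    A sketch of driving a third party OpenMP library with 4 threads per master rank (the thread count is illustrative):
.vb
    PetscOmpCtrl ctrl;
    PetscBool    master;

    PetscCall(PetscOmpCtrlCreate(PETSC_COMM_WORLD, 4, &ctrl));
    PetscCall(PetscOmpCtrlGetOmpComms(ctrl, NULL, NULL, &master));
    if (master) {
      PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(ctrl));
      /* call the OpenMP library here */
      PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(ctrl));
    }
    PetscCall(PetscOmpCtrlBarrier(ctrl));
    PetscCall(PetscOmpCtrlDestroy(&ctrl));
.ve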

.seealso: `PetscOmpCtrlDestroy()`
@*/
PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscOmpCtrl *pctrl) {
  PetscOmpCtrl   ctrl;
  unsigned long *cpu_ulongs = NULL;
  PetscInt       i, nr_cpu_ulongs;
  PetscShmComm   pshmcomm;
  MPI_Comm       shm_comm;
  PetscMPIInt    shm_rank, shm_comm_size, omp_rank, color;
  PetscInt       num_packages, num_cores;

  PetscFunctionBegin;
  PetscCall(PetscNew(&ctrl));

  /*=================================================================================
    Init hwloc
    ==================================================================================*/
  PetscCall(hwloc_topology_init(&ctrl->topology));
#if HWLOC_API_VERSION >= 0x00020000
  /* to filter out unneeded info and make hwloc_topology_load faster */
  PetscCall(hwloc_topology_set_all_types_filter(ctrl->topology, HWLOC_TYPE_FILTER_KEEP_NONE));
  PetscCall(hwloc_topology_set_type_filter(ctrl->topology, HWLOC_OBJ_CORE, HWLOC_TYPE_FILTER_KEEP_ALL));
#endif
  PetscCall(hwloc_topology_load(ctrl->topology));

  /*=================================================================================
    Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
    physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
    the others are called slaves. OMP masters make up a new comm called omp_master_comm,
    which is usually passed to third party libraries.
    ==================================================================================*/

  /* fetch the stored shared memory communicator */
  PetscCall(PetscShmCommGet(petsc_comm, &pshmcomm));
  PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shm_comm));

  PetscCallMPI(MPI_Comm_rank(shm_comm, &shm_rank));
  PetscCallMPI(MPI_Comm_size(shm_comm, &shm_comm_size));

  /* Let PETSc decide nthreads, which is the smaller of shm_comm_size and the number of cores per package (socket) */
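  /* For example, on a hypothetical node with 2 packages, 32 cores in total, and 32 ranks in shm_comm,
     the default below is nthreads = 32/2 = 16, so the node is split into two omp_comms of 16 ranks each */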
  if (nthreads == -1) {
    num_packages = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE);
    num_cores    = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE);
    nthreads     = num_cores / num_packages;
    if (nthreads > shm_comm_size) nthreads = shm_comm_size;
  }

  PetscCheck(nthreads >= 1 && nthreads <= shm_comm_size, petsc_comm, PETSC_ERR_ARG_OUTOFRANGE, "number of OpenMP threads %" PetscInt_FMT " cannot be < 1 or > the MPI shared memory communicator size %d", nthreads, shm_comm_size);
  if (shm_comm_size % nthreads) PetscCall(PetscPrintf(petsc_comm, "Warning: number of OpenMP threads %" PetscInt_FMT " is not a factor of the MPI shared memory communicator size %d, which may cause load imbalance!\n", nthreads, shm_comm_size));

  /* split shm_comm into a set of omp_comms, each of size nthreads. For example, if
     shm_comm_size=16 and nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
     color 1, and they are put in two omp_comms. Note that petsc_ranks may or may not
     be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
     Use 0 as the key so that the rank ordering won't change in the new comm.
   */
  color = shm_rank / nthreads;
  PetscCallMPI(MPI_Comm_split(shm_comm, color, 0 /*key*/, &ctrl->omp_comm));

  /* put rank 0 of each omp_comm (i.e., the master ranks) into a new comm - omp_master_comm */
  PetscCallMPI(MPI_Comm_rank(ctrl->omp_comm, &omp_rank));
  if (!omp_rank) {
    ctrl->is_omp_master = PETSC_TRUE; /* master */
    color               = 0;
  } else {
    ctrl->is_omp_master = PETSC_FALSE;   /* slave */
    color               = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
  }
  PetscCallMPI(MPI_Comm_split(petsc_comm, color, 0 /*key*/, &ctrl->omp_master_comm));

  /*=================================================================================
    Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
    slave ranks to sleep and idle their CPUs, so that the master can fork OMP threads
    and run them on the idle CPUs.
    ==================================================================================*/
  PetscCall(PetscOmpCtrlCreateBarrier(ctrl));

  /*=================================================================================
    omp master logs its cpu binding (i.e., cpu set) and computes a new binding that
    is the union of the bindings of all ranks in the omp_comm
    =================================================================================*/

  ctrl->cpuset = hwloc_bitmap_alloc();
  PetscCheck(ctrl->cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
  PetscCall(hwloc_get_cpubind(ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS));

  /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
  nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology)) + sizeof(unsigned long) * 8) / sizeof(unsigned long) / 8;
  PetscCall(PetscMalloc1(nr_cpu_ulongs, &cpu_ulongs));
  if (nr_cpu_ulongs == 1) {
    cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
  } else {
    for (i = 0; i < nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset, (unsigned)i);
  }

  PetscCallMPI(MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs, nr_cpu_ulongs, MPI_UNSIGNED_LONG, MPI_BOR, 0, ctrl->omp_comm));

  if (ctrl->is_omp_master) {
    ctrl->omp_cpuset = hwloc_bitmap_alloc();
    PetscCheck(ctrl->omp_cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
    if (nr_cpu_ulongs == 1) {
#if HWLOC_API_VERSION >= 0x00020000
      PetscCall(hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]));
#else
      hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]);
#endif
    } else {
      for (i = 0; i < nr_cpu_ulongs; i++) {
#if HWLOC_API_VERSION >= 0x00020000
        PetscCall(hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]));
#else
        hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]);
#endif
      }
    }
  }

  PetscCall(PetscFree(cpu_ulongs));
  *pctrl = ctrl;
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller

    Input Parameter:
.   pctrl - a PETSc OpenMP controller

    Level: developer

.seealso: `PetscOmpCtrlCreate()`
@*/
PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl) {
  PetscOmpCtrl ctrl = *pctrl;

  PetscFunctionBegin;
  hwloc_bitmap_free(ctrl->cpuset);
  hwloc_topology_destroy(ctrl->topology);
  PetscCall(PetscOmpCtrlDestroyBarrier(ctrl));
  PetscCallMPI(MPI_Comm_free(&ctrl->omp_comm));
  if (ctrl->is_omp_master) {
    hwloc_bitmap_free(ctrl->omp_cpuset);
    PetscCallMPI(MPI_Comm_free(&ctrl->omp_master_comm));
  }
  PetscCall(PetscFree(ctrl));
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlGetOmpComms - Get the MPI communicators from a PETSc OMP controller

    Input Parameter:
.   ctrl - a PETSc OMP controller

    Output Parameters:
+   omp_comm        - a communicator that includes a master rank and slave ranks where the master spawns threads
.   omp_master_comm - on master ranks, a communicator that includes the master ranks of each omp_comm;
                      on slave ranks, MPI_COMM_NULL is returned
-   is_omp_master   - true if the calling process is an OMP master rank

    Notes:
    Any output parameter can be NULL; such a parameter is simply ignored.

    Level: developer
@*/
PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl, MPI_Comm *omp_comm, MPI_Comm *omp_master_comm, PetscBool *is_omp_master) {
  PetscFunctionBegin;
  if (omp_comm) *omp_comm = ctrl->omp_comm;
  if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
  if (is_omp_master) *is_omp_master = ctrl->is_omp_master;
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlBarrier - Do a barrier on the MPI ranks in the omp_comm contained by the PETSc OMP controller (to let slave ranks free their CPUs)

    Input Parameter:
.   ctrl - a PETSc OMP controller

    Notes:
    This is a pthread barrier on MPI processes. Using MPI_Barrier() instead would be conceptually correct, but the MPI standard does not
    require processes blocked by MPI_Barrier() to free their CPUs to let other processes progress. In practice, to minimize latency,
    MPI processes stuck in MPI_Barrier() keep polling and do not free their CPUs. In contrast, a pthread barrier puts waiting processes to sleep, freeing their CPUs.

    Code using PetscOmpCtrlBarrier() typically looks like

.vb
    if (is_omp_master) {
      PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
      /* call the library using OpenMP */
      PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
    }
    PetscOmpCtrlBarrier(ctrl);
.ve

    Level: developer

.seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`
@*/
PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl) {
  int err;

  PetscFunctionBegin;
  err = pthread_barrier_wait(ctrl->barrier);
  PetscCheck(!err || err == PTHREAD_BARRIER_SERIAL_THREAD, PETSC_COMM_SELF, PETSC_ERR_LIB, "pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d", err);
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks

    Input Parameter:
.   ctrl - a PETSc OMP controller

    Notes:
    Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
    This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime

    Level: developer

.seealso: `PetscOmpCtrlOmpRegionOnMasterEnd()`
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl) {
  PetscFunctionBegin;
  PetscCall(hwloc_set_cpubind(ctrl->topology, ctrl->omp_cpuset, HWLOC_CPUBIND_PROCESS));
  omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks

   Input Parameter:
.  ctrl - a PETSc OMP controller

   Notes:
   Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
   This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime back to 1.

   Level: developer

.seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl) {
  PetscFunctionBegin;
  PetscCall(hwloc_set_cpubind(ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS));
  omp_set_num_threads(1);
  PetscFunctionReturn(0);
}

#undef USE_MMAP_ALLOCATE_SHARED_MEMORY
#endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */