#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/petscimpl.h>

struct _n_PetscShmComm {
  PetscMPIInt *globranks;         /* global ranks of each rank in the shared memory communicator */
  PetscMPIInt  shmsize;           /* size of the shared memory communicator */
  MPI_Comm     globcomm, shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
};

/*
   Private routine to delete internal shared memory communicator when a communicator is freed.

   This is called by MPI, not by users. This is called by MPI_Comm_free() when the communicator that has this data as an attribute is freed.

   Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()

*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *val, void *extra_state)
{
  PetscShmComm p = (PetscShmComm)val;

  PetscFunctionBegin;
  PetscCallMPI(PetscInfo(NULL, "Deleting shared memory subcommunicator in a MPI_Comm %ld\n", (long)comm));
  PetscCallMPI(MPI_Comm_free(&p->shmcomm));
  PetscCallMPI(PetscFree(p->globranks));
  PetscCallMPI(PetscFree(val));
  PetscFunctionReturn(MPI_SUCCESS);
}
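
/* The deletion callback above is attached to communicators through an MPI attribute keyval.
   As a minimal sketch (illustrative only; the actual Petsc_ShmComm_keyval is created once during
   PETSc's initialization, not here), the registration would look like

     PetscMPIInt Petsc_ShmComm_keyval = MPI_KEYVAL_INVALID;
     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_ShmComm_Attr_Delete_Fn, &Petsc_ShmComm_keyval, NULL));

   after which MPI invokes Petsc_ShmComm_Attr_Delete_Fn() automatically whenever a communicator
   carrying an attribute stored under this keyval is freed with MPI_Comm_free(). */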

#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
/* Data structures to support freeing comms created in PetscShmCommGet().
  Since we predict communicators passed to PetscShmCommGet() are very likely
  either a petsc inner communicator or an MPI communicator with a linked petsc
  inner communicator, we use a simple static array to store dupped communicators
  for the rare cases otherwise.
*/
#define MAX_SHMCOMM_DUPPED_COMMS 16
static PetscInt num_dupped_comms = 0;
static MPI_Comm shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS];
static PetscErrorCode PetscShmCommDestroyDuppedComms(void)
{
  PetscInt i;
  PetscFunctionBegin;
  for (i = 0; i < num_dupped_comms; i++) PetscCall(PetscCommDestroy(&shmcomm_dupped_comms[i]));
  num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/*@C
  PetscShmCommGet - Returns a sub-communicator of all ranks that share a common memory

  Collective.

  Input Parameter:
. globcomm - `MPI_Comm`, which can be a user MPI_Comm or a PETSc inner MPI_Comm

  Output Parameter:
. pshmcomm - the PETSc shared memory communicator object

  Level: developer

  Note:
  When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis

.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm, PetscShmComm *pshmcomm)
{
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  MPI_Group         globgroup, shmgroup;
  PetscMPIInt      *shmranks, i, flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 2);
  /* Get a petsc inner comm, since we always want to stash pshmcomm on petsc inner comms */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* globcomm is not a petsc comm */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if globcomm already has a linked petsc inner comm */
    PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* globcomm does not have a linked petsc inner comm, so we create one and replace globcomm with it */
      PetscCheck(num_dupped_comms < MAX_SHMCOMM_DUPPED_COMMS, globcomm, PETSC_ERR_PLIB, "PetscShmCommGet() is trying to dup more than %d MPI_Comms", MAX_SHMCOMM_DUPPED_COMMS);
      PetscCall(PetscCommDuplicate(globcomm, &globcomm, NULL));
      /* The first time through, register a function to free the dupped petsc comms at PetscFinalize() */
      if (num_dupped_comms == 0) PetscCall(PetscRegisterFinalize(PetscShmCommDestroyDuppedComms));
      shmcomm_dupped_comms[num_dupped_comms] = globcomm;
      num_dupped_comms++;
    } else {
      /* otherwise, we pull out the inner comm and use it as globcomm */
      globcomm = ucomm.comm;
    }
  }

  /* Check if globcomm already has an attached pshmcomm. If not, create one */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_ShmComm_keyval, pshmcomm, &flg));
  if (flg) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscNew(pshmcomm));
  (*pshmcomm)->globcomm = globcomm;

  PetscCallMPI(MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &(*pshmcomm)->shmcomm));

  PetscCallMPI(MPI_Comm_size((*pshmcomm)->shmcomm, &(*pshmcomm)->shmsize));
  PetscCallMPI(MPI_Comm_group(globcomm, &globgroup));
  PetscCallMPI(MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup));
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &shmranks));
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &(*pshmcomm)->globranks));
  for (i = 0; i < (*pshmcomm)->shmsize; i++) shmranks[i] = i;
  PetscCallMPI(MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks));
  PetscCall(PetscFree(shmranks));
  PetscCallMPI(MPI_Group_free(&globgroup));
  PetscCallMPI(MPI_Group_free(&shmgroup));

  for (i = 0; i < (*pshmcomm)->shmsize; i++) PetscCall(PetscInfo(NULL, "Shared memory rank %d global rank %d\n", i, (*pshmcomm)->globranks[i]));
  PetscCallMPI(MPI_Comm_set_attr(globcomm, Petsc_ShmComm_keyval, *pshmcomm));
  PetscFunctionReturn(PETSC_SUCCESS);
#else
  SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}

/*@C
  PetscShmCommGlobalToLocal - Given a global rank returns the local rank in the shared memory communicator

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- grank    - the global rank

  Output Parameter:
. lrank - the local rank, or `MPI_PROC_NULL` if it does not exist

  Level: developer

  Developer Notes:
  Assumes the pshmcomm->globranks[] is sorted

  It may be better to rewrite this to map multiple global ranks to local in the same function call

.seealso: `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm, PetscMPIInt grank, PetscMPIInt *lrank)
{
  PetscMPIInt low, high, t, i;
  PetscBool   flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 1);
  PetscValidIntPointer(lrank, 3);
  *lrank = MPI_PROC_NULL;
  if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(PETSC_SUCCESS);
  if (grank > pshmcomm->globranks[pshmcomm->shmsize - 1]) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-noshared", &flg, NULL));
  if (flg) PetscFunctionReturn(PETSC_SUCCESS);
  low  = 0;
  high = pshmcomm->shmsize;
  while (high - low > 5) {
    t = (low + high) / 2;
    if (pshmcomm->globranks[t] > grank) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(PETSC_SUCCESS);
    if (pshmcomm->globranks[i] == grank) {
      *lrank = i;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
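
/* A small worked example of the mapping implemented above (illustrative values only, assuming a
   hypothetical pshmcomm whose sorted pshmcomm->globranks[] is {3, 7, 11, 15}):

     PetscMPIInt lrank;
     PetscCall(PetscShmCommGlobalToLocal(pshmcomm, 11, &lrank)); // lrank becomes 2
     PetscCall(PetscShmCommGlobalToLocal(pshmcomm, 5, &lrank));  // 5 is not in globranks[], so lrank stays MPI_PROC_NULL

   Passing -noshared on the command line also forces lrank = MPI_PROC_NULL for every query. */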

/*@C
  PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator returns the global rank

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- lrank    - the local rank in the shared memory communicator

  Output Parameter:
. grank - the global rank in the global communicator where the shared memory communicator is built

  Level: developer

.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm, PetscMPIInt lrank, PetscMPIInt *grank)
{
  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 1);
  PetscValidIntPointer(grank, 3);
  PetscCheck(lrank >= 0 && lrank < pshmcomm->shmsize, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "No rank %d in the shared memory communicator", lrank);
  *grank = pshmcomm->globranks[lrank];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory

  Input Parameter:
. pshmcomm - PetscShmComm object obtained with PetscShmCommGet()

  Output Parameter:
. comm - the MPI communicator

  Level: developer

.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`
@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm, MPI_Comm *comm)
{
  PetscFunctionBegin;
  PetscValidPointer(pshmcomm, 1);
  PetscValidPointer(comm, 2);
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
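
/* A minimal usage sketch of the PetscShmComm query routines above (assuming an initialized PETSc
   and that myrank is this process's rank in PETSC_COMM_WORLD; not taken from any PETSc example):

     PetscShmComm pshmcomm;
     MPI_Comm     shmcomm;
     PetscMPIInt  myrank, lrank;

     PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &myrank));
     PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &pshmcomm));        // attach/look up the shared memory info
     PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shmcomm));       // raw MPI shared memory communicator
     PetscCall(PetscShmCommGlobalToLocal(pshmcomm, myrank, &lrank)); // my rank within the shared memory node

   The PetscShmComm object is cached as an attribute of the (inner) communicator, so the caller must
   not free it; it is cleaned up by Petsc_ShmComm_Attr_Delete_Fn() when the communicator is freed. */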

#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#include <pthread.h>
#include <hwloc.h>
#include <omp.h>

/* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
   otherwise use MPI_Win_allocate_shared. They should have the same effect except MPI-3 is much
   simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
   by 50%. Until the reason is found out, we use mmap() instead.
*/
#define USE_MMAP_ALLOCATE_SHARED_MEMORY

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif

struct _n_PetscOmpCtrl {
  MPI_Comm           omp_comm;        /* a shared memory communicator to spawn omp threads */
  MPI_Comm           omp_master_comm; /* a communicator to give to third party libraries */
  PetscMPIInt        omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
  PetscBool          is_omp_master;   /* rank 0's in omp_comm */
  MPI_Win            omp_win;         /* a shared memory window containing a barrier */
  pthread_barrier_t *barrier;         /* pointer to the barrier */
  hwloc_topology_t   topology;
  hwloc_cpuset_t     cpuset;     /* cpu bindings of omp master */
  hwloc_cpuset_t     omp_cpuset; /* union of cpu bindings of ranks in omp_comm */
};

/* Allocate and initialize a pthread_barrier_t object in memory shared by processes in omp_comm
   contained by the controller.

   PETSc OpenMP controller users do not call this function directly. This function exists
   only because we want to separate shared memory allocation methods from other code.
*/
static inline PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl)
{
  MPI_Aint              size;
  void                 *baseptr;
  pthread_barrierattr_t attr;

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  PetscInt  fd;
  PetscChar pathname[PETSC_MAX_PATH_LEN];
#else
  PetscMPIInt disp_unit;
#endif

  PetscFunctionBegin;
#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  size = sizeof(pthread_barrier_t);
  if (ctrl->is_omp_master) {
    /* use PETSC_COMM_SELF in PetscGetTmp, since it is a collective call. Using omp_comm would otherwise bcast the partially populated pathname to slaves */
    PetscCall(PetscGetTmp(PETSC_COMM_SELF, pathname, PETSC_MAX_PATH_LEN));
    PetscCall(PetscStrlcat(pathname, "/petsc-shm-XXXXXX", PETSC_MAX_PATH_LEN));
    /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
    fd = mkstemp(pathname);
    PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not create tmp file %s with mkstemp", pathname);
    PetscCall(ftruncate(fd, size));
    baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
    PetscCall(close(fd));
    PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
    /* this MPI_Barrier is to wait for the slaves to open the file before the master unlinks it */
    PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
    PetscCall(unlink(pathname));
  } else {
    PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
    fd = open(pathname, O_RDWR);
    PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not open tmp file %s", pathname);
    baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
    PetscCall(close(fd));
    PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  }
#else
  size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
  PetscCallMPI(MPI_Win_allocate_shared(size, 1, MPI_INFO_NULL, ctrl->omp_comm, &baseptr, &ctrl->omp_win));
  PetscCallMPI(MPI_Win_shared_query(ctrl->omp_win, 0, &size, &disp_unit, &baseptr));
#endif
  ctrl->barrier = (pthread_barrier_t *)baseptr;

  /* omp master initializes the barrier */
  if (ctrl->is_omp_master) {
    PetscCallMPI(MPI_Comm_size(ctrl->omp_comm, &ctrl->omp_comm_size));
    PetscCall(pthread_barrierattr_init(&attr));
    PetscCall(pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); /* make the barrier also work for processes */
    PetscCall(pthread_barrier_init(ctrl->barrier, &attr, (unsigned int)ctrl->omp_comm_size));
    PetscCall(pthread_barrierattr_destroy(&attr));
  }

  /* this MPI_Barrier is to make sure the omp barrier is initialized before slaves use it */
  PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the pthread barrier in the PETSc OpenMP controller */
static inline PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl)
{
  PetscFunctionBegin;
  /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before master destroys it */
  PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  if (ctrl->is_omp_master) PetscCall(pthread_barrier_destroy(ctrl->barrier));

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  PetscCall(munmap(ctrl->barrier, sizeof(pthread_barrier_t)));
#else
  PetscCallMPI(MPI_Win_free(&ctrl->omp_win));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries that use OpenMP

  Input Parameters:
+ petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
- nthreads   - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value

  Output Parameter:
. pctrl - a PETSc OpenMP controller

  Level: developer

  Developer Note:
  Possibly use the variable `PetscNumOMPThreads` to determine the number of threads to use

.seealso: `PetscOmpCtrlDestroy()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`,
@*/
PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscOmpCtrl *pctrl)
{
  PetscOmpCtrl   ctrl;
  unsigned long *cpu_ulongs = NULL;
  PetscInt       i, nr_cpu_ulongs;
  PetscShmComm   pshmcomm;
  MPI_Comm       shm_comm;
  PetscMPIInt    shm_rank, shm_comm_size, omp_rank, color;
  PetscInt       num_packages, num_cores;

  PetscFunctionBegin;
  PetscCall(PetscNew(&ctrl));

  /*=================================================================================
    Init hwloc
    ==================================================================================*/
  PetscCall(hwloc_topology_init(&ctrl->topology));
#if HWLOC_API_VERSION >= 0x00020000
  /* to filter out unneeded info and have faster hwloc_topology_load */
  PetscCall(hwloc_topology_set_all_types_filter(ctrl->topology, HWLOC_TYPE_FILTER_KEEP_NONE));
  PetscCall(hwloc_topology_set_type_filter(ctrl->topology, HWLOC_OBJ_CORE, HWLOC_TYPE_FILTER_KEEP_ALL));
#endif
  PetscCall(hwloc_topology_load(ctrl->topology));

  /*=================================================================================
    Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
    physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
    the others are called slaves. OMP masters make up a new comm called omp_master_comm,
    which is usually passed to third party libraries.
    ==================================================================================*/

  /* fetch the stored shared memory communicator */
  PetscCall(PetscShmCommGet(petsc_comm, &pshmcomm));
  PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shm_comm));

  PetscCallMPI(MPI_Comm_rank(shm_comm, &shm_rank));
  PetscCallMPI(MPI_Comm_size(shm_comm, &shm_comm_size));

  /* PETSc decides nthreads, which is the smaller of shm_comm_size or cores per package (socket) */
  if (nthreads == -1) {
    num_packages = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE);
    num_cores    = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE);
    nthreads     = num_cores / num_packages;
    if (nthreads > shm_comm_size) nthreads = shm_comm_size;
  }

  PetscCheck(nthreads >= 1 && nthreads <= shm_comm_size, petsc_comm, PETSC_ERR_ARG_OUTOFRANGE, "number of OpenMP threads %" PetscInt_FMT " can not be < 1 or > the MPI shared memory communicator size %d", nthreads, shm_comm_size);
  if (shm_comm_size % nthreads) PetscCall(PetscPrintf(petsc_comm, "Warning: number of OpenMP threads %" PetscInt_FMT " is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n", nthreads, shm_comm_size));

  /* Split shm_comm into a set of omp_comms, each of size nthreads. For example, if
     shm_comm_size=16 and nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
     color 1, so they are put in two omp_comms. Note that petsc_ranks may or may not
     be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
     Use 0 as the key so that rank ordering won't change in the new comm.
   */
  color = shm_rank / nthreads;
  PetscCallMPI(MPI_Comm_split(shm_comm, color, 0 /*key*/, &ctrl->omp_comm));

  /* put rank 0's of the omp_comms (i.e., master ranks) into a new comm - omp_master_comm */
  PetscCallMPI(MPI_Comm_rank(ctrl->omp_comm, &omp_rank));
  if (!omp_rank) {
    ctrl->is_omp_master = PETSC_TRUE; /* master */
    color               = 0;
  } else {
    ctrl->is_omp_master = PETSC_FALSE;   /* slave */
    color               = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
  }
  PetscCallMPI(MPI_Comm_split(petsc_comm, color, 0 /*key*/, &ctrl->omp_master_comm));

  /*=================================================================================
    Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
    slave ranks to sleep and idle their CPUs, so that the master can fork OMP threads
    and run them on the idle CPUs.
    ==================================================================================*/
  PetscCall(PetscOmpCtrlCreateBarrier(ctrl));

  /*=================================================================================
    omp master logs its cpu binding (i.e., cpu set) and computes a new binding that
    is the union of the bindings of all ranks in the omp_comm
    =================================================================================*/

  ctrl->cpuset = hwloc_bitmap_alloc();
  PetscCheck(ctrl->cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
  PetscCall(hwloc_get_cpubind(ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS));

  /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
  nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology)) + sizeof(unsigned long) * 8) / sizeof(unsigned long) / 8;
  PetscCall(PetscMalloc1(nr_cpu_ulongs, &cpu_ulongs));
  if (nr_cpu_ulongs == 1) {
    cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
  } else {
    for (i = 0; i < nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset, (unsigned)i);
  }

  PetscCallMPI(MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs, nr_cpu_ulongs, MPI_UNSIGNED_LONG, MPI_BOR, 0, ctrl->omp_comm));

  if (ctrl->is_omp_master) {
    ctrl->omp_cpuset = hwloc_bitmap_alloc();
    PetscCheck(ctrl->omp_cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
    if (nr_cpu_ulongs == 1) {
#if HWLOC_API_VERSION >= 0x00020000
      PetscCall(hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]));
#else
      hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]);
#endif
    } else {
      for (i = 0; i < nr_cpu_ulongs; i++) {
#if HWLOC_API_VERSION >= 0x00020000
        PetscCall(hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]));
#else
        hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]);
#endif
      }
    }
  }
  PetscCall(PetscFree(cpu_ulongs));
  *pctrl = ctrl;
  PetscFunctionReturn(PETSC_SUCCESS);
}
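
/* A minimal sketch of how the controller created above is meant to be driven (assuming an
   initialized PETSc; ThirdPartyOpenMPSolve() is a hypothetical routine name used only for
   illustration):

     PetscOmpCtrl ctrl;
     MPI_Comm     omp_master_comm;
     PetscBool    is_master;

     PetscCall(PetscOmpCtrlCreate(PETSC_COMM_WORLD, -1, &ctrl));
     PetscCall(PetscOmpCtrlGetOmpComms(ctrl, NULL, &omp_master_comm, &is_master));
     if (is_master) {
       PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(ctrl)); // bind to the union cpuset, raise nthreads
       ThirdPartyOpenMPSolve(omp_master_comm);              // the library spawns OpenMP threads here
       PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(ctrl));   // restore binding, nthreads back to 1
     }
     PetscCall(PetscOmpCtrlBarrier(ctrl)); // slaves sleep here while the master runs OpenMP
     PetscCall(PetscOmpCtrlDestroy(&ctrl));
*/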

/*@C
  PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller

  Input Parameter:
. pctrl - a PETSc OpenMP controller

  Level: developer

.seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`,
@*/
PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl)
{
  PetscOmpCtrl ctrl = *pctrl;

  PetscFunctionBegin;
  hwloc_bitmap_free(ctrl->cpuset);
  hwloc_topology_destroy(ctrl->topology);
  PetscCall(PetscOmpCtrlDestroyBarrier(ctrl));
  PetscCallMPI(MPI_Comm_free(&ctrl->omp_comm));
  if (ctrl->is_omp_master) {
    hwloc_bitmap_free(ctrl->omp_cpuset);
    PetscCallMPI(MPI_Comm_free(&ctrl->omp_master_comm));
  }
  PetscCall(PetscFree(ctrl));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller

  Input Parameter:
. ctrl - a PETSc OMP controller

  Output Parameters:
+ omp_comm        - a communicator that includes a master rank and slave ranks, where the master spawns threads
. omp_master_comm - on master ranks, a communicator that includes the master ranks of each omp_comm;
                    on slave ranks, `MPI_COMM_NULL` is returned
- is_omp_master   - true if the calling process is an OMP master rank

  Note:
  Any output parameter can be NULL; such a parameter is simply ignored.

  Level: developer

.seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`,
@*/
PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl, MPI_Comm *omp_comm, MPI_Comm *omp_master_comm, PetscBool *is_omp_master)
{
  PetscFunctionBegin;
  if (omp_comm) *omp_comm = ctrl->omp_comm;
  if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
  if (is_omp_master) *is_omp_master = ctrl->is_omp_master;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscOmpCtrlBarrier - Do a barrier on the MPI ranks in the omp_comm contained by the PETSc OMP controller (to let slave ranks free their CPUs)

  Input Parameter:
. ctrl - a PETSc OMP controller

  Notes:
  This is a pthread barrier on MPI ranks. Using `MPI_Barrier()` instead is conceptually correct, but the MPI standard does not
  require processes blocked by `MPI_Barrier()` to free their CPUs to let other processes progress. In practice, to minimize latency,
  MPI ranks stuck in `MPI_Barrier()` keep polling and do not free their CPUs. In contrast, ranks blocked in the pthread barrier do free their CPUs.

  Code using `PetscOmpCtrlBarrier()` would look like this,
.vb
  if (is_omp_master) {
    PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
    Call the library using OpenMP
    PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
  }
  PetscOmpCtrlBarrier(ctrl);
.ve

  Level: developer

.seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`,
@*/
PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl)
{
  int err;

  PetscFunctionBegin;
  err = pthread_barrier_wait(ctrl->barrier);
  PetscCheck(!err || err == PTHREAD_BARRIER_SERIAL_THREAD, PETSC_COMM_SELF, PETSC_ERR_LIB, "pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d", err);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks

  Input Parameter:
. ctrl - a PETSc OMP controller

  Note:
  Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank.
  This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime

  Level: developer

.seealso: `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl)
{
  PetscFunctionBegin;
  PetscCall(hwloc_set_cpubind(ctrl->topology, ctrl->omp_cpuset, HWLOC_CPUBIND_PROCESS));
  omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks

  Input Parameter:
. ctrl - a PETSc OMP controller

  Note:
  Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank.
  This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime to 1.

  Level: developer

.seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl)
{
  PetscFunctionBegin;
  PetscCall(hwloc_set_cpubind(ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS));
  omp_set_num_threads(1);
  PetscFunctionReturn(PETSC_SUCCESS);
}

#undef USE_MMAP_ALLOCATE_SHARED_MEMORY
#endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */