#include <petscsys.h>        /*I  "petscsys.h"  I*/
#include <petsc/private/petscimpl.h>

struct _n_PetscShmComm {
  PetscMPIInt *globranks;         /* global ranks of each rank in the shared memory communicator */
  PetscMPIInt  shmsize;           /* size of the shared memory communicator */
  MPI_Comm     globcomm,shmcomm;  /* global communicator and shared memory communicator (a sub-communicator of the former) */
};

/*
   Private routine to delete the internal shared memory communicator when a communicator is freed.

   This is called by MPI, not by users; MPI_Comm_free() invokes it when the communicator that has this data as an attribute is freed.

   Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *val,void *extra_state)
{
  PetscErrorCode ierr;
  PetscShmComm   p = (PetscShmComm)val;

  PetscFunctionBegin;
  ierr = PetscInfo1(NULL,"Deleting shared memory subcommunicator in an MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
  ierr = MPI_Comm_free(&p->shmcomm);CHKERRMPI(ierr);
  ierr = PetscFree(p->globranks);CHKERRMPI(ierr);
  ierr = PetscFree(val);CHKERRMPI(ierr);
  PetscFunctionReturn(MPI_SUCCESS);
}
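/*
   A sketch (not the exact registration code, which lives in PETSc's initialization
   routines) of how a delete callback like the one above is hooked into MPI so that
   MPI_Comm_free() runs it automatically:

     PetscMPIInt keyval;
     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,         <- attribute is not copied on MPI_Comm_dup()
                            Petsc_ShmComm_Attr_Delete_Fn,  <- invoked when a comm carrying the attribute is freed
                            &keyval,NULL);

   PetscShmCommGet() below attaches its PetscShmComm to a communicator with
   MPI_Comm_set_attr(); freeing that communicator then triggers the callback.
*/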
/*@C
    PetscShmCommGet - Given a PETSc communicator, returns a communicator of all ranks that share a common memory

    Collective.

    Input Parameter:
.   globcomm - MPI_Comm

    Output Parameter:
.   pshmcomm - the PETSc shared memory communicator object

    Level: developer

    Notes:
    This should be called only with a PetscCommDuplicate() communicator

    When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis

@*/
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm,PetscShmComm *pshmcomm)
{
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  PetscErrorCode   ierr;
  MPI_Group        globgroup,shmgroup;
  PetscMPIInt      *shmranks,i,flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  ierr = MPI_Comm_get_attr(globcomm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(globcomm,PETSC_ERR_ARG_CORRUPT,"Bad MPI communicator supplied; must be a PETSc communicator");

  /* return the communicator cached as an attribute, if it exists */
  ierr = MPI_Comm_get_attr(globcomm,Petsc_ShmComm_keyval,pshmcomm,&flg);CHKERRQ(ierr);
  if (flg) PetscFunctionReturn(0);

  ierr = PetscNew(pshmcomm);CHKERRQ(ierr);
  (*pshmcomm)->globcomm = globcomm;

  ierr = MPI_Comm_split_type(globcomm,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,&(*pshmcomm)->shmcomm);CHKERRQ(ierr);

  ierr = MPI_Comm_size((*pshmcomm)->shmcomm,&(*pshmcomm)->shmsize);CHKERRQ(ierr);
  ierr = MPI_Comm_group(globcomm,&globgroup);CHKERRQ(ierr);
  ierr = MPI_Comm_group((*pshmcomm)->shmcomm,&shmgroup);CHKERRQ(ierr);
  /* translate local ranks 0..shmsize-1 into their ranks in globcomm */
  ierr = PetscMalloc1((*pshmcomm)->shmsize,&shmranks);CHKERRQ(ierr);
  ierr = PetscMalloc1((*pshmcomm)->shmsize,&(*pshmcomm)->globranks);CHKERRQ(ierr);
  for (i=0; i<(*pshmcomm)->shmsize; i++) shmranks[i] = i;
  ierr = MPI_Group_translate_ranks(shmgroup,(*pshmcomm)->shmsize,shmranks,globgroup,(*pshmcomm)->globranks);CHKERRQ(ierr);
  ierr = PetscFree(shmranks);CHKERRQ(ierr);
  ierr = MPI_Group_free(&globgroup);CHKERRQ(ierr);
  ierr = MPI_Group_free(&shmgroup);CHKERRQ(ierr);

  for (i=0; i<(*pshmcomm)->shmsize; i++) {
    ierr = PetscInfo2(NULL,"Shared memory rank %d global rank %d\n",i,(*pshmcomm)->globranks[i]);CHKERRQ(ierr);
  }
  ierr = MPI_Comm_set_attr(globcomm,Petsc_ShmComm_keyval,*pshmcomm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  SETERRQ(globcomm,PETSC_ERR_SUP,"Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}
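/*
   A minimal usage sketch (hypothetical caller; error checking abbreviated). A PETSc
   (inner) communicator is obtained first, since PetscShmCommGet() requires one:

     MPI_Comm     pcomm;
     PetscShmComm shm;
     ierr = PetscCommDuplicate(PETSC_COMM_WORLD,&pcomm,NULL);CHKERRQ(ierr);
     ierr = PetscShmCommGet(pcomm,&shm);CHKERRQ(ierr);

   The object is cached as an attribute of pcomm, so subsequent calls are cheap, and it
   is destroyed by Petsc_ShmComm_Attr_Delete_Fn() when the communicator is freed.
*/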
/*@C
    PetscShmCommGlobalToLocal - Given a global rank, returns the local rank in the shared memory communicator

    Input Parameters:
+   pshmcomm - the shared memory communicator object
-   grank    - the global rank

    Output Parameter:
.   lrank - the local rank, or MPI_PROC_NULL if it does not exist

    Level: developer

    Developer Notes:
    Assumes that pshmcomm->globranks[] is sorted

    It may be better to rewrite this to map multiple global ranks to local in the same function call

@*/
PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm,PetscMPIInt grank,PetscMPIInt *lrank)
{
  PetscMPIInt    low,high,t,i;
  PetscBool      flg = PETSC_FALSE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *lrank = MPI_PROC_NULL;
  if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0);
  if (grank > pshmcomm->globranks[pshmcomm->shmsize-1]) PetscFunctionReturn(0);
  ierr = PetscOptionsGetBool(NULL,NULL,"-noshared",&flg,NULL);CHKERRQ(ierr);
  if (flg) PetscFunctionReturn(0);
  /* binary search narrows the sorted globranks[] down to a window of at most 5 entries,
     which is then scanned linearly */
  low  = 0;
  high = pshmcomm->shmsize;
  while (high-low > 5) {
    t = (low+high)/2;
    if (pshmcomm->globranks[t] > grank) high = t;
    else low = t;
  }
  for (i=low; i<high; i++) {
    if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0);
    if (pshmcomm->globranks[i] == grank) {
      *lrank = i;
      PetscFunctionReturn(0);
    }
  }
  PetscFunctionReturn(0);
}
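/*
   Illustration with hypothetical values: if the shared memory communicator consists of
   global ranks {3,7,11,15}, then globranks[] = {3,7,11,15} and

     PetscShmCommGlobalToLocal(shm,11,&lrank);   yields lrank = 2
     PetscShmCommGlobalToLocal(shm,12,&lrank);   yields lrank = MPI_PROC_NULL (rank 12 is on another node)
     PetscShmCommLocalToGlobal(shm, 2,&grank);   yields grank = 11 (defined below)

   i.e., the two routines are inverses of each other on the ranks that share memory.
*/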
/*@C
    PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator, returns the global rank

    Input Parameters:
+   pshmcomm - the shared memory communicator object
-   lrank    - the local rank in the shared memory communicator

    Output Parameter:
.   grank - the global rank in the global communicator where the shared memory communicator is built

    Level: developer

@*/
PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm,PetscMPIInt lrank,PetscMPIInt *grank)
{
  PetscFunctionBegin;
  if (lrank < 0 || lrank >= pshmcomm->shmsize) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"No rank %d in the shared memory communicator",lrank);
  *grank = pshmcomm->globranks[lrank];
  PetscFunctionReturn(0);
}

/*@C
    PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory

    Input Parameter:
.   pshmcomm - PetscShmComm object obtained with PetscShmCommGet()

    Output Parameter:
.   comm - the MPI communicator

    Level: developer

@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm,MPI_Comm *comm)
{
  PetscFunctionBegin;
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(0);
}
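/*
   A sketch of a typical use of the raw MPI communicator (assumes MPI-3 support; names
   hypothetical, error checking abbreviated): allocate memory that every rank on the
   node can load/store directly.

     MPI_Comm scomm;
     MPI_Win  win;
     double   *base;
     ierr = PetscShmCommGetMpiShmComm(pshmcomm,&scomm);CHKERRQ(ierr);
     ierr = MPI_Win_allocate_shared(100*sizeof(double),sizeof(double),MPI_INFO_NULL,scomm,&base,&win);CHKERRQ(ierr);
     .... load/store through base, with appropriate synchronization ....
     ierr = MPI_Win_free(&win);CHKERRQ(ierr);
*/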
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#include <pthread.h>
#include <hwloc.h>
#include <omp.h>

/* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
   otherwise use MPI_Win_allocate_shared(). They should have the same effect, except that MPI-3
   is much simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's
   performance by 50%. Until the reason is found, we use mmap() instead.
*/
#define USE_MMAP_ALLOCATE_SHARED_MEMORY

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif

struct _n_PetscOmpCtrl {
  MPI_Comm          omp_comm;        /* a shared memory communicator to spawn omp threads */
  MPI_Comm          omp_master_comm; /* a communicator to give to third party libraries */
  PetscMPIInt       omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
  PetscBool         is_omp_master;   /* true on rank 0 of omp_comm (the master) */
  MPI_Win           omp_win;         /* a shared memory window containing a barrier */
  pthread_barrier_t *barrier;        /* pointer to the barrier */
  hwloc_topology_t  topology;
  hwloc_cpuset_t    cpuset;          /* cpu bindings of omp master */
  hwloc_cpuset_t    omp_cpuset;      /* union of cpu bindings of ranks in omp_comm */
};
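/*
   Illustration of the communicator layout the controller builds (hypothetical sizes):
   with two nodes, shm_comm_size = 4 per node and nthreads = 2,

     petsc_comm      :  r0 r1 r2 r3   r4 r5 r6 r7
     omp_comm        :  {r0,r1} {r2,r3} {r4,r5} {r6,r7}    (masters: r0, r2, r4, r6)
     omp_master_comm :  {r0,r2,r4,r6}

   Third party OpenMP libraries are called on omp_master_comm only; the slaves sleep in
   the pthread barrier while each master spawns nthreads OpenMP threads on the idle cores.
*/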
/* Allocate and initialize a pthread_barrier_t object in memory shared by processes in the omp_comm
   contained by the controller.

   PETSc OpenMP controller users do not call this function directly. This function exists
   only because we want to separate shared memory allocation methods from other code.
*/
PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode        ierr;
  MPI_Aint              size;
  void                  *baseptr;
  pthread_barrierattr_t attr;

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  int  fd;
  char pathname[PETSC_MAX_PATH_LEN];
#else
  PetscMPIInt disp_unit;
#endif

  PetscFunctionBegin;
#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  size = sizeof(pthread_barrier_t);
  if (ctrl->is_omp_master) {
    /* use PETSC_COMM_SELF in PetscGetTmp(), since it is a collective call; using omp_comm
       would bcast the partially populated pathname to the slaves */
    ierr = PetscGetTmp(PETSC_COMM_SELF,pathname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
    ierr = PetscStrlcat(pathname,"/petsc-shm-XXXXXX",PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
    /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
    fd   = mkstemp(pathname); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not create tmp file %s with mkstemp\n",pathname);
    ierr = ftruncate(fd,size);CHKERRQ(ierr);
    baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE,MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
    ierr = close(fd);CHKERRQ(ierr);
    ierr = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRQ(ierr);
    /* this MPI_Barrier is to wait for the slaves to open the file before the master unlinks it */
    ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
    ierr = unlink(pathname);CHKERRQ(ierr);
  } else {
    ierr = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRQ(ierr);
    fd   = open(pathname,O_RDWR); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not open tmp file %s\n",pathname);
    baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE,MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
    ierr = close(fd);CHKERRQ(ierr);
    ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  }
#else
  size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
  ierr = MPI_Win_allocate_shared(size,1,MPI_INFO_NULL,ctrl->omp_comm,&baseptr,&ctrl->omp_win);CHKERRQ(ierr);
  ierr = MPI_Win_shared_query(ctrl->omp_win,0,&size,&disp_unit,&baseptr);CHKERRQ(ierr);
#endif
  ctrl->barrier = (pthread_barrier_t*)baseptr;

  /* omp master initializes the barrier */
  if (ctrl->is_omp_master) {
    ierr = MPI_Comm_size(ctrl->omp_comm,&ctrl->omp_comm_size);CHKERRQ(ierr);
    ierr = pthread_barrierattr_init(&attr);CHKERRQ(ierr);
    ierr = pthread_barrierattr_setpshared(&attr,PTHREAD_PROCESS_SHARED);CHKERRQ(ierr); /* make the barrier also work for processes */
    ierr = pthread_barrier_init(ctrl->barrier,&attr,(unsigned int)ctrl->omp_comm_size);CHKERRQ(ierr);
    ierr = pthread_barrierattr_destroy(&attr);CHKERRQ(ierr);
  }

  /* this MPI_Barrier is to make sure the omp barrier is initialized before the slaves use it */
  ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
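/*
   The mmap() path above uses a standard POSIX idiom: the file created by mkstemp() may
   be unlink()ed as soon as every process has open()ed and mmap()ed it, because an
   existing mapping keeps the underlying object alive, so no temp file is left behind
   even if the job later aborts. Stripped to its core (error handling and the broadcast
   of the file name omitted; see the function above for the real sequence):

     int   fd = mkstemp(path);                    master creates a unique file
     ftruncate(fd,size);
     void *p  = mmap(NULL,size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0);
     close(fd);                                   the mapping survives the close
     unlink(path);                                ... and the unlink
*/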
/* Destroy the pthread barrier in the PETSc OpenMP controller */
PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before master destroys it */
  ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  if (ctrl->is_omp_master) { ierr = pthread_barrier_destroy(ctrl->barrier);CHKERRQ(ierr); }

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  ierr = munmap(ctrl->barrier,sizeof(pthread_barrier_t));CHKERRQ(ierr);
#else
  ierr = MPI_Win_free(&ctrl->omp_win);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
/*@C
    PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries that use OpenMP

    Input Parameters:
+   petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
-   nthreads   - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value

    Output Parameter:
.   pctrl - a PETSc OpenMP controller

    Level: developer

    TODO: Possibly use the variable PetscNumOMPThreads to determine the number of threads to use

.seealso: PetscOmpCtrlDestroy()
@*/
PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm,PetscInt nthreads,PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl;
  unsigned long  *cpu_ulongs=NULL;
  PetscInt       i,nr_cpu_ulongs;
  PetscShmComm   pshmcomm;
  MPI_Comm       shm_comm;
  PetscMPIInt    shm_rank,shm_comm_size,omp_rank,color;
  PetscInt       num_packages,num_cores;

  PetscFunctionBegin;
  ierr = PetscNew(&ctrl);CHKERRQ(ierr);

  /*=================================================================================
    Init hwloc
    =================================================================================*/
  ierr = hwloc_topology_init(&ctrl->topology);CHKERRQ(ierr);
#if HWLOC_API_VERSION >= 0x00020000
  /* to filter out unneeded info and have a faster hwloc_topology_load() */
  ierr = hwloc_topology_set_all_types_filter(ctrl->topology,HWLOC_TYPE_FILTER_KEEP_NONE);CHKERRQ(ierr);
  ierr = hwloc_topology_set_type_filter(ctrl->topology,HWLOC_OBJ_CORE,HWLOC_TYPE_FILTER_KEEP_ALL);CHKERRQ(ierr);
#endif
  ierr = hwloc_topology_load(ctrl->topology);CHKERRQ(ierr);

  /*=================================================================================
    Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
    physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
    the others are called slaves. OMP masters make up a new comm called omp_master_comm,
    which is usually passed to third party libraries.
    =================================================================================*/

  /* fetch the stored shared memory communicator */
  ierr = PetscShmCommGet(petsc_comm,&pshmcomm);CHKERRQ(ierr);
  ierr = PetscShmCommGetMpiShmComm(pshmcomm,&shm_comm);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(shm_comm,&shm_rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(shm_comm,&shm_comm_size);CHKERRQ(ierr);

  /* PETSc decides nthreads, which is the smaller of shm_comm_size and the number of cores per package (socket) */
  if (nthreads == -1) {
    num_packages = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE);
    num_cores    = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE);
    nthreads     = num_cores/num_packages;
    if (nthreads > shm_comm_size) nthreads = shm_comm_size;
  }

  if (nthreads < 1 || nthreads > shm_comm_size) SETERRQ2(petsc_comm,PETSC_ERR_ARG_OUTOFRANGE,"number of OpenMP threads %D cannot be < 1 or > the MPI shared memory communicator size %d\n",nthreads,shm_comm_size);
  if (shm_comm_size % nthreads) { ierr = PetscPrintf(petsc_comm,"Warning: number of OpenMP threads %D is not a factor of the MPI shared memory communicator size %d, which may cause load imbalance!\n",nthreads,shm_comm_size);CHKERRQ(ierr); }

  /* split shm_comm into a set of omp_comms, each of size nthreads. E.g., if
     shm_comm_size=16 and nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
     color 1. They are put in two omp_comms. Note that petsc_ranks may or may not
     be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
     Use 0 as the key so that the rank ordering won't change in the new comm.
   */
  color = shm_rank / nthreads;
  ierr  = MPI_Comm_split(shm_comm,color,0/*key*/,&ctrl->omp_comm);CHKERRQ(ierr);

  /* put rank 0's of the omp_comms (i.e., the master ranks) into a new comm - omp_master_comm */
  ierr = MPI_Comm_rank(ctrl->omp_comm,&omp_rank);CHKERRQ(ierr);
  if (!omp_rank) {
    ctrl->is_omp_master = PETSC_TRUE;  /* master */
    color = 0;
  } else {
    ctrl->is_omp_master = PETSC_FALSE; /* slave */
    color = MPI_UNDEFINED;             /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
  }
  ierr = MPI_Comm_split(petsc_comm,color,0/*key*/,&ctrl->omp_master_comm);CHKERRQ(ierr); /* rank 0 in omp_master_comm is rank 0 in petsc_comm */

  /*=================================================================================
    Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
    slave ranks to sleep and idle their CPUs, so that the master can fork OMP threads
    and run them on the idle CPUs.
    =================================================================================*/
  ierr = PetscOmpCtrlCreateBarrier(ctrl);CHKERRQ(ierr);

  /*=================================================================================
    The omp master logs its cpu binding (i.e., cpu set) and computes a new binding
    that is the union of the bindings of all ranks in the omp_comm
    =================================================================================*/

  ctrl->cpuset = hwloc_bitmap_alloc(); if (!ctrl->cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
  ierr = hwloc_get_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);

  /* the hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
  nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology))+sizeof(unsigned long)*8)/sizeof(unsigned long)/8;
  ierr = PetscMalloc1(nr_cpu_ulongs,&cpu_ulongs);CHKERRQ(ierr);
  if (nr_cpu_ulongs == 1) {
    cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
  } else {
    for (i=0; i<nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset,(unsigned)i);
  }

  ierr = MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs,cpu_ulongs,nr_cpu_ulongs,MPI_UNSIGNED_LONG,MPI_BOR,0,ctrl->omp_comm);CHKERRQ(ierr);

  if (ctrl->is_omp_master) {
    ctrl->omp_cpuset = hwloc_bitmap_alloc(); if (!ctrl->omp_cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
    if (nr_cpu_ulongs == 1) {
#if HWLOC_API_VERSION >= 0x00020000
      ierr = hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);CHKERRQ(ierr);
#else
      hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);
#endif
    } else {
      for (i=0; i<nr_cpu_ulongs; i++) {
#if HWLOC_API_VERSION >= 0x00020000
        ierr = hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);CHKERRQ(ierr);
#else
        hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);
#endif
      }
    }
  }

  ierr   = PetscFree(cpu_ulongs);CHKERRQ(ierr);
  *pctrl = ctrl;
  PetscFunctionReturn(0);
}
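/*
   An end-to-end usage sketch of the controller (hypothetical caller; error checking
   abbreviated). All ranks of pcomm take every step; only masters enter the OpenMP region:

     PetscOmpCtrl ctrl;
     MPI_Comm     omp_master_comm;
     PetscBool    is_master;

     ierr = PetscOmpCtrlCreate(pcomm,-1,&ctrl);CHKERRQ(ierr);           -1: let PETSc pick nthreads
     ierr = PetscOmpCtrlGetOmpComms(ctrl,NULL,&omp_master_comm,&is_master);CHKERRQ(ierr);
     if (is_master) {
       ierr = PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);CHKERRQ(ierr);   rebind CPUs, raise num_threads
       .... call a third party library that uses OpenMP, on omp_master_comm ....
       ierr = PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);CHKERRQ(ierr);
     }
     ierr = PetscOmpCtrlBarrier(ctrl);CHKERRQ(ierr);                    slaves sleep here meanwhile
     ierr = PetscOmpCtrlDestroy(&ctrl);CHKERRQ(ierr);
*/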
/*@C
    PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller

    Input Parameter:
.   pctrl - a PETSc OpenMP controller

    Level: developer

.seealso: PetscOmpCtrlCreate()
@*/
PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl = *pctrl;

  PetscFunctionBegin;
  hwloc_bitmap_free(ctrl->cpuset);
  hwloc_topology_destroy(ctrl->topology);
  ierr = PetscOmpCtrlDestroyBarrier(ctrl);CHKERRQ(ierr);
  ierr = MPI_Comm_free(&ctrl->omp_comm);CHKERRQ(ierr);
  if (ctrl->is_omp_master) {
    hwloc_bitmap_free(ctrl->omp_cpuset);
    ierr = MPI_Comm_free(&ctrl->omp_master_comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(ctrl);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller

    Input Parameter:
.   ctrl - a PETSc OMP controller

    Output Parameters:
+   omp_comm        - a communicator that includes a master rank and slave ranks where the master spawns threads
.   omp_master_comm - on master ranks, a communicator consisting of the master ranks of all the omp_comms;
                      on slave ranks, MPI_COMM_NULL is returned
-   is_omp_master   - true if the calling process is an OMP master rank

    Notes:
    Any output parameter can be NULL; such a parameter is simply ignored.

    Level: developer
@*/
PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl,MPI_Comm *omp_comm,MPI_Comm *omp_master_comm,PetscBool *is_omp_master)
{
  PetscFunctionBegin;
  if (omp_comm)        *omp_comm        = ctrl->omp_comm;
  if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
  if (is_omp_master)   *is_omp_master   = ctrl->is_omp_master;
  PetscFunctionReturn(0);
}
/*@C
    PetscOmpCtrlBarrier - Do a barrier on the MPI ranks in the omp_comm contained by the PETSc OMP controller (to let the slave ranks free their CPUs)

    Input Parameter:
.   ctrl - a PETSc OMP controller

    Notes:
    This is a pthread barrier on MPI processes. Using MPI_Barrier() instead would be conceptually correct, but the MPI
    standard does not require processes blocked in MPI_Barrier() to free their CPUs to let other processes progress.
    In practice, to minimize latency, MPI processes stuck in MPI_Barrier() keep polling and do not free their CPUs.
    In contrast, a pthread barrier puts waiting processes to sleep, freeing their CPUs.

    A code using PetscOmpCtrlBarrier() would look like this:

      if (is_omp_master) {
        PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
        Call the library using OpenMP
        PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
      }
      PetscOmpCtrlBarrier(ctrl);

    Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterBegin(), PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = pthread_barrier_wait(ctrl->barrier);
  if (ierr && ierr != PTHREAD_BARRIER_SERIAL_THREAD) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d\n",ierr);
  PetscFunctionReturn(0);
}

/*@C
    PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks

    Input Parameter:
.   ctrl - a PETSc OMP controller

    Notes:
    Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to determine whether the calling rank is a master.
    This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime

    Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->omp_cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
  PetscFunctionReturn(0);
}
/*@C
    PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks

    Input Parameter:
.   ctrl - a PETSc OMP controller

    Notes:
    Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to determine whether the calling rank is a master.
    This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime to 1.

    Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterBegin()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(1);
  PetscFunctionReturn(0);
}

#undef USE_MMAP_ALLOCATE_SHARED_MEMORY
#endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */