#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/petscimpl.h>

struct _n_PetscShmComm {
  PetscMPIInt *globranks;         /* global ranks of each rank in the shared memory communicator */
  PetscMPIInt  shmsize;           /* size of the shared memory communicator */
  MPI_Comm     globcomm, shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
};

/*
   Private routine to delete the internal shared memory communicator when a communicator is freed.

   This is called by MPI, not by users; MPI_Comm_free() invokes it when the communicator that carries this data as an attribute is freed.

   Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_DeleteFn(MPI_Comm comm, PetscMPIInt keyval, void *val, void *extra_state)
{
  PetscShmComm p = (PetscShmComm)val;

  PetscFunctionBegin;
  PetscCallReturnMPI(PetscInfo(NULL, "Deleting shared memory subcommunicator in an MPI_Comm %ld\n", (long)comm));
  PetscCallMPIReturnMPI(MPI_Comm_free(&p->shmcomm));
  PetscCallReturnMPI(PetscFree(p->globranks));
  PetscCallReturnMPI(PetscFree(val));
  PetscFunctionReturn(MPI_SUCCESS);
}
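
/*
   Illustrative sketch, not the actual PETSc registration code: the callback above follows the
   standard MPI attribute-caching protocol. A keyval is created once with the delete callback
   attached (PETSc does this during initialization), a value is cached on a communicator with
   MPI_Comm_set_attr(), and MPI invokes the callback automatically when that communicator is
   freed. The names `keyval` and `data` below are made up for the example.

     static PetscMPIInt keyval = MPI_KEYVAL_INVALID;

     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_ShmComm_Attr_DeleteFn, &keyval, NULL);
     MPI_Comm_set_attr(comm, keyval, data);  // cache data on comm
     MPI_Comm_free(&comm);                   // MPI calls Petsc_ShmComm_Attr_DeleteFn(comm, keyval, data, NULL)
*/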

#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
/* Data structures to support freeing comms created in PetscShmCommGet().
   Since we predict that communicators passed to PetscShmCommGet() are very likely
   either a PETSc inner communicator or an MPI communicator with a linked PETSc
   inner communicator, a simple static array suffices to store the dupped communicators
   in the rare remaining cases.
*/
  #define MAX_SHMCOMM_DUPPED_COMMS 16
static PetscInt num_dupped_comms = 0;
static MPI_Comm shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS];
static PetscErrorCode PetscShmCommDestroyDuppedComms(void)
{
  PetscFunctionBegin;
  for (PetscInt i = 0; i < num_dupped_comms; i++) PetscCall(PetscCommDestroy(&shmcomm_dupped_comms[i]));
  num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
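
/*
   Lifecycle sketch for the rare case described above (illustrative; `user_comm` is a hypothetical
   communicator created by the caller, carrying no PETSc attributes):

     MPI_Comm_dup(MPI_COMM_WORLD, &user_comm);  // raw MPI comm, neither a PETSc comm nor linked to one
     PetscShmCommGet(user_comm, &shm);          // dups a PETSc inner comm and records it in shmcomm_dupped_comms[]
     PetscFinalize();                           // PetscShmCommDestroyDuppedComms() releases the recorded dups
*/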

/*@C
  PetscShmCommGet - Returns a sub-communicator of all ranks that share common memory

  Collective.

  Input Parameter:
. globcomm - `MPI_Comm`, which can be a user `MPI_Comm` or a PETSc inner `MPI_Comm`

  Output Parameter:
. pshmcomm - the PETSc shared memory communicator object

  Level: developer

  Note:
  When used with MPICH, MPICH must be configured with `--download-mpich-device=ch3:nemesis`

.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm, PetscShmComm *pshmcomm)
{
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  MPI_Group         globgroup, shmgroup;
  PetscMPIInt      *shmranks, i, flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscAssertPointer(pshmcomm, 2);
  /* Get a PETSc inner comm, since we always want to stash pshmcomm on PETSc inner comms */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* globcomm is not a PETSc comm */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if globcomm already has a linked PETSc inner comm */
    PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* globcomm does not have a linked PETSc inner comm, so we create one and replace globcomm with it */
      PetscCheck(num_dupped_comms < MAX_SHMCOMM_DUPPED_COMMS, globcomm, PETSC_ERR_PLIB, "PetscShmCommGet() is trying to dup more than %d MPI_Comms", MAX_SHMCOMM_DUPPED_COMMS);
      PetscCall(PetscCommDuplicate(globcomm, &globcomm, NULL));
      /* Register a function to free the dupped PETSc comms at PetscFinalize() the first time */
      if (num_dupped_comms == 0) PetscCall(PetscRegisterFinalize(PetscShmCommDestroyDuppedComms));
      shmcomm_dupped_comms[num_dupped_comms] = globcomm;
      num_dupped_comms++;
    } else {
      /* otherwise, we pull out the inner comm and use it as globcomm */
      globcomm = ucomm.comm;
    }
  }

  /* Check if globcomm already has an attached pshmcomm. If not, create one */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_ShmComm_keyval, pshmcomm, &flg));
  if (flg) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscNew(pshmcomm));
  (*pshmcomm)->globcomm = globcomm;

  PetscCallMPI(MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &(*pshmcomm)->shmcomm));

  PetscCallMPI(MPI_Comm_size((*pshmcomm)->shmcomm, &(*pshmcomm)->shmsize));
  PetscCallMPI(MPI_Comm_group(globcomm, &globgroup));
  PetscCallMPI(MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup));
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &shmranks));
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &(*pshmcomm)->globranks));
  for (i = 0; i < (*pshmcomm)->shmsize; i++) shmranks[i] = i;
  PetscCallMPI(MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks));
  PetscCall(PetscFree(shmranks));
  PetscCallMPI(MPI_Group_free(&globgroup));
  PetscCallMPI(MPI_Group_free(&shmgroup));

  for (i = 0; i < (*pshmcomm)->shmsize; i++) PetscCall(PetscInfo(NULL, "Shared memory rank %d global rank %d\n", i, (*pshmcomm)->globranks[i]));
  PetscCallMPI(MPI_Comm_set_attr(globcomm, Petsc_ShmComm_keyval, *pshmcomm));
  PetscFunctionReturn(PETSC_SUCCESS);
#else
  SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}
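
/*
   Usage sketch (illustrative, not taken from an existing PETSc example): the PetscShmComm
   returned above is cached on the (inner) communicator as an attribute, so repeated calls are
   cheap and the caller must not free the object; it is released by Petsc_ShmComm_Attr_DeleteFn()
   when the communicator itself is destroyed.

     PetscShmComm shm;

     PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &shm));  // first call splits the comm and caches the result
     PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &shm));  // later calls just return the cached object
*/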

/*@C
  PetscShmCommGlobalToLocal - Given a global rank, returns the local rank in the shared memory communicator

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- grank    - the global rank

  Output Parameter:
. lrank - the local rank, or `MPI_PROC_NULL` if it does not exist

  Level: developer

  Developer Notes:
  Assumes that pshmcomm->globranks[] is sorted

  It may be better to rewrite this to map multiple global ranks to local ranks in the same function call

.seealso: `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm, PetscMPIInt grank, PetscMPIInt *lrank)
{
  PetscMPIInt low, high, t, i;
  PetscBool   flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscAssertPointer(pshmcomm, 1);
  PetscAssertPointer(lrank, 3);
  *lrank = MPI_PROC_NULL;
  if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(PETSC_SUCCESS);
  if (grank > pshmcomm->globranks[pshmcomm->shmsize - 1]) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-noshared", &flg, NULL));
  if (flg) PetscFunctionReturn(PETSC_SUCCESS);
  /* binary search narrows the range, then a short linear scan locates grank in the sorted globranks[] */
  low  = 0;
  high = pshmcomm->shmsize;
  while (high - low > 5) {
    t = (low + high) / 2;
    if (pshmcomm->globranks[t] > grank) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(PETSC_SUCCESS);
    if (pshmcomm->globranks[i] == grank) {
      *lrank = i;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
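
/*
   Usage sketch (illustrative): decide whether an arbitrary global rank `grank` lives on the same
   node as this process; `shm` is assumed to come from PetscShmCommGet() on the same communicator.

     PetscMPIInt lrank;

     PetscCall(PetscShmCommGlobalToLocal(shm, grank, &lrank));
     if (lrank == MPI_PROC_NULL) {
       // grank does not share memory with this process
     } else {
       // grank is rank lrank of the shared memory communicator
     }
*/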

/*@C
  PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator, returns the global rank

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- lrank    - the local rank in the shared memory communicator

  Output Parameter:
. grank - the global rank in the global communicator from which the shared memory communicator was built

  Level: developer

.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm, PetscMPIInt lrank, PetscMPIInt *grank)
{
  PetscFunctionBegin;
  PetscAssertPointer(pshmcomm, 1);
  PetscAssertPointer(grank, 3);
  PetscCheck(lrank >= 0 && lrank < pshmcomm->shmsize, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "No rank %d in the shared memory communicator", lrank);
  *grank = pshmcomm->globranks[lrank];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory

  Input Parameter:
. pshmcomm - `PetscShmComm` object obtained with `PetscShmCommGet()`

  Output Parameter:
. comm - the MPI communicator

  Level: developer

.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`
@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm, MPI_Comm *comm)
{
  PetscFunctionBegin;
  PetscAssertPointer(pshmcomm, 1);
  PetscAssertPointer(comm, 2);
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
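
/*
   End-to-end sketch (illustrative): use the shared memory communicator to allocate an MPI-3
   shared memory window among the ranks of one node. The buffer size of 100 doubles is made up
   for the example and error handling is abbreviated.

     PetscShmComm shm;
     MPI_Comm     shmcomm;
     MPI_Win      win;
     double      *base;

     PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &shm));
     PetscCall(PetscShmCommGetMpiShmComm(shm, &shmcomm));
     PetscCallMPI(MPI_Win_allocate_shared(100 * sizeof(double), sizeof(double), MPI_INFO_NULL, shmcomm, &base, &win));
     // ... access the memory of other on-node ranks via MPI_Win_shared_query() ...
     PetscCallMPI(MPI_Win_free(&win));
*/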