1f1f2ae84SBarry Smith /* 2f1f2ae84SBarry Smith This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3f1f2ae84SBarry Smith It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4f1f2ae84SBarry Smith 5dd8e379bSPierre Jolivet That program may use OpenMP to compute the right-hand side and matrix for the linear system 6f1f2ae84SBarry Smith 7f1f2ae84SBarry Smith The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8f1f2ae84SBarry Smith 9f1f2ae84SBarry Smith The resulting KSP and PC can only be controlled via the options database, though some common commands 10f1f2ae84SBarry Smith could be passed through the server. 11f1f2ae84SBarry Smith 12f1f2ae84SBarry Smith */ 139f0612e4SBarry Smith #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/ 14f1f2ae84SBarry Smith #include <petsc/private/kspimpl.h> 15789afff4SPierre Jolivet #include <petscts.h> 16789afff4SPierre Jolivet #include <petsctao.h> 179f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX) 189f0612e4SBarry Smith #include <pthread.h> 199f0612e4SBarry Smith #endif 20f1f2ae84SBarry Smith 21f1f2ae84SBarry Smith #define PC_MPI_MAX_RANKS 256 22f1f2ae84SBarry Smith #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 23f1f2ae84SBarry Smith 24f1f2ae84SBarry Smith typedef struct { 259f0612e4SBarry Smith KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */ 26f1f2ae84SBarry Smith PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 27f1f2ae84SBarry Smith PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 289f0612e4SBarry Smith PetscInt mincntperrank; /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */ 299f0612e4SBarry Smith PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI process is used for the solve */ 30f1f2ae84SBarry Smith } PC_MPI; 31f1f2ae84SBarry Smith 329371c9d4SSatish Balay typedef enum { 339371c9d4SSatish Balay PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 34f1f2ae84SBarry Smith PCMPI_CREATE, 35f1f2ae84SBarry Smith PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 36f1f2ae84SBarry Smith PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 37f1f2ae84SBarry Smith PCMPI_SOLVE, 38f1f2ae84SBarry Smith PCMPI_VIEW, 399f0612e4SBarry Smith PCMPI_DESTROY /* destroy a PC that is no longer needed */ 40f1f2ae84SBarry Smith } PCMPICommand; 41f1f2ae84SBarry Smith 42f1f2ae84SBarry Smith static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 43f1f2ae84SBarry Smith static PetscBool PCMPICommSet = PETSC_FALSE; 44f1f2ae84SBarry Smith static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 455316cbedSBarry Smith static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0; 469f0612e4SBarry Smith static PetscLogEvent EventServerDist, EventServerDistMPI; 479f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX) 489f0612e4SBarry Smith static pthread_mutex_t *PCMPIServerLocks; 499f0612e4SBarry Smith #else 509f0612e4SBarry Smith static void *PCMPIServerLocks; 519f0612e4SBarry Smith #endif 52f1f2ae84SBarry Smith 53d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICommsCreate(void) 54d71ae5a4SJacob Faibussowitsch { 55f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD; 56f1f2ae84SBarry Smith PetscMPIInt size, rank, i; 57f1f2ae84SBarry Smith 58f1f2ae84SBarry Smith PetscFunctionBegin; 59f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size)); 60f1f2ae84SBarry Smith PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 61f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_rank(comm, &rank)); 62f1f2ae84SBarry Smith /* comm for size 1 is useful only for debugging */ 63f1f2ae84SBarry Smith for (i = 0; i < size; i++) { 64f1f2ae84SBarry Smith PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 65f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 66f1f2ae84SBarry Smith PCMPISolveCounts[i] = 0; 67f1f2ae84SBarry Smith PCMPIKSPCounts[i] = 0; 685316cbedSBarry Smith PCMPIIterations[i] = 0; 695316cbedSBarry Smith PCMPISizes[i] = 0; 70f1f2ae84SBarry Smith } 71f1f2ae84SBarry Smith PCMPICommSet = PETSC_TRUE; 723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 73f1f2ae84SBarry Smith } 74f1f2ae84SBarry Smith 759f0612e4SBarry Smith static PetscErrorCode PCMPICommsDestroy(void) 76d71ae5a4SJacob Faibussowitsch { 77f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD; 78f1f2ae84SBarry Smith PetscMPIInt size, rank, i; 79f1f2ae84SBarry Smith 80f1f2ae84SBarry Smith PetscFunctionBegin; 813ba16761SJacob Faibussowitsch if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS); 82f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size)); 83f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_rank(comm, &rank)); 84f1f2ae84SBarry Smith for (i = 0; i < size; i++) { 85f1f2ae84SBarry Smith if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 86f1f2ae84SBarry Smith } 87f1f2ae84SBarry Smith PCMPICommSet = PETSC_FALSE; 883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 89f1f2ae84SBarry Smith } 90f1f2ae84SBarry Smith 91d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICreate(PC pc) 92d71ae5a4SJacob Faibussowitsch { 93f1f2ae84SBarry Smith PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 94f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD; 95f1f2ae84SBarry Smith KSP ksp; 96f1f2ae84SBarry Smith PetscInt N[2], mincntperrank = 0; 97f1f2ae84SBarry Smith PetscMPIInt size; 98f1f2ae84SBarry Smith Mat sA; 993821be0aSBarry Smith char *cprefix = NULL; 100f1f2ae84SBarry Smith PetscMPIInt len = 0; 101f1f2ae84SBarry Smith 102f1f2ae84SBarry Smith PetscFunctionBegin; 1039f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE; 104f1f2ae84SBarry Smith if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 105f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size)); 106f1f2ae84SBarry Smith if (pc) { 107f1f2ae84SBarry Smith if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 108f1f2ae84SBarry Smith PetscCall(PCGetOperators(pc, &sA, &sA)); 109f1f2ae84SBarry Smith PetscCall(MatGetSize(sA, &N[0], &N[1])); 110f1f2ae84SBarry Smith } 111f1f2ae84SBarry Smith PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 112f1f2ae84SBarry Smith 113f1f2ae84SBarry Smith /* choose a suitable sized MPI_Comm for the problem to be solved on */ 114f1f2ae84SBarry Smith if (km) mincntperrank = km->mincntperrank; 115f1f2ae84SBarry Smith PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 116f1f2ae84SBarry Smith comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 117f1f2ae84SBarry Smith if (comm == MPI_COMM_NULL) { 118f1f2ae84SBarry Smith ksp = NULL; 1199f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE; 1203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 121f1f2ae84SBarry Smith } 1220316ec64SBarry Smith PetscCall(PetscLogStagePush(PCMPIStage)); 123f1f2ae84SBarry Smith PetscCall(KSPCreate(comm, &ksp)); 1243821be0aSBarry Smith PetscCall(KSPSetNestLevel(ksp, 1)); 1253821be0aSBarry Smith PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1)); 1260316ec64SBarry Smith PetscCall(PetscLogStagePop()); 127f1f2ae84SBarry Smith PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 128f1f2ae84SBarry Smith if (pc) { 129f1f2ae84SBarry Smith size_t slen; 1303821be0aSBarry Smith const char *prefix = NULL; 131f9818f3cSJose E. Roman char *found = NULL; 132f1f2ae84SBarry Smith 133f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size)); 134dad3da8eSBarry Smith PCMPIKSPCounts[size - 1]++; 1353821be0aSBarry Smith /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 1363821be0aSBarry Smith PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1373821be0aSBarry Smith PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 1383821be0aSBarry Smith PetscCall(PetscStrallocpy(prefix, &cprefix)); 1393821be0aSBarry Smith PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 1403821be0aSBarry Smith PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 1413821be0aSBarry Smith *found = 0; 1423821be0aSBarry Smith PetscCall(PetscStrlen(cprefix, &slen)); 143f1f2ae84SBarry Smith len = (PetscMPIInt)slen; 144f1f2ae84SBarry Smith } 145f1f2ae84SBarry Smith PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 146f1f2ae84SBarry Smith if (len) { 1473821be0aSBarry Smith if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix)); 1483821be0aSBarry Smith PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm)); 1493821be0aSBarry Smith PetscCall(KSPSetOptionsPrefix(ksp, cprefix)); 150f1f2ae84SBarry Smith } 1513821be0aSBarry Smith PetscCall(PetscFree(cprefix)); 1529f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE; 1533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 154f1f2ae84SBarry Smith } 155f1f2ae84SBarry Smith 156d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISetMat(PC pc) 157d71ae5a4SJacob Faibussowitsch { 158f1f2ae84SBarry Smith PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 159f1f2ae84SBarry Smith Mat A; 1609f0612e4SBarry Smith PetscInt m, n, j, bs; 161f1f2ae84SBarry Smith Mat sA; 162f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD; 163f1f2ae84SBarry Smith KSP ksp; 164f1f2ae84SBarry Smith PetscLayout layout; 1659f0612e4SBarry Smith const PetscInt *IA = NULL, *JA = NULL, *ia, *ja; 166f1f2ae84SBarry Smith const PetscInt *range; 167f1f2ae84SBarry Smith PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i; 1689f0612e4SBarry Smith const PetscScalar *a = NULL, *sa; 1699f0612e4SBarry Smith PetscInt matproperties[8] = {0}, rstart, rend; 1703821be0aSBarry Smith char *cprefix; 171f1f2ae84SBarry Smith 172f1f2ae84SBarry Smith PetscFunctionBegin; 173f1f2ae84SBarry Smith PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 1743ba16761SJacob Faibussowitsch if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 1759f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE; 17645682376SBarry Smith PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 177f1f2ae84SBarry Smith PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 178f1f2ae84SBarry Smith if (pc) { 17968a21331SBarry Smith PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 1803821be0aSBarry Smith const char *prefix; 1813821be0aSBarry Smith size_t clen; 18268a21331SBarry Smith 183f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size)); 184dad3da8eSBarry Smith PCMPIMatCounts[size - 1]++; 185f1f2ae84SBarry Smith PetscCall(PCGetOperators(pc, &sA, &sA)); 18668a21331SBarry Smith PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1])); 187dd0d27b1SBarry Smith PetscCall(MatGetBlockSize(sA, &bs)); 18868a21331SBarry Smith matproperties[2] = bs; 18968a21331SBarry Smith PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 19068a21331SBarry Smith matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2); 19168a21331SBarry Smith PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 19268a21331SBarry Smith matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2); 19368a21331SBarry Smith PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 19468a21331SBarry Smith matproperties[5] = !isset ? 0 : (isspd ? 1 : 2); 19568a21331SBarry Smith PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 19668a21331SBarry Smith matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 1977a99bfcaSBarry Smith /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */ 1983821be0aSBarry Smith PetscCall(MatGetOptionsPrefix(sA, &prefix)); 1993821be0aSBarry Smith PetscCall(PetscStrallocpy(prefix, &cprefix)); 2003821be0aSBarry Smith PetscCall(PetscStrlen(cprefix, &clen)); 2013821be0aSBarry Smith matproperties[7] = (PetscInt)clen; 202f1f2ae84SBarry Smith } 2033821be0aSBarry Smith PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm)); 204f1f2ae84SBarry Smith 20568a21331SBarry Smith /* determine ownership ranges of matrix columns */ 206f1f2ae84SBarry Smith PetscCall(PetscLayoutCreate(comm, &layout)); 20768a21331SBarry Smith PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 20868a21331SBarry Smith PetscCall(PetscLayoutSetSize(layout, matproperties[1])); 209f1f2ae84SBarry Smith PetscCall(PetscLayoutSetUp(layout)); 210f1f2ae84SBarry Smith PetscCall(PetscLayoutGetLocalSize(layout, &n)); 21168a21331SBarry Smith PetscCall(PetscLayoutDestroy(&layout)); 21268a21331SBarry Smith 21368a21331SBarry Smith /* determine ownership ranges of matrix rows */ 21468a21331SBarry Smith PetscCall(PetscLayoutCreate(comm, &layout)); 21568a21331SBarry Smith PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 21668a21331SBarry Smith PetscCall(PetscLayoutSetSize(layout, matproperties[0])); 21768a21331SBarry Smith PetscCall(PetscLayoutSetUp(layout)); 21868a21331SBarry Smith PetscCall(PetscLayoutGetLocalSize(layout, &m)); 2199f0612e4SBarry Smith PetscCall(PetscLayoutGetRange(layout, &rstart, &rend)); 220f1f2ae84SBarry Smith 2219f0612e4SBarry Smith PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 222f1f2ae84SBarry Smith /* copy over the matrix nonzero structure and values */ 223f1f2ae84SBarry Smith if (pc) { 2249f0612e4SBarry Smith PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 2259f0612e4SBarry Smith if (!PCMPIServerUseShmget) { 226f1f2ae84SBarry Smith NZ = km->NZ; 227f1f2ae84SBarry Smith NZdispl = km->NZdispl; 228f1f2ae84SBarry Smith PetscCall(PetscLayoutGetRanges(layout, &range)); 229f1f2ae84SBarry Smith for (i = 0; i < size; i++) { 230f1f2ae84SBarry Smith sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); 231f1f2ae84SBarry Smith NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]); 232f1f2ae84SBarry Smith } 233f1f2ae84SBarry Smith displi[0] = 0; 234f1f2ae84SBarry Smith NZdispl[0] = 0; 235f1f2ae84SBarry Smith for (j = 1; j < size; j++) { 236f1f2ae84SBarry Smith displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 237f1f2ae84SBarry Smith NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 238f1f2ae84SBarry Smith } 2399f0612e4SBarry Smith } 240f1f2ae84SBarry Smith PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 241f1f2ae84SBarry Smith } 242f1f2ae84SBarry Smith PetscCall(PetscLayoutDestroy(&layout)); 243f1f2ae84SBarry Smith 2443821be0aSBarry Smith PetscCall(MatCreate(comm, &A)); 2453821be0aSBarry Smith if (matproperties[7] > 0) { 2463821be0aSBarry Smith if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix)); 2473821be0aSBarry Smith PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm)); 2483821be0aSBarry Smith PetscCall(MatSetOptionsPrefix(A, cprefix)); 2493821be0aSBarry Smith PetscCall(PetscFree(cprefix)); 2503821be0aSBarry Smith } 2513821be0aSBarry Smith PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_")); 2523821be0aSBarry Smith PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1])); 2533821be0aSBarry Smith PetscCall(MatSetType(A, MATMPIAIJ)); 2549f0612e4SBarry Smith 2559f0612e4SBarry Smith if (!PCMPIServerUseShmget) { 2569f0612e4SBarry Smith PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm)); 2579f0612e4SBarry Smith PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 2589f0612e4SBarry Smith PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm)); 2599f0612e4SBarry Smith PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm)); 2609f0612e4SBarry Smith PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm)); 2619f0612e4SBarry Smith } else { 2629f0612e4SBarry Smith const void *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa}; 2639f0612e4SBarry Smith PCMPIServerAddresses *addresses; 2649f0612e4SBarry Smith 2659f0612e4SBarry Smith PetscCall(PetscNew(&addresses)); 2669f0612e4SBarry Smith addresses->n = 3; 2679f0612e4SBarry Smith PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr)); 2689f0612e4SBarry Smith ia = rstart + (PetscInt *)addresses->addr[0]; 2699f0612e4SBarry Smith ja = ia[0] + (PetscInt *)addresses->addr[1]; 2709f0612e4SBarry Smith a = ia[0] + (PetscScalar *)addresses->addr[2]; 2719f0612e4SBarry Smith PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, (PetscErrorCode(*)(void *))PCMPIServerAddressesDestroy)); 2729f0612e4SBarry Smith } 2739f0612e4SBarry Smith 2749f0612e4SBarry Smith if (pc) { 2759f0612e4SBarry Smith PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 2769f0612e4SBarry Smith PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 2779f0612e4SBarry Smith } 2789f0612e4SBarry Smith PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 2799f0612e4SBarry Smith 2809f0612e4SBarry Smith PetscCall(PetscLogStagePush(PCMPIStage)); 2813821be0aSBarry Smith PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a)); 28268a21331SBarry Smith PetscCall(MatSetBlockSize(A, matproperties[2])); 2833821be0aSBarry Smith 28468a21331SBarry Smith if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 28568a21331SBarry Smith if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE)); 28668a21331SBarry Smith if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE)); 28768a21331SBarry Smith if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE)); 288f1f2ae84SBarry Smith 2899f0612e4SBarry Smith if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a)); 290f1f2ae84SBarry Smith PetscCall(KSPSetOperators(ksp, A, A)); 291f1f2ae84SBarry Smith if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 2920316ec64SBarry Smith PetscCall(PetscLogStagePop()); 2939f0612e4SBarry Smith if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */ 294f1f2ae84SBarry Smith const PetscInt *range; 295f1f2ae84SBarry Smith 296f1f2ae84SBarry Smith PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 297f1f2ae84SBarry Smith for (i = 0; i < size; i++) { 298f1f2ae84SBarry Smith km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]); 299f1f2ae84SBarry Smith km->displ[i] = (PetscMPIInt)range[i]; 300f1f2ae84SBarry Smith } 301f1f2ae84SBarry Smith } 302f1f2ae84SBarry Smith PetscCall(MatDestroy(&A)); 30345682376SBarry Smith PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 304f1f2ae84SBarry Smith PetscCall(KSPSetFromOptions(ksp)); 3059f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE; 3063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 307f1f2ae84SBarry Smith } 308f1f2ae84SBarry Smith 309d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIUpdateMatValues(PC pc) 310d71ae5a4SJacob Faibussowitsch { 311f1f2ae84SBarry Smith PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 312f1f2ae84SBarry Smith KSP ksp; 313f1f2ae84SBarry Smith Mat sA, A; 314f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD; 3159f0612e4SBarry Smith const PetscInt *ia, *IA; 3169f0612e4SBarry Smith const PetscScalar *a; 317f1f2ae84SBarry Smith PetscCount nz; 318f1f2ae84SBarry Smith const PetscScalar *sa = NULL; 319dad3da8eSBarry Smith PetscMPIInt size; 3209f0612e4SBarry Smith PetscInt rstart, matproperties[4] = {0, 0, 0, 0}; 321f1f2ae84SBarry Smith 322f1f2ae84SBarry Smith PetscFunctionBegin; 323f1f2ae84SBarry Smith if (pc) { 324f1f2ae84SBarry Smith PetscCall(PCGetOperators(pc, &sA, &sA)); 325f1f2ae84SBarry Smith PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 3269f0612e4SBarry Smith PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL)); 327f1f2ae84SBarry Smith } 328f1f2ae84SBarry Smith PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 3293ba16761SJacob Faibussowitsch if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 3309f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE; 33145682376SBarry Smith PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 332f1f2ae84SBarry Smith PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 333dad3da8eSBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size)); 334dad3da8eSBarry Smith PCMPIMatCounts[size - 1]++; 335f1f2ae84SBarry Smith PetscCall(KSPGetOperators(ksp, NULL, &A)); 3369f0612e4SBarry Smith PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 3379f0612e4SBarry Smith if (!PCMPIServerUseShmget) { 338f1f2ae84SBarry Smith PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 339f1f2ae84SBarry Smith PetscCall(PetscMalloc1(nz, &a)); 3409f0612e4SBarry Smith PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm)); 3419f0612e4SBarry Smith } else { 3429f0612e4SBarry Smith PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 3439f0612e4SBarry Smith PCMPIServerAddresses *addresses; 3449f0612e4SBarry Smith PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses)); 3459f0612e4SBarry Smith ia = rstart + (PetscInt *)addresses->addr[0]; 3469f0612e4SBarry Smith a = ia[0] + (PetscScalar *)addresses->addr[2]; 3479f0612e4SBarry Smith } 3489f0612e4SBarry Smith PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 34968a21331SBarry Smith if (pc) { 35068a21331SBarry Smith PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 35168a21331SBarry Smith 35268a21331SBarry Smith PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 3539f0612e4SBarry Smith PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL)); 35468a21331SBarry Smith 35568a21331SBarry Smith PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 35668a21331SBarry Smith matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2); 35768a21331SBarry Smith PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 35868a21331SBarry Smith matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2); 35968a21331SBarry Smith PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 36068a21331SBarry Smith matproperties[2] = !isset ? 0 : (isspd ? 1 : 2); 36168a21331SBarry Smith PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 36268a21331SBarry Smith matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 36368a21331SBarry Smith } 364f1f2ae84SBarry Smith PetscCall(MatUpdateMPIAIJWithArray(A, a)); 3659f0612e4SBarry Smith if (!PCMPIServerUseShmget) PetscCall(PetscFree(a)); 36668a21331SBarry Smith PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm)); 36768a21331SBarry Smith /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */ 36868a21331SBarry Smith if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE)); 36968a21331SBarry Smith if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE)); 37068a21331SBarry Smith if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE)); 37168a21331SBarry Smith if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 37245682376SBarry Smith PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 3739f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE; 3743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 375f1f2ae84SBarry Smith } 376f1f2ae84SBarry Smith 377d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) 378d71ae5a4SJacob Faibussowitsch { 379f1f2ae84SBarry Smith PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 380f1f2ae84SBarry Smith KSP ksp; 381f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD; 382f1f2ae84SBarry Smith const PetscScalar *sb = NULL, *x; 383f1f2ae84SBarry Smith PetscScalar *b, *sx = NULL; 3845316cbedSBarry Smith PetscInt its, n; 3855316cbedSBarry Smith PetscMPIInt size; 3869f0612e4SBarry Smith void *addr[2]; 387f1f2ae84SBarry Smith 388f1f2ae84SBarry Smith PetscFunctionBegin; 389f1f2ae84SBarry Smith PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 3903ba16761SJacob Faibussowitsch if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 3919f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE; 39245682376SBarry Smith PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 393f1f2ae84SBarry Smith PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 394f1f2ae84SBarry Smith 395f1f2ae84SBarry Smith /* scatterv rhs */ 396dad3da8eSBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size)); 3975316cbedSBarry Smith if (pc) { 3985316cbedSBarry Smith PetscInt N; 3995316cbedSBarry Smith 400dad3da8eSBarry Smith PCMPISolveCounts[size - 1]++; 40168a21331SBarry Smith PetscCall(MatGetSize(pc->pmat, &N, NULL)); 4025316cbedSBarry Smith PCMPISizes[size - 1] += N; 403f1f2ae84SBarry Smith } 404f1f2ae84SBarry Smith PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 4059f0612e4SBarry Smith PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 4069f0612e4SBarry Smith if (!PCMPIServerUseShmget) { 407f1f2ae84SBarry Smith PetscCall(VecGetArray(ksp->vec_rhs, &b)); 4089f0612e4SBarry Smith if (pc) PetscCall(VecGetArrayRead(B, &sb)); 409f1f2ae84SBarry Smith PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm)); 410f1f2ae84SBarry Smith if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 4119f0612e4SBarry Smith PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 4129f0612e4SBarry Smith // TODO: scatter initial guess if needed 4139f0612e4SBarry Smith } else { 4149f0612e4SBarry Smith PetscInt rstart; 4159f0612e4SBarry Smith 4169f0612e4SBarry Smith if (pc) PetscCall(VecGetArrayRead(B, &sb)); 4179f0612e4SBarry Smith if (pc) PetscCall(VecGetArray(X, &sx)); 4189f0612e4SBarry Smith const void *inaddr[2] = {(const void **)sb, (const void **)sx}; 4199f0612e4SBarry Smith if (pc) PetscCall(VecRestoreArray(X, &sx)); 4209f0612e4SBarry Smith if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 4219f0612e4SBarry Smith 4229f0612e4SBarry Smith PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr)); 4239f0612e4SBarry Smith PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL)); 4249f0612e4SBarry Smith PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0])); 4259f0612e4SBarry Smith PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1])); 4269f0612e4SBarry Smith } 4279f0612e4SBarry Smith PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 428f1f2ae84SBarry Smith 42945682376SBarry Smith PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 4300316ec64SBarry Smith PetscCall(PetscLogStagePush(PCMPIStage)); 431f1f2ae84SBarry Smith PetscCall(KSPSolve(ksp, NULL, NULL)); 4320316ec64SBarry Smith PetscCall(PetscLogStagePop()); 43345682376SBarry Smith PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL)); 4345316cbedSBarry Smith PetscCall(KSPGetIterationNumber(ksp, &its)); 4355316cbedSBarry Smith PCMPIIterations[size - 1] += its; 4369f0612e4SBarry Smith // TODO: send iterations up to outer KSP 4379f0612e4SBarry Smith 4389f0612e4SBarry Smith if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr)); 439f1f2ae84SBarry Smith 440f1f2ae84SBarry Smith /* gather solution */ 4419f0612e4SBarry Smith PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL)); 4429f0612e4SBarry Smith if (!PCMPIServerUseShmget) { 443f1f2ae84SBarry Smith PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 444f1f2ae84SBarry Smith if (pc) PetscCall(VecGetArray(X, &sx)); 445f1f2ae84SBarry Smith PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 446f1f2ae84SBarry Smith if (pc) PetscCall(VecRestoreArray(X, &sx)); 447f1f2ae84SBarry Smith PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 4489f0612e4SBarry Smith } else { 4499f0612e4SBarry Smith PetscCall(VecResetArray(ksp->vec_rhs)); 4509f0612e4SBarry Smith PetscCall(VecResetArray(ksp->vec_sol)); 4519f0612e4SBarry Smith } 4529f0612e4SBarry Smith PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL)); 45345682376SBarry Smith PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL)); 4549f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE; 4553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 456f1f2ae84SBarry Smith } 457f1f2ae84SBarry Smith 458d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIDestroy(PC pc) 459d71ae5a4SJacob Faibussowitsch { 460f1f2ae84SBarry Smith PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 461f1f2ae84SBarry Smith KSP ksp; 462f1f2ae84SBarry Smith MPI_Comm comm = PC_MPI_COMM_WORLD; 463f1f2ae84SBarry Smith 464f1f2ae84SBarry Smith PetscFunctionBegin; 465f1f2ae84SBarry Smith PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 4663ba16761SJacob Faibussowitsch if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 467c7d372c4SBarry Smith PetscCall(PetscLogStagePush(PCMPIStage)); 4689f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE; 469f1f2ae84SBarry Smith PetscCall(KSPDestroy(&ksp)); 470c7d372c4SBarry Smith PetscCall(PetscLogStagePop()); 4719f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE; 4723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 473f1f2ae84SBarry Smith } 474f1f2ae84SBarry Smith 4759f0612e4SBarry Smith static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request) 4769f0612e4SBarry Smith { 4779f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX) 4789f0612e4SBarry Smith PetscMPIInt dummy1 = 1, dummy2; 4799f0612e4SBarry Smith #endif 4809f0612e4SBarry Smith 4819f0612e4SBarry Smith PetscFunctionBegin; 4829f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX) 4839f0612e4SBarry Smith if (PCMPIServerUseShmget) { 4849f0612e4SBarry Smith for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]); 4859f0612e4SBarry Smith } 4869f0612e4SBarry Smith #endif 4879f0612e4SBarry Smith PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 4889f0612e4SBarry Smith /* next line ensures the sender has already taken the lock */ 4899f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX) 4909f0612e4SBarry Smith if (PCMPIServerUseShmget) { 4919f0612e4SBarry Smith PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD)); 4929f0612e4SBarry Smith for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]); 4939f0612e4SBarry Smith } 4949f0612e4SBarry Smith #endif 4959f0612e4SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 4969f0612e4SBarry Smith } 4973821be0aSBarry Smith 498f1f2ae84SBarry Smith /*@C 4997a99bfcaSBarry Smith PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for 500f1580f4eSBarry Smith parallel `KSP` solves and management of parallel `KSP` objects. 501f1f2ae84SBarry Smith 5023821be0aSBarry Smith Logically Collective on all MPI processes except rank 0 503f1f2ae84SBarry Smith 504f1580f4eSBarry Smith Options Database Keys: 505f1f2ae84SBarry Smith + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 5069f0612e4SBarry Smith . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program 5079f0612e4SBarry Smith - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported) 508f1f2ae84SBarry Smith 50920f4b53cSBarry Smith Level: developer 51020f4b53cSBarry Smith 511f1580f4eSBarry Smith Note: 512f1f2ae84SBarry Smith This is normally started automatically in `PetscInitialize()` when the option is provided 513f1f2ae84SBarry Smith 5143821be0aSBarry Smith See `PCMPI` for information on using the solver with a `KSP` object 5153821be0aSBarry Smith 516f1f2ae84SBarry Smith Developer Notes: 5173821be0aSBarry Smith When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 518f1580f4eSBarry Smith written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 519f1580f4eSBarry Smith (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 520f1f2ae84SBarry Smith 521f1580f4eSBarry Smith Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 522f1f2ae84SBarry Smith 5233821be0aSBarry Smith Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage 5243821be0aSBarry Smith 5253821be0aSBarry Smith This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object 5263821be0aSBarry Smith 527baca6076SPierre Jolivet The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory 5283821be0aSBarry Smith nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver. 5293821be0aSBarry Smith 5307a99bfcaSBarry Smith The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on 5313821be0aSBarry Smith all MPI processes but the user callback only runs on one MPI process per node. 5323821be0aSBarry Smith 5333821be0aSBarry Smith PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove 5343821be0aSBarry Smith the `MPI_Comm` argument from PETSc calls. 5353821be0aSBarry Smith 5363821be0aSBarry Smith .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()` 537f1f2ae84SBarry Smith @*/ 538d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerBegin(void) 539d71ae5a4SJacob Faibussowitsch { 540f1f2ae84SBarry Smith PetscMPIInt rank; 541f1f2ae84SBarry Smith 542f1f2ae84SBarry Smith PetscFunctionBegin; 5439d3446b2SPierre Jolivet PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n")); 5445e1a0e3cSBarry Smith if (PetscDefined(USE_SINGLE_LIBRARY)) { 5455e1a0e3cSBarry Smith PetscCall(VecInitializePackage()); 5465e1a0e3cSBarry Smith PetscCall(MatInitializePackage()); 5475e1a0e3cSBarry Smith PetscCall(DMInitializePackage()); 5485e1a0e3cSBarry Smith PetscCall(PCInitializePackage()); 5495e1a0e3cSBarry Smith PetscCall(KSPInitializePackage()); 5505e1a0e3cSBarry Smith PetscCall(SNESInitializePackage()); 5515e1a0e3cSBarry Smith PetscCall(TSInitializePackage()); 5525e1a0e3cSBarry Smith PetscCall(TaoInitializePackage()); 5535e1a0e3cSBarry Smith } 554956255efSBarry Smith PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage)); 55545682376SBarry Smith PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist)); 5569f0612e4SBarry Smith PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI)); 5579f0612e4SBarry Smith 5589f0612e4SBarry Smith if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE; 5599f0612e4SBarry Smith PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL)); 5605e1a0e3cSBarry Smith 561f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 5629f0612e4SBarry Smith if (PCMPIServerUseShmget) { 5639f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX) 5649f0612e4SBarry Smith PetscMPIInt size; 5659f0612e4SBarry Smith 5669f0612e4SBarry Smith PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 5679f0612e4SBarry Smith if (size > 1) { 5689f0612e4SBarry Smith pthread_mutex_t *locks; 5699f0612e4SBarry Smith 5709f0612e4SBarry Smith if (rank == 0) { 5719f0612e4SBarry Smith PCMPIServerActive = PETSC_TRUE; 5729f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks)); 5739f0612e4SBarry Smith } 5749f0612e4SBarry Smith PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks)); 5759f0612e4SBarry Smith if (rank == 0) { 5769f0612e4SBarry Smith pthread_mutexattr_t attr; 5779f0612e4SBarry Smith 5789f0612e4SBarry Smith pthread_mutexattr_init(&attr); 5799f0612e4SBarry Smith pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); 5809f0612e4SBarry Smith 5819f0612e4SBarry Smith for (int i = 1; i < size; i++) { 5829f0612e4SBarry Smith pthread_mutex_init(&PCMPIServerLocks[i], &attr); 5839f0612e4SBarry Smith pthread_mutex_lock(&PCMPIServerLocks[i]); 5849f0612e4SBarry Smith } 5859f0612e4SBarry Smith } 5869f0612e4SBarry Smith PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 5879f0612e4SBarry Smith } 5889f0612e4SBarry Smith #endif 5899f0612e4SBarry Smith } 590f1f2ae84SBarry Smith if (rank == 0) { 591f1f2ae84SBarry Smith PETSC_COMM_WORLD = PETSC_COMM_SELF; 5923821be0aSBarry Smith PCMPIServerActive = PETSC_TRUE; 5933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 594f1f2ae84SBarry Smith } 595f1f2ae84SBarry Smith 596f1f2ae84SBarry Smith while (PETSC_TRUE) { 597f1f2ae84SBarry Smith PCMPICommand request = PCMPI_CREATE; 598*66a7e86cSPierre Jolivet #if defined(PETSC_HAVE_PTHREAD_MUTEX) 5999f0612e4SBarry Smith PetscMPIInt dummy1 = 1, dummy2; 600*66a7e86cSPierre Jolivet #endif 6019f0612e4SBarry Smith 6029f0612e4SBarry Smith // TODO: can we broadcast the number of active ranks here so only the correct subset of proccesses waits on the later scatters? 6039f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX) 6049f0612e4SBarry Smith if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]); 6059f0612e4SBarry Smith #endif 606f1f2ae84SBarry Smith PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 6079f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX) 6089f0612e4SBarry Smith if (PCMPIServerUseShmget) { 6099f0612e4SBarry Smith /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */ 6109f0612e4SBarry Smith PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD)); 6119f0612e4SBarry Smith pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]); 6129f0612e4SBarry Smith } 6139f0612e4SBarry Smith #endif 614f1f2ae84SBarry Smith switch (request) { 615d71ae5a4SJacob Faibussowitsch case PCMPI_CREATE: 616d71ae5a4SJacob Faibussowitsch PetscCall(PCMPICreate(NULL)); 617d71ae5a4SJacob Faibussowitsch break; 618d71ae5a4SJacob Faibussowitsch case PCMPI_SET_MAT: 619d71ae5a4SJacob Faibussowitsch PetscCall(PCMPISetMat(NULL)); 620d71ae5a4SJacob Faibussowitsch break; 621d71ae5a4SJacob Faibussowitsch case PCMPI_UPDATE_MAT_VALUES: 622d71ae5a4SJacob Faibussowitsch PetscCall(PCMPIUpdateMatValues(NULL)); 623d71ae5a4SJacob Faibussowitsch break; 624f1f2ae84SBarry Smith case PCMPI_VIEW: 625f1f2ae84SBarry Smith // PetscCall(PCMPIView(NULL)); 626f1f2ae84SBarry Smith break; 627d71ae5a4SJacob Faibussowitsch case PCMPI_SOLVE: 628d71ae5a4SJacob Faibussowitsch PetscCall(PCMPISolve(NULL, NULL, NULL)); 629d71ae5a4SJacob Faibussowitsch break; 630d71ae5a4SJacob Faibussowitsch case PCMPI_DESTROY: 631d71ae5a4SJacob Faibussowitsch PetscCall(PCMPIDestroy(NULL)); 632d71ae5a4SJacob Faibussowitsch break; 633f1f2ae84SBarry Smith case PCMPI_EXIT: 6349f0612e4SBarry Smith if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks)); 635f1f2ae84SBarry Smith PetscCall(PetscFinalize()); 636f1f2ae84SBarry Smith exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 637f1f2ae84SBarry Smith break; 638d71ae5a4SJacob Faibussowitsch default: 639d71ae5a4SJacob Faibussowitsch break; 640f1f2ae84SBarry Smith } 641f1f2ae84SBarry Smith } 6423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 643f1f2ae84SBarry Smith } 644f1f2ae84SBarry Smith 645f1f2ae84SBarry Smith /*@C 646f1f2ae84SBarry Smith PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 647f1580f4eSBarry Smith parallel KSP solves and management of parallel `KSP` objects. 648f1f2ae84SBarry Smith 64920f4b53cSBarry Smith Logically Collective on all MPI ranks except 0 65020f4b53cSBarry Smith 65120f4b53cSBarry Smith Level: developer 652f1f2ae84SBarry Smith 653f1580f4eSBarry Smith Note: 6549f0612e4SBarry Smith This is normally called automatically in `PetscFinalize()` 655f1f2ae84SBarry Smith 6563821be0aSBarry Smith .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()` 657f1f2ae84SBarry Smith @*/ 658d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerEnd(void) 659d71ae5a4SJacob Faibussowitsch { 660f1f2ae84SBarry Smith PetscFunctionBegin; 661f1f2ae84SBarry Smith if (PetscGlobalRank == 0) { 662f1f2ae84SBarry Smith PetscViewer viewer = NULL; 663f1f2ae84SBarry Smith PetscViewerFormat format; 664f1f2ae84SBarry Smith 6659f0612e4SBarry Smith PetscCall(PetscShmgetAddressesFinalize()); 6669f0612e4SBarry Smith PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT)); 6679f0612e4SBarry Smith if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks)); 668f1f2ae84SBarry Smith PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 669f1f2ae84SBarry Smith PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 670f1f2ae84SBarry Smith PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 671f1f2ae84SBarry Smith PetscOptionsEnd(); 672f1f2ae84SBarry Smith if (viewer) { 673f1f2ae84SBarry Smith PetscBool isascii; 674f1f2ae84SBarry Smith 675f1f2ae84SBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 676f1f2ae84SBarry Smith if (isascii) { 677f1f2ae84SBarry Smith PetscMPIInt size; 6785316cbedSBarry Smith PetscMPIInt i; 679f1f2ae84SBarry Smith 680f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 6815316cbedSBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 6825316cbedSBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n")); 6835316cbedSBarry Smith if (PCMPIKSPCountsSeq) { 6845316cbedSBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq)); 685f1f2ae84SBarry Smith } 6865316cbedSBarry Smith for (i = 0; i < size; i++) { 6875316cbedSBarry Smith if (PCMPIKSPCounts[i]) { 6885316cbedSBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i])); 6895316cbedSBarry Smith } 6905316cbedSBarry Smith } 6919f0612e4SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not ")); 692f1f2ae84SBarry Smith } 693648c30bcSBarry Smith PetscCall(PetscViewerDestroy(&viewer)); 694f1f2ae84SBarry Smith } 695f1f2ae84SBarry Smith } 696f1f2ae84SBarry Smith PetscCall(PCMPICommsDestroy()); 6973821be0aSBarry Smith PCMPIServerActive = PETSC_FALSE; 6983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 699f1f2ae84SBarry Smith } 700f1f2ae84SBarry Smith 701f1f2ae84SBarry Smith /* 702f1f2ae84SBarry Smith This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 703f1f2ae84SBarry Smith because, for example, the problem is small. This version is more efficient because it does not require copying any data 704f1f2ae84SBarry Smith */ 705d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_Seq(PC pc) 706d71ae5a4SJacob Faibussowitsch { 707f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data; 708f1f2ae84SBarry Smith Mat sA; 709f1f2ae84SBarry Smith const char *prefix; 710f9818f3cSJose E. Roman char *found = NULL, *cprefix; 711f1f2ae84SBarry Smith 712f1f2ae84SBarry Smith PetscFunctionBegin; 7139f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE; 714f1f2ae84SBarry Smith PetscCall(PCGetOperators(pc, NULL, &sA)); 715f1f2ae84SBarry Smith PetscCall(PCGetOptionsPrefix(pc, &prefix)); 716f1f2ae84SBarry Smith PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 7173821be0aSBarry Smith PetscCall(KSPSetNestLevel(km->ksps[0], 1)); 7183821be0aSBarry Smith PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1)); 7193821be0aSBarry Smith 7203821be0aSBarry Smith /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 7213821be0aSBarry Smith PetscCall(PCGetOptionsPrefix(pc, &prefix)); 7223821be0aSBarry Smith PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 7233821be0aSBarry Smith PetscCall(PetscStrallocpy(prefix, &cprefix)); 7243821be0aSBarry Smith PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 7253821be0aSBarry Smith PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 7263821be0aSBarry Smith *found = 0; 7273821be0aSBarry Smith PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix)); 7283821be0aSBarry Smith PetscCall(PetscFree(cprefix)); 7293821be0aSBarry Smith 730f1f2ae84SBarry Smith PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 731f1f2ae84SBarry Smith PetscCall(KSPSetFromOptions(km->ksps[0])); 732f1f2ae84SBarry Smith PetscCall(KSPSetUp(km->ksps[0])); 7333ba16761SJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n")); 734f1f2ae84SBarry Smith PCMPIKSPCountsSeq++; 7359f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE; 7363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 737f1f2ae84SBarry Smith } 738f1f2ae84SBarry Smith 739d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 740d71ae5a4SJacob Faibussowitsch { 741f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data; 7425316cbedSBarry Smith PetscInt its, n; 7435316cbedSBarry Smith Mat A; 744f1f2ae84SBarry Smith 745f1f2ae84SBarry Smith PetscFunctionBegin; 7469f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE; 747f1f2ae84SBarry Smith PetscCall(KSPSolve(km->ksps[0], b, x)); 7485316cbedSBarry Smith PetscCall(KSPGetIterationNumber(km->ksps[0], &its)); 749f1f2ae84SBarry Smith PCMPISolveCountsSeq++; 7505316cbedSBarry Smith PCMPIIterationsSeq += its; 7515316cbedSBarry Smith PetscCall(KSPGetOperators(km->ksps[0], NULL, &A)); 7525316cbedSBarry Smith PetscCall(MatGetSize(A, &n, NULL)); 7535316cbedSBarry Smith PCMPISizesSeq += n; 7549f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE; 7559f0612e4SBarry Smith /* 7569f0612e4SBarry Smith do not keep reference to previous rhs and solution since destroying them in the next KSPSolve() 7579f0612e4SBarry Smith my use PetscFree() instead of PCMPIArrayDeallocate() 7589f0612e4SBarry Smith */ 7599f0612e4SBarry Smith PetscCall(VecDestroy(&km->ksps[0]->vec_rhs)); 7609f0612e4SBarry Smith PetscCall(VecDestroy(&km->ksps[0]->vec_sol)); 7613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 762f1f2ae84SBarry Smith } 763f1f2ae84SBarry Smith 764d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 765d71ae5a4SJacob Faibussowitsch { 766f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data; 767f1f2ae84SBarry Smith 768f1f2ae84SBarry Smith PetscFunctionBegin; 769f1f2ae84SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 770f1f2ae84SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 7715316cbedSBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 7723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 773f1f2ae84SBarry Smith } 774f1f2ae84SBarry Smith 775d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCDestroy_Seq(PC pc) 776d71ae5a4SJacob Faibussowitsch { 777f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data; 7789f0612e4SBarry Smith Mat A, B; 7799f0612e4SBarry Smith Vec x, b; 780f1f2ae84SBarry Smith 781f1f2ae84SBarry Smith PetscFunctionBegin; 7829f0612e4SBarry Smith PCMPIServerInSolve = PETSC_TRUE; 7839f0612e4SBarry Smith /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */ 7849f0612e4SBarry Smith PetscCall(KSPGetOperators(km->ksps[0], &A, &B)); 7859f0612e4SBarry Smith PetscCall(PetscObjectReference((PetscObject)A)); 7869f0612e4SBarry Smith PetscCall(PetscObjectReference((PetscObject)B)); 7879f0612e4SBarry Smith PetscCall(KSPGetSolution(km->ksps[0], &x)); 7889f0612e4SBarry Smith PetscCall(PetscObjectReference((PetscObject)x)); 7899f0612e4SBarry Smith PetscCall(KSPGetRhs(km->ksps[0], &b)); 7909f0612e4SBarry Smith PetscCall(PetscObjectReference((PetscObject)b)); 791f1f2ae84SBarry Smith PetscCall(KSPDestroy(&km->ksps[0])); 792f1f2ae84SBarry Smith PetscCall(PetscFree(pc->data)); 7939f0612e4SBarry Smith PCMPIServerInSolve = PETSC_FALSE; 7949f0612e4SBarry Smith PetscCall(MatDestroy(&A)); 7959f0612e4SBarry Smith PetscCall(MatDestroy(&B)); 7969f0612e4SBarry Smith PetscCall(VecDestroy(&x)); 7979f0612e4SBarry Smith PetscCall(VecDestroy(&b)); 7983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 799f1f2ae84SBarry Smith } 800f1f2ae84SBarry Smith 801f1f2ae84SBarry Smith /* 802f1f2ae84SBarry Smith PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 803dd8e379bSPierre Jolivet right-hand side to the parallel PC 804f1f2ae84SBarry Smith */ 805d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_MPI(PC pc) 806d71ae5a4SJacob Faibussowitsch { 807f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data; 808f1f2ae84SBarry Smith PetscMPIInt rank, size; 809f1f2ae84SBarry Smith PetscBool newmatrix = PETSC_FALSE; 810f1f2ae84SBarry Smith 811f1f2ae84SBarry Smith PetscFunctionBegin; 812f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 813f1f2ae84SBarry Smith PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 814f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 815f1f2ae84SBarry Smith 816f1f2ae84SBarry Smith if (!pc->setupcalled) { 817f1f2ae84SBarry Smith if (!km->alwaysuseserver) { 818f1f2ae84SBarry Smith PetscInt n; 819f1f2ae84SBarry Smith Mat sA; 820f1f2ae84SBarry Smith /* short circuit for small systems */ 821f1f2ae84SBarry Smith PetscCall(PCGetOperators(pc, &sA, &sA)); 822f1f2ae84SBarry Smith PetscCall(MatGetSize(sA, &n, NULL)); 823f1f2ae84SBarry Smith if (n < 2 * km->mincntperrank - 1 || size == 1) { 824f1f2ae84SBarry Smith pc->ops->setup = NULL; 825f1f2ae84SBarry Smith pc->ops->apply = PCApply_Seq; 826f1f2ae84SBarry Smith pc->ops->destroy = PCDestroy_Seq; 827f1f2ae84SBarry Smith pc->ops->view = PCView_Seq; 828f1f2ae84SBarry Smith PetscCall(PCSetUp_Seq(pc)); 8293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 830f1f2ae84SBarry Smith } 831f1f2ae84SBarry Smith } 832f1f2ae84SBarry Smith 8339f0612e4SBarry Smith PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE)); 834f1f2ae84SBarry Smith PetscCall(PCMPICreate(pc)); 835f1f2ae84SBarry Smith newmatrix = PETSC_TRUE; 8369371c9d4SSatish Balay } 8379371c9d4SSatish Balay if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 838f1f2ae84SBarry Smith 839f1f2ae84SBarry Smith if (newmatrix) { 8403ba16761SJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n")); 8419f0612e4SBarry Smith PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT)); 842f1f2ae84SBarry Smith PetscCall(PCMPISetMat(pc)); 843f1f2ae84SBarry Smith } else { 844bbea24aaSStefano Zampini PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n")); 8459f0612e4SBarry Smith PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES)); 846f1f2ae84SBarry Smith PetscCall(PCMPIUpdateMatValues(pc)); 847f1f2ae84SBarry Smith } 8483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 849f1f2ae84SBarry Smith } 850f1f2ae84SBarry Smith 851d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 852d71ae5a4SJacob Faibussowitsch { 853f1f2ae84SBarry Smith PetscFunctionBegin; 8549f0612e4SBarry Smith PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE)); 855f1f2ae84SBarry Smith PetscCall(PCMPISolve(pc, b, x)); 8563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 857f1f2ae84SBarry Smith } 858f1f2ae84SBarry Smith 85966976f2fSJacob Faibussowitsch static PetscErrorCode PCDestroy_MPI(PC pc) 860d71ae5a4SJacob Faibussowitsch { 861f1f2ae84SBarry Smith PetscFunctionBegin; 8629f0612e4SBarry Smith PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY)); 863f1f2ae84SBarry Smith PetscCall(PCMPIDestroy(pc)); 864f1f2ae84SBarry Smith PetscCall(PetscFree(pc->data)); 8653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 866f1f2ae84SBarry Smith } 867f1f2ae84SBarry Smith 868f1f2ae84SBarry Smith /* 8699f0612e4SBarry Smith PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database 870f1f2ae84SBarry Smith */ 87166976f2fSJacob Faibussowitsch static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 872d71ae5a4SJacob Faibussowitsch { 873f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data; 874f1f2ae84SBarry Smith MPI_Comm comm; 875f1f2ae84SBarry Smith PetscMPIInt size; 876f1f2ae84SBarry Smith 877f1f2ae84SBarry Smith PetscFunctionBegin; 878f1f2ae84SBarry Smith PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 879f1f2ae84SBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size)); 880f1f2ae84SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 8819f0612e4SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %d\n", (int)km->mincntperrank)); 8829f0612e4SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n")); 8833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 884f1f2ae84SBarry Smith } 885f1f2ae84SBarry Smith 88666976f2fSJacob Faibussowitsch static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) 887d71ae5a4SJacob Faibussowitsch { 888f1f2ae84SBarry Smith PC_MPI *km = (PC_MPI *)pc->data; 889f1f2ae84SBarry Smith 890f1f2ae84SBarry Smith PetscFunctionBegin; 891f1f2ae84SBarry Smith PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 8923821be0aSBarry Smith PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 8933821be0aSBarry Smith PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 894f1f2ae84SBarry Smith PetscOptionsHeadEnd(); 8953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 896f1f2ae84SBarry Smith } 897f1f2ae84SBarry Smith 898f1f2ae84SBarry Smith /*MC 899f1580f4eSBarry Smith PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 900f1f2ae84SBarry Smith 9013821be0aSBarry Smith Options Database Keys for the Server: 902f1f2ae84SBarry Smith + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 9039f0612e4SBarry Smith . -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server 9049f0612e4SBarry Smith - -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true 905f1f2ae84SBarry Smith 9063821be0aSBarry Smith Options Database Keys for a specific `KSP` object 9073821be0aSBarry Smith + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 9083821be0aSBarry Smith - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes) 9093821be0aSBarry Smith 9103821be0aSBarry Smith Level: developer 91120f4b53cSBarry Smith 912f1f2ae84SBarry Smith Notes: 9139f0612e4SBarry Smith This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or 9149f0612e4SBarry Smith `MatCreateSeqAIJWithArrays()` 9159f0612e4SBarry Smith 91646bbbc36SPierre Jolivet The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used. 917f1f2ae84SBarry Smith 918f1f2ae84SBarry Smith It can be particularly useful for user OpenMP code or potentially user GPU code. 919f1f2ae84SBarry Smith 920dd8e379bSPierre Jolivet When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side 9213821be0aSBarry Smith and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly. 922f1f2ae84SBarry Smith 9233821be0aSBarry Smith The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect 9240316ec64SBarry Smith because they are not the actual solver objects. 9250316ec64SBarry Smith 9260316ec64SBarry Smith When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other 92768a21331SBarry Smith stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading. 9280316ec64SBarry Smith 9293821be0aSBarry Smith Developer Note: 9303821be0aSBarry Smith This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of 9313821be0aSBarry Smith a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user. 9323821be0aSBarry Smith 9333821be0aSBarry Smith .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()` 934f1f2ae84SBarry Smith M*/ 935d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 936d71ae5a4SJacob Faibussowitsch { 937f1f2ae84SBarry Smith PC_MPI *km; 938f9818f3cSJose E. Roman char *found = NULL; 939f1f2ae84SBarry Smith 940f1f2ae84SBarry Smith PetscFunctionBegin; 9413821be0aSBarry Smith PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found)); 9423821be0aSBarry Smith PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_"); 9433821be0aSBarry Smith 9443821be0aSBarry Smith /* material from PCSetType() */ 9453821be0aSBarry Smith PetscTryTypeMethod(pc, destroy); 9463821be0aSBarry Smith pc->ops->destroy = NULL; 9473821be0aSBarry Smith pc->data = NULL; 9483821be0aSBarry Smith 9493821be0aSBarry Smith PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist)); 9503821be0aSBarry Smith PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps))); 9513821be0aSBarry Smith pc->modifysubmatrices = NULL; 9523821be0aSBarry Smith pc->modifysubmatricesP = NULL; 9533821be0aSBarry Smith pc->setupcalled = 0; 9543821be0aSBarry Smith 9554dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&km)); 956f1f2ae84SBarry Smith pc->data = (void *)km; 957f1f2ae84SBarry Smith 958f1f2ae84SBarry Smith km->mincntperrank = 10000; 959f1f2ae84SBarry Smith 960f1f2ae84SBarry Smith pc->ops->setup = PCSetUp_MPI; 961f1f2ae84SBarry Smith pc->ops->apply = PCApply_MPI; 962f1f2ae84SBarry Smith pc->ops->destroy = PCDestroy_MPI; 963f1f2ae84SBarry Smith pc->ops->view = PCView_MPI; 964f1f2ae84SBarry Smith pc->ops->setfromoptions = PCSetFromOptions_MPI; 9653821be0aSBarry Smith PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI)); 9663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 967f1f2ae84SBarry Smith } 9689f0612e4SBarry Smith 9699f0612e4SBarry Smith /*@ 9709f0612e4SBarry Smith PCMPIGetKSP - Gets the `KSP` created by the `PCMPI` 9719f0612e4SBarry Smith 9729f0612e4SBarry Smith Not Collective 9739f0612e4SBarry Smith 9749f0612e4SBarry Smith Input Parameter: 9759f0612e4SBarry Smith . pc - the preconditioner context 9769f0612e4SBarry Smith 9779f0612e4SBarry Smith Output Parameter: 9789f0612e4SBarry Smith . innerksp - the inner `KSP` 9799f0612e4SBarry Smith 9809f0612e4SBarry Smith Level: advanced 9819f0612e4SBarry Smith 9829f0612e4SBarry Smith .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE` 9839f0612e4SBarry Smith @*/ 9849f0612e4SBarry Smith PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp) 9859f0612e4SBarry Smith { 9869f0612e4SBarry Smith PC_MPI *red = (PC_MPI *)pc->data; 9879f0612e4SBarry Smith 9889f0612e4SBarry Smith PetscFunctionBegin; 9899f0612e4SBarry Smith PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 9909f0612e4SBarry Smith PetscAssertPointer(innerksp, 2); 9919f0612e4SBarry Smith *innerksp = red->ksps[0]; 9929f0612e4SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 9939f0612e4SBarry Smith } 994