xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 835f2295474254850a9de28f274be7ce943244c7)
1f1f2ae84SBarry Smith /*
2f1f2ae84SBarry Smith     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3f1f2ae84SBarry Smith     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4f1f2ae84SBarry Smith 
5dd8e379bSPierre Jolivet     That program may use OpenMP to compute the right-hand side and matrix for the linear system
6f1f2ae84SBarry Smith 
7f1f2ae84SBarry Smith     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8f1f2ae84SBarry Smith 
9f1f2ae84SBarry Smith     The resulting KSP and PC can only be controlled via the options database, though some common commands
10f1f2ae84SBarry Smith     could be passed through the server.
11f1f2ae84SBarry Smith 
12f1f2ae84SBarry Smith */
139f0612e4SBarry Smith #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
14f1f2ae84SBarry Smith #include <petsc/private/kspimpl.h>
15789afff4SPierre Jolivet #include <petscts.h>
16789afff4SPierre Jolivet #include <petsctao.h>
179f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
189f0612e4SBarry Smith   #include <pthread.h>
199f0612e4SBarry Smith #endif
20f1f2ae84SBarry Smith 
21f1f2ae84SBarry Smith #define PC_MPI_MAX_RANKS  256
22f1f2ae84SBarry Smith #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
23f1f2ae84SBarry Smith 
24f1f2ae84SBarry Smith typedef struct {
259f0612e4SBarry Smith   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
26f1f2ae84SBarry Smith   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
27f1f2ae84SBarry Smith   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
289f0612e4SBarry Smith   PetscInt    mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
299f0612e4SBarry Smith   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
30f1f2ae84SBarry Smith } PC_MPI;
31f1f2ae84SBarry Smith 
329371c9d4SSatish Balay typedef enum {
339371c9d4SSatish Balay   PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
34f1f2ae84SBarry Smith   PCMPI_CREATE,
35f1f2ae84SBarry Smith   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
36f1f2ae84SBarry Smith   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
37f1f2ae84SBarry Smith   PCMPI_SOLVE,
38f1f2ae84SBarry Smith   PCMPI_VIEW,
399f0612e4SBarry Smith   PCMPI_DESTROY /* destroy a PC that is no longer needed */
40f1f2ae84SBarry Smith } PCMPICommand;
41f1f2ae84SBarry Smith 
42f1f2ae84SBarry Smith static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
43f1f2ae84SBarry Smith static PetscBool     PCMPICommSet = PETSC_FALSE;
44f1f2ae84SBarry Smith static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
455316cbedSBarry Smith static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
469f0612e4SBarry Smith static PetscLogEvent EventServerDist, EventServerDistMPI;
479f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
489f0612e4SBarry Smith static pthread_mutex_t *PCMPIServerLocks;
499f0612e4SBarry Smith #else
509f0612e4SBarry Smith static void *PCMPIServerLocks;
519f0612e4SBarry Smith #endif
52f1f2ae84SBarry Smith 
53d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICommsCreate(void)
54d71ae5a4SJacob Faibussowitsch {
55f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
56f1f2ae84SBarry Smith   PetscMPIInt size, rank, i;
57f1f2ae84SBarry Smith 
58f1f2ae84SBarry Smith   PetscFunctionBegin;
59f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
60f1f2ae84SBarry Smith   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
61f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
62f1f2ae84SBarry Smith   /* comm for size 1 is useful only for debugging */
63f1f2ae84SBarry Smith   for (i = 0; i < size; i++) {
64f1f2ae84SBarry Smith     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
65f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
66f1f2ae84SBarry Smith     PCMPISolveCounts[i] = 0;
67f1f2ae84SBarry Smith     PCMPIKSPCounts[i]   = 0;
685316cbedSBarry Smith     PCMPIIterations[i]  = 0;
695316cbedSBarry Smith     PCMPISizes[i]       = 0;
70f1f2ae84SBarry Smith   }
71f1f2ae84SBarry Smith   PCMPICommSet = PETSC_TRUE;
723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
73f1f2ae84SBarry Smith }
74f1f2ae84SBarry Smith 
759f0612e4SBarry Smith static PetscErrorCode PCMPICommsDestroy(void)
76d71ae5a4SJacob Faibussowitsch {
77f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
78f1f2ae84SBarry Smith   PetscMPIInt size, rank, i;
79f1f2ae84SBarry Smith 
80f1f2ae84SBarry Smith   PetscFunctionBegin;
813ba16761SJacob Faibussowitsch   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
82f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
83f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
84f1f2ae84SBarry Smith   for (i = 0; i < size; i++) {
85f1f2ae84SBarry Smith     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
86f1f2ae84SBarry Smith   }
87f1f2ae84SBarry Smith   PCMPICommSet = PETSC_FALSE;
883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
89f1f2ae84SBarry Smith }
90f1f2ae84SBarry Smith 
91d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICreate(PC pc)
92d71ae5a4SJacob Faibussowitsch {
93f1f2ae84SBarry Smith   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
94f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
95f1f2ae84SBarry Smith   KSP         ksp;
96f1f2ae84SBarry Smith   PetscInt    N[2], mincntperrank = 0;
97f1f2ae84SBarry Smith   PetscMPIInt size;
98f1f2ae84SBarry Smith   Mat         sA;
993821be0aSBarry Smith   char       *cprefix = NULL;
100f1f2ae84SBarry Smith   PetscMPIInt len     = 0;
101f1f2ae84SBarry Smith 
102f1f2ae84SBarry Smith   PetscFunctionBegin;
1039f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
104f1f2ae84SBarry Smith   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
105f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
106f1f2ae84SBarry Smith   if (pc) {
107f1f2ae84SBarry Smith     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
108f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
109f1f2ae84SBarry Smith     PetscCall(MatGetSize(sA, &N[0], &N[1]));
110f1f2ae84SBarry Smith   }
111f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
112f1f2ae84SBarry Smith 
113f1f2ae84SBarry Smith   /* choose a suitable sized MPI_Comm for the problem to be solved on */
114f1f2ae84SBarry Smith   if (km) mincntperrank = km->mincntperrank;
115f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
116f1f2ae84SBarry Smith   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
117f1f2ae84SBarry Smith   if (comm == MPI_COMM_NULL) {
118f1f2ae84SBarry Smith     ksp                = NULL;
1199f0612e4SBarry Smith     PCMPIServerInSolve = PETSC_FALSE;
1203ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
121f1f2ae84SBarry Smith   }
1220316ec64SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
123f1f2ae84SBarry Smith   PetscCall(KSPCreate(comm, &ksp));
1243821be0aSBarry Smith   PetscCall(KSPSetNestLevel(ksp, 1));
1253821be0aSBarry Smith   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
1260316ec64SBarry Smith   PetscCall(PetscLogStagePop());
127f1f2ae84SBarry Smith   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
128f1f2ae84SBarry Smith   if (pc) {
129f1f2ae84SBarry Smith     size_t      slen;
1303821be0aSBarry Smith     const char *prefix = NULL;
131f9818f3cSJose E. Roman     char       *found  = NULL;
132f1f2ae84SBarry Smith 
133f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_size(comm, &size));
134dad3da8eSBarry Smith     PCMPIKSPCounts[size - 1]++;
1353821be0aSBarry Smith     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
1363821be0aSBarry Smith     PetscCall(PCGetOptionsPrefix(pc, &prefix));
1373821be0aSBarry Smith     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
1383821be0aSBarry Smith     PetscCall(PetscStrallocpy(prefix, &cprefix));
1393821be0aSBarry Smith     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
1403821be0aSBarry Smith     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
1413821be0aSBarry Smith     *found = 0;
1423821be0aSBarry Smith     PetscCall(PetscStrlen(cprefix, &slen));
143*835f2295SStefano Zampini     PetscCall(PetscMPIIntCast(slen, &len));
144f1f2ae84SBarry Smith   }
145f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
146f1f2ae84SBarry Smith   if (len) {
1473821be0aSBarry Smith     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
1483821be0aSBarry Smith     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
1493821be0aSBarry Smith     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
150f1f2ae84SBarry Smith   }
1513821be0aSBarry Smith   PetscCall(PetscFree(cprefix));
1529f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
1533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
154f1f2ae84SBarry Smith }
155f1f2ae84SBarry Smith 
156d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISetMat(PC pc)
157d71ae5a4SJacob Faibussowitsch {
158f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
159f1f2ae84SBarry Smith   Mat                A;
1609f0612e4SBarry Smith   PetscInt           m, n, j, bs;
161f1f2ae84SBarry Smith   Mat                sA;
162f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
163f1f2ae84SBarry Smith   KSP                ksp;
164f1f2ae84SBarry Smith   PetscLayout        layout;
1659f0612e4SBarry Smith   const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
166f1f2ae84SBarry Smith   const PetscInt    *range;
167f1f2ae84SBarry Smith   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
1689f0612e4SBarry Smith   const PetscScalar *a                = NULL, *sa;
1699f0612e4SBarry Smith   PetscInt           matproperties[8] = {0}, rstart, rend;
1703821be0aSBarry Smith   char              *cprefix;
171f1f2ae84SBarry Smith 
172f1f2ae84SBarry Smith   PetscFunctionBegin;
173f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
1743ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
1759f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
17645682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
177f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
178f1f2ae84SBarry Smith   if (pc) {
17968a21331SBarry Smith     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
1803821be0aSBarry Smith     const char *prefix;
1813821be0aSBarry Smith     size_t      clen;
18268a21331SBarry Smith 
183f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_size(comm, &size));
184dad3da8eSBarry Smith     PCMPIMatCounts[size - 1]++;
185f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
18668a21331SBarry Smith     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
187dd0d27b1SBarry Smith     PetscCall(MatGetBlockSize(sA, &bs));
18868a21331SBarry Smith     matproperties[2] = bs;
18968a21331SBarry Smith     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
19068a21331SBarry Smith     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
19168a21331SBarry Smith     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
19268a21331SBarry Smith     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
19368a21331SBarry Smith     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
19468a21331SBarry Smith     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
19568a21331SBarry Smith     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
19668a21331SBarry Smith     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
1977a99bfcaSBarry Smith     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
1983821be0aSBarry Smith     PetscCall(MatGetOptionsPrefix(sA, &prefix));
1993821be0aSBarry Smith     PetscCall(PetscStrallocpy(prefix, &cprefix));
2003821be0aSBarry Smith     PetscCall(PetscStrlen(cprefix, &clen));
2013821be0aSBarry Smith     matproperties[7] = (PetscInt)clen;
202f1f2ae84SBarry Smith   }
2033821be0aSBarry Smith   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
204f1f2ae84SBarry Smith 
20568a21331SBarry Smith   /* determine ownership ranges of matrix columns */
206f1f2ae84SBarry Smith   PetscCall(PetscLayoutCreate(comm, &layout));
20768a21331SBarry Smith   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
20868a21331SBarry Smith   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
209f1f2ae84SBarry Smith   PetscCall(PetscLayoutSetUp(layout));
210f1f2ae84SBarry Smith   PetscCall(PetscLayoutGetLocalSize(layout, &n));
21168a21331SBarry Smith   PetscCall(PetscLayoutDestroy(&layout));
21268a21331SBarry Smith 
21368a21331SBarry Smith   /* determine ownership ranges of matrix rows */
21468a21331SBarry Smith   PetscCall(PetscLayoutCreate(comm, &layout));
21568a21331SBarry Smith   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
21668a21331SBarry Smith   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
21768a21331SBarry Smith   PetscCall(PetscLayoutSetUp(layout));
21868a21331SBarry Smith   PetscCall(PetscLayoutGetLocalSize(layout, &m));
2199f0612e4SBarry Smith   PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));
220f1f2ae84SBarry Smith 
2219f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
222f1f2ae84SBarry Smith   /* copy over the matrix nonzero structure and values */
223f1f2ae84SBarry Smith   if (pc) {
2249f0612e4SBarry Smith     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
2259f0612e4SBarry Smith     if (!PCMPIServerUseShmget) {
226f1f2ae84SBarry Smith       NZ      = km->NZ;
227f1f2ae84SBarry Smith       NZdispl = km->NZdispl;
228f1f2ae84SBarry Smith       PetscCall(PetscLayoutGetRanges(layout, &range));
229f1f2ae84SBarry Smith       for (i = 0; i < size; i++) {
230*835f2295SStefano Zampini         PetscCall(PetscMPIIntCast(1 + range[i + 1] - range[i], &sendcounti[i]));
231*835f2295SStefano Zampini         PetscCall(PetscMPIIntCast(IA[range[i + 1]] - IA[range[i]], &NZ[i]));
232f1f2ae84SBarry Smith       }
233f1f2ae84SBarry Smith       displi[0]  = 0;
234f1f2ae84SBarry Smith       NZdispl[0] = 0;
235f1f2ae84SBarry Smith       for (j = 1; j < size; j++) {
236f1f2ae84SBarry Smith         displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
237f1f2ae84SBarry Smith         NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
238f1f2ae84SBarry Smith       }
2399f0612e4SBarry Smith     }
240f1f2ae84SBarry Smith     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
241f1f2ae84SBarry Smith   }
242f1f2ae84SBarry Smith   PetscCall(PetscLayoutDestroy(&layout));
243f1f2ae84SBarry Smith 
2443821be0aSBarry Smith   PetscCall(MatCreate(comm, &A));
2453821be0aSBarry Smith   if (matproperties[7] > 0) {
246*835f2295SStefano Zampini     PetscMPIInt ni;
247*835f2295SStefano Zampini 
248*835f2295SStefano Zampini     PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni));
2493821be0aSBarry Smith     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
250*835f2295SStefano Zampini     PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm));
2513821be0aSBarry Smith     PetscCall(MatSetOptionsPrefix(A, cprefix));
2523821be0aSBarry Smith     PetscCall(PetscFree(cprefix));
2533821be0aSBarry Smith   }
2543821be0aSBarry Smith   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
2553821be0aSBarry Smith   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
2563821be0aSBarry Smith   PetscCall(MatSetType(A, MATMPIAIJ));
2579f0612e4SBarry Smith 
2589f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
2596497c311SBarry Smith     PetscMPIInt in;
2609f0612e4SBarry Smith     PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
2619f0612e4SBarry Smith     PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
2626497c311SBarry Smith     PetscCall(PetscMPIIntCast(n, &in));
2636497c311SBarry Smith     PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, in + 1, MPIU_INT, 0, comm));
2649f0612e4SBarry Smith     PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
2659f0612e4SBarry Smith     PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
2669f0612e4SBarry Smith   } else {
2679f0612e4SBarry Smith     const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
2689f0612e4SBarry Smith     PCMPIServerAddresses *addresses;
2699f0612e4SBarry Smith 
2709f0612e4SBarry Smith     PetscCall(PetscNew(&addresses));
2719f0612e4SBarry Smith     addresses->n = 3;
2729f0612e4SBarry Smith     PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
2739f0612e4SBarry Smith     ia = rstart + (PetscInt *)addresses->addr[0];
2749f0612e4SBarry Smith     ja = ia[0] + (PetscInt *)addresses->addr[1];
2759f0612e4SBarry Smith     a  = ia[0] + (PetscScalar *)addresses->addr[2];
27649abdd8aSBarry Smith     PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy));
2779f0612e4SBarry Smith   }
2789f0612e4SBarry Smith 
2799f0612e4SBarry Smith   if (pc) {
2809f0612e4SBarry Smith     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
2819f0612e4SBarry Smith     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
2829f0612e4SBarry Smith   }
2839f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
2849f0612e4SBarry Smith 
2859f0612e4SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
2863821be0aSBarry Smith   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
28768a21331SBarry Smith   PetscCall(MatSetBlockSize(A, matproperties[2]));
2883821be0aSBarry Smith 
28968a21331SBarry Smith   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
29068a21331SBarry Smith   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
29168a21331SBarry Smith   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
29268a21331SBarry Smith   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
293f1f2ae84SBarry Smith 
2949f0612e4SBarry Smith   if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
295f1f2ae84SBarry Smith   PetscCall(KSPSetOperators(ksp, A, A));
296f1f2ae84SBarry Smith   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
2970316ec64SBarry Smith   PetscCall(PetscLogStagePop());
2989f0612e4SBarry Smith   if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
299f1f2ae84SBarry Smith     const PetscInt *range;
300f1f2ae84SBarry Smith 
301f1f2ae84SBarry Smith     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
302f1f2ae84SBarry Smith     for (i = 0; i < size; i++) {
303*835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(range[i + 1] - range[i], &km->sendcount[i]));
304*835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(range[i], &km->displ[i]));
305f1f2ae84SBarry Smith     }
306f1f2ae84SBarry Smith   }
307f1f2ae84SBarry Smith   PetscCall(MatDestroy(&A));
30845682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
309f1f2ae84SBarry Smith   PetscCall(KSPSetFromOptions(ksp));
3109f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
3113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
312f1f2ae84SBarry Smith }
313f1f2ae84SBarry Smith 
314d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIUpdateMatValues(PC pc)
315d71ae5a4SJacob Faibussowitsch {
316f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
317f1f2ae84SBarry Smith   KSP                ksp;
318f1f2ae84SBarry Smith   Mat                sA, A;
319f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
3209f0612e4SBarry Smith   const PetscInt    *ia, *IA;
3219f0612e4SBarry Smith   const PetscScalar *a;
322f1f2ae84SBarry Smith   PetscCount         nz;
323f1f2ae84SBarry Smith   const PetscScalar *sa = NULL;
324dad3da8eSBarry Smith   PetscMPIInt        size;
3259f0612e4SBarry Smith   PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};
326f1f2ae84SBarry Smith 
327f1f2ae84SBarry Smith   PetscFunctionBegin;
328f1f2ae84SBarry Smith   if (pc) {
329f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
330f1f2ae84SBarry Smith     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
3319f0612e4SBarry Smith     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
332f1f2ae84SBarry Smith   }
333f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
3343ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
3359f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
33645682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
337f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
338dad3da8eSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
339dad3da8eSBarry Smith   PCMPIMatCounts[size - 1]++;
340f1f2ae84SBarry Smith   PetscCall(KSPGetOperators(ksp, NULL, &A));
3419f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
3429f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
3436497c311SBarry Smith     PetscMPIInt mpi_nz;
3446497c311SBarry Smith 
345f1f2ae84SBarry Smith     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
3466497c311SBarry Smith     PetscCall(PetscMPIIntCast(nz, &mpi_nz));
347f1f2ae84SBarry Smith     PetscCall(PetscMalloc1(nz, &a));
3486497c311SBarry Smith     PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, mpi_nz, MPIU_SCALAR, 0, comm));
3499f0612e4SBarry Smith   } else {
3509f0612e4SBarry Smith     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
3519f0612e4SBarry Smith     PCMPIServerAddresses *addresses;
3529f0612e4SBarry Smith     PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
3539f0612e4SBarry Smith     ia = rstart + (PetscInt *)addresses->addr[0];
3549f0612e4SBarry Smith     a  = ia[0] + (PetscScalar *)addresses->addr[2];
3559f0612e4SBarry Smith   }
3569f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
35768a21331SBarry Smith   if (pc) {
35868a21331SBarry Smith     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
35968a21331SBarry Smith 
36068a21331SBarry Smith     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
3619f0612e4SBarry Smith     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
36268a21331SBarry Smith 
36368a21331SBarry Smith     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
36468a21331SBarry Smith     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
36568a21331SBarry Smith     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
36668a21331SBarry Smith     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
36768a21331SBarry Smith     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
36868a21331SBarry Smith     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
36968a21331SBarry Smith     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
37068a21331SBarry Smith     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
37168a21331SBarry Smith   }
372f1f2ae84SBarry Smith   PetscCall(MatUpdateMPIAIJWithArray(A, a));
3739f0612e4SBarry Smith   if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
37468a21331SBarry Smith   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
37568a21331SBarry Smith   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
37668a21331SBarry Smith   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
37768a21331SBarry Smith   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
37868a21331SBarry Smith   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
37968a21331SBarry Smith   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
38045682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
3819f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
3823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
383f1f2ae84SBarry Smith }
384f1f2ae84SBarry Smith 
385d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
386d71ae5a4SJacob Faibussowitsch {
387f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
388f1f2ae84SBarry Smith   KSP                ksp;
389f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
390f1f2ae84SBarry Smith   const PetscScalar *sb   = NULL, *x;
391f1f2ae84SBarry Smith   PetscScalar       *b, *sx = NULL;
3925316cbedSBarry Smith   PetscInt           its, n;
3935316cbedSBarry Smith   PetscMPIInt        size;
3949f0612e4SBarry Smith   void              *addr[2];
395f1f2ae84SBarry Smith 
396f1f2ae84SBarry Smith   PetscFunctionBegin;
397f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
3983ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
3999f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
40045682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
401f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
402f1f2ae84SBarry Smith 
403f1f2ae84SBarry Smith   /* scatterv rhs */
404dad3da8eSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
4055316cbedSBarry Smith   if (pc) {
4065316cbedSBarry Smith     PetscInt N;
4075316cbedSBarry Smith 
408dad3da8eSBarry Smith     PCMPISolveCounts[size - 1]++;
40968a21331SBarry Smith     PetscCall(MatGetSize(pc->pmat, &N, NULL));
4105316cbedSBarry Smith     PCMPISizes[size - 1] += N;
411f1f2ae84SBarry Smith   }
412f1f2ae84SBarry Smith   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
4139f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
4149f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
4156497c311SBarry Smith     PetscMPIInt in;
4166497c311SBarry Smith 
417f1f2ae84SBarry Smith     PetscCall(VecGetArray(ksp->vec_rhs, &b));
4189f0612e4SBarry Smith     if (pc) PetscCall(VecGetArrayRead(B, &sb));
4196497c311SBarry Smith     PetscCall(PetscMPIIntCast(n, &in));
4206497c311SBarry Smith     PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, in, MPIU_SCALAR, 0, comm));
421f1f2ae84SBarry Smith     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
4229f0612e4SBarry Smith     PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
4239f0612e4SBarry Smith     // TODO: scatter initial guess if needed
4249f0612e4SBarry Smith   } else {
4259f0612e4SBarry Smith     PetscInt rstart;
4269f0612e4SBarry Smith 
4279f0612e4SBarry Smith     if (pc) PetscCall(VecGetArrayRead(B, &sb));
4289f0612e4SBarry Smith     if (pc) PetscCall(VecGetArray(X, &sx));
4299f0612e4SBarry Smith     const void *inaddr[2] = {(const void **)sb, (const void **)sx};
4309f0612e4SBarry Smith     if (pc) PetscCall(VecRestoreArray(X, &sx));
4319f0612e4SBarry Smith     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
4329f0612e4SBarry Smith 
4339f0612e4SBarry Smith     PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
4349f0612e4SBarry Smith     PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
4359f0612e4SBarry Smith     PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
4369f0612e4SBarry Smith     PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
4379f0612e4SBarry Smith   }
4389f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
439f1f2ae84SBarry Smith 
44045682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
4410316ec64SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
442f1f2ae84SBarry Smith   PetscCall(KSPSolve(ksp, NULL, NULL));
4430316ec64SBarry Smith   PetscCall(PetscLogStagePop());
44445682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
4455316cbedSBarry Smith   PetscCall(KSPGetIterationNumber(ksp, &its));
4465316cbedSBarry Smith   PCMPIIterations[size - 1] += its;
4479f0612e4SBarry Smith   // TODO: send iterations up to outer KSP
4489f0612e4SBarry Smith 
4499f0612e4SBarry Smith   if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));
450f1f2ae84SBarry Smith 
451f1f2ae84SBarry Smith   /* gather solution */
4529f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
4539f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
4546497c311SBarry Smith     PetscMPIInt in;
4556497c311SBarry Smith 
456f1f2ae84SBarry Smith     PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
457f1f2ae84SBarry Smith     if (pc) PetscCall(VecGetArray(X, &sx));
4586497c311SBarry Smith     PetscCall(PetscMPIIntCast(n, &in));
4596497c311SBarry Smith     PetscCallMPI(MPI_Gatherv(x, in, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
460f1f2ae84SBarry Smith     if (pc) PetscCall(VecRestoreArray(X, &sx));
461f1f2ae84SBarry Smith     PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
4629f0612e4SBarry Smith   } else {
4639f0612e4SBarry Smith     PetscCall(VecResetArray(ksp->vec_rhs));
4649f0612e4SBarry Smith     PetscCall(VecResetArray(ksp->vec_sol));
4659f0612e4SBarry Smith   }
4669f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
46745682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
4689f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
4693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
470f1f2ae84SBarry Smith }
471f1f2ae84SBarry Smith 
472d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIDestroy(PC pc)
473d71ae5a4SJacob Faibussowitsch {
474f1f2ae84SBarry Smith   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
475f1f2ae84SBarry Smith   KSP      ksp;
476f1f2ae84SBarry Smith   MPI_Comm comm = PC_MPI_COMM_WORLD;
477f1f2ae84SBarry Smith 
478f1f2ae84SBarry Smith   PetscFunctionBegin;
479f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
4803ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
481c7d372c4SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
4829f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
483f1f2ae84SBarry Smith   PetscCall(KSPDestroy(&ksp));
484c7d372c4SBarry Smith   PetscCall(PetscLogStagePop());
4859f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
4863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
487f1f2ae84SBarry Smith }
488f1f2ae84SBarry Smith 
4899f0612e4SBarry Smith static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
4909f0612e4SBarry Smith {
4919f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
4929f0612e4SBarry Smith   PetscMPIInt dummy1 = 1, dummy2;
4939f0612e4SBarry Smith #endif
4949f0612e4SBarry Smith 
4959f0612e4SBarry Smith   PetscFunctionBegin;
4969f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
4979f0612e4SBarry Smith   if (PCMPIServerUseShmget) {
4989f0612e4SBarry Smith     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
4999f0612e4SBarry Smith   }
5009f0612e4SBarry Smith #endif
5019f0612e4SBarry Smith   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
5029f0612e4SBarry Smith   /* next line ensures the sender has already taken the lock */
5039f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
5049f0612e4SBarry Smith   if (PCMPIServerUseShmget) {
5059f0612e4SBarry Smith     PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
5069f0612e4SBarry Smith     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
5079f0612e4SBarry Smith   }
5089f0612e4SBarry Smith #endif
5099f0612e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
5109f0612e4SBarry Smith }
5113821be0aSBarry Smith 
512f1f2ae84SBarry Smith /*@C
5137a99bfcaSBarry Smith   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
514f1580f4eSBarry Smith   parallel `KSP` solves and management of parallel `KSP` objects.
515f1f2ae84SBarry Smith 
5163821be0aSBarry Smith   Logically Collective on all MPI processes except rank 0
517f1f2ae84SBarry Smith 
518f1580f4eSBarry Smith   Options Database Keys:
519f1f2ae84SBarry Smith + -mpi_linear_solver_server                   - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
5209f0612e4SBarry Smith . -mpi_linear_solver_server_view              - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
5219f0612e4SBarry Smith - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)
522f1f2ae84SBarry Smith 
52320f4b53cSBarry Smith   Level: developer
52420f4b53cSBarry Smith 
525f1580f4eSBarry Smith   Note:
526f1f2ae84SBarry Smith   This is normally started automatically in `PetscInitialize()` when the option is provided
527f1f2ae84SBarry Smith 
5283821be0aSBarry Smith   See `PCMPI` for information on using the solver with a `KSP` object
5293821be0aSBarry Smith 
530f1f2ae84SBarry Smith   Developer Notes:
5313821be0aSBarry Smith   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
532f1580f4eSBarry Smith   written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
533f1580f4eSBarry Smith   (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.
534f1f2ae84SBarry Smith 
535f1580f4eSBarry Smith   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
536f1f2ae84SBarry Smith 
5373821be0aSBarry Smith   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
5383821be0aSBarry Smith 
5393821be0aSBarry Smith   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
5403821be0aSBarry Smith 
541baca6076SPierre Jolivet   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
5423821be0aSBarry Smith   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
5433821be0aSBarry Smith 
5447a99bfcaSBarry Smith   The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on
5453821be0aSBarry Smith   all MPI processes but the user callback only runs on one MPI process per node.
5463821be0aSBarry Smith 
5473821be0aSBarry Smith   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove
5483821be0aSBarry Smith   the `MPI_Comm` argument from PETSc calls.
5493821be0aSBarry Smith 
5503821be0aSBarry Smith .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
551f1f2ae84SBarry Smith @*/
552d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerBegin(void)
553d71ae5a4SJacob Faibussowitsch {
554f1f2ae84SBarry Smith   PetscMPIInt rank;
555f1f2ae84SBarry Smith 
556f1f2ae84SBarry Smith   PetscFunctionBegin;
5579d3446b2SPierre Jolivet   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
5585e1a0e3cSBarry Smith   if (PetscDefined(USE_SINGLE_LIBRARY)) {
5595e1a0e3cSBarry Smith     PetscCall(VecInitializePackage());
5605e1a0e3cSBarry Smith     PetscCall(MatInitializePackage());
5615e1a0e3cSBarry Smith     PetscCall(DMInitializePackage());
5625e1a0e3cSBarry Smith     PetscCall(PCInitializePackage());
5635e1a0e3cSBarry Smith     PetscCall(KSPInitializePackage());
5645e1a0e3cSBarry Smith     PetscCall(SNESInitializePackage());
5655e1a0e3cSBarry Smith     PetscCall(TSInitializePackage());
5665e1a0e3cSBarry Smith     PetscCall(TaoInitializePackage());
5675e1a0e3cSBarry Smith   }
568956255efSBarry Smith   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
56945682376SBarry Smith   PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
5709f0612e4SBarry Smith   PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));
5719f0612e4SBarry Smith 
5729f0612e4SBarry Smith   if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
5739f0612e4SBarry Smith   PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));
5745e1a0e3cSBarry Smith 
575f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
5769f0612e4SBarry Smith   if (PCMPIServerUseShmget) {
5779f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
5789f0612e4SBarry Smith     PetscMPIInt size;
5799f0612e4SBarry Smith 
5809f0612e4SBarry Smith     PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
5819f0612e4SBarry Smith     if (size > 1) {
5829f0612e4SBarry Smith       pthread_mutex_t *locks;
5839f0612e4SBarry Smith 
5849f0612e4SBarry Smith       if (rank == 0) {
5859f0612e4SBarry Smith         PCMPIServerActive = PETSC_TRUE;
5869f0612e4SBarry Smith         PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
5879f0612e4SBarry Smith       }
5889f0612e4SBarry Smith       PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
5899f0612e4SBarry Smith       if (rank == 0) {
5909f0612e4SBarry Smith         pthread_mutexattr_t attr;
5919f0612e4SBarry Smith 
5929f0612e4SBarry Smith         pthread_mutexattr_init(&attr);
5939f0612e4SBarry Smith         pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
5949f0612e4SBarry Smith 
5959f0612e4SBarry Smith         for (int i = 1; i < size; i++) {
5969f0612e4SBarry Smith           pthread_mutex_init(&PCMPIServerLocks[i], &attr);
5979f0612e4SBarry Smith           pthread_mutex_lock(&PCMPIServerLocks[i]);
5989f0612e4SBarry Smith         }
5999f0612e4SBarry Smith       }
6009f0612e4SBarry Smith       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
6019f0612e4SBarry Smith     }
6029f0612e4SBarry Smith #endif
6039f0612e4SBarry Smith   }
604f1f2ae84SBarry Smith   if (rank == 0) {
605f1f2ae84SBarry Smith     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
6063821be0aSBarry Smith     PCMPIServerActive = PETSC_TRUE;
6073ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
608f1f2ae84SBarry Smith   }
609f1f2ae84SBarry Smith 
610f1f2ae84SBarry Smith   while (PETSC_TRUE) {
611f1f2ae84SBarry Smith     PCMPICommand request = PCMPI_CREATE;
61266a7e86cSPierre Jolivet #if defined(PETSC_HAVE_PTHREAD_MUTEX)
6139f0612e4SBarry Smith     PetscMPIInt dummy1 = 1, dummy2;
61466a7e86cSPierre Jolivet #endif
6159f0612e4SBarry Smith 
616d7c1f440SPierre Jolivet     // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
6179f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
6189f0612e4SBarry Smith     if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
6199f0612e4SBarry Smith #endif
620f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
6219f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
6229f0612e4SBarry Smith     if (PCMPIServerUseShmget) {
6239f0612e4SBarry Smith       /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
6249f0612e4SBarry Smith       PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
6259f0612e4SBarry Smith       pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
6269f0612e4SBarry Smith     }
6279f0612e4SBarry Smith #endif
628f1f2ae84SBarry Smith     switch (request) {
629d71ae5a4SJacob Faibussowitsch     case PCMPI_CREATE:
630d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPICreate(NULL));
631d71ae5a4SJacob Faibussowitsch       break;
632d71ae5a4SJacob Faibussowitsch     case PCMPI_SET_MAT:
633d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPISetMat(NULL));
634d71ae5a4SJacob Faibussowitsch       break;
635d71ae5a4SJacob Faibussowitsch     case PCMPI_UPDATE_MAT_VALUES:
636d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPIUpdateMatValues(NULL));
637d71ae5a4SJacob Faibussowitsch       break;
638f1f2ae84SBarry Smith     case PCMPI_VIEW:
639f1f2ae84SBarry Smith       // PetscCall(PCMPIView(NULL));
640f1f2ae84SBarry Smith       break;
641d71ae5a4SJacob Faibussowitsch     case PCMPI_SOLVE:
642d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPISolve(NULL, NULL, NULL));
643d71ae5a4SJacob Faibussowitsch       break;
644d71ae5a4SJacob Faibussowitsch     case PCMPI_DESTROY:
645d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPIDestroy(NULL));
646d71ae5a4SJacob Faibussowitsch       break;
647f1f2ae84SBarry Smith     case PCMPI_EXIT:
6489f0612e4SBarry Smith       if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
649f1f2ae84SBarry Smith       PetscCall(PetscFinalize());
650f1f2ae84SBarry Smith       exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
651f1f2ae84SBarry Smith       break;
652d71ae5a4SJacob Faibussowitsch     default:
653d71ae5a4SJacob Faibussowitsch       break;
654f1f2ae84SBarry Smith     }
655f1f2ae84SBarry Smith   }
6563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
657f1f2ae84SBarry Smith }
658f1f2ae84SBarry Smith 
659f1f2ae84SBarry Smith /*@C
660f1f2ae84SBarry Smith   PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
661f1580f4eSBarry Smith   parallel KSP solves and management of parallel `KSP` objects.
662f1f2ae84SBarry Smith 
66320f4b53cSBarry Smith   Logically Collective on all MPI ranks except 0
66420f4b53cSBarry Smith 
66520f4b53cSBarry Smith   Level: developer
666f1f2ae84SBarry Smith 
667f1580f4eSBarry Smith   Note:
6689f0612e4SBarry Smith   This is normally called automatically in `PetscFinalize()`
669f1f2ae84SBarry Smith 
6703821be0aSBarry Smith .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
671f1f2ae84SBarry Smith @*/
672d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerEnd(void)
673d71ae5a4SJacob Faibussowitsch {
674f1f2ae84SBarry Smith   PetscFunctionBegin;
675f1f2ae84SBarry Smith   if (PetscGlobalRank == 0) {
676f1f2ae84SBarry Smith     PetscViewer       viewer = NULL;
677f1f2ae84SBarry Smith     PetscViewerFormat format;
678f1f2ae84SBarry Smith 
6799f0612e4SBarry Smith     PetscCall(PetscShmgetAddressesFinalize());
6809f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
6819f0612e4SBarry Smith     if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
682f1f2ae84SBarry Smith     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
683f1f2ae84SBarry Smith     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
684f1f2ae84SBarry Smith     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
685f1f2ae84SBarry Smith     PetscOptionsEnd();
686f1f2ae84SBarry Smith     if (viewer) {
687f1f2ae84SBarry Smith       PetscBool isascii;
688f1f2ae84SBarry Smith 
689f1f2ae84SBarry Smith       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
690f1f2ae84SBarry Smith       if (isascii) {
691f1f2ae84SBarry Smith         PetscMPIInt size;
6925316cbedSBarry Smith         PetscMPIInt i;
693f1f2ae84SBarry Smith 
694f1f2ae84SBarry Smith         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
6955316cbedSBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
6965316cbedSBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
6975316cbedSBarry Smith         if (PCMPIKSPCountsSeq) {
6985316cbedSBarry Smith           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
699f1f2ae84SBarry Smith         }
7005316cbedSBarry Smith         for (i = 0; i < size; i++) {
7015316cbedSBarry Smith           if (PCMPIKSPCounts[i]) {
7025316cbedSBarry Smith             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
7035316cbedSBarry Smith           }
7045316cbedSBarry Smith         }
7059f0612e4SBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
706f1f2ae84SBarry Smith       }
707648c30bcSBarry Smith       PetscCall(PetscViewerDestroy(&viewer));
708f1f2ae84SBarry Smith     }
709f1f2ae84SBarry Smith   }
710f1f2ae84SBarry Smith   PetscCall(PCMPICommsDestroy());
7113821be0aSBarry Smith   PCMPIServerActive = PETSC_FALSE;
7123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
713f1f2ae84SBarry Smith }
714f1f2ae84SBarry Smith 
715f1f2ae84SBarry Smith /*
716f1f2ae84SBarry Smith     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
717f1f2ae84SBarry Smith     because, for example, the problem is small. This version is more efficient because it does not require copying any data
718f1f2ae84SBarry Smith */
719d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_Seq(PC pc)
720d71ae5a4SJacob Faibussowitsch {
721f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
722f1f2ae84SBarry Smith   Mat         sA;
723f1f2ae84SBarry Smith   const char *prefix;
724f9818f3cSJose E. Roman   char       *found = NULL, *cprefix;
725f1f2ae84SBarry Smith 
726f1f2ae84SBarry Smith   PetscFunctionBegin;
7279f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
728f1f2ae84SBarry Smith   PetscCall(PCGetOperators(pc, NULL, &sA));
729f1f2ae84SBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
730f1f2ae84SBarry Smith   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
7313821be0aSBarry Smith   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
7323821be0aSBarry Smith   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
7333821be0aSBarry Smith 
7343821be0aSBarry Smith   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
7353821be0aSBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
7363821be0aSBarry Smith   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
7373821be0aSBarry Smith   PetscCall(PetscStrallocpy(prefix, &cprefix));
7383821be0aSBarry Smith   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
7393821be0aSBarry Smith   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
7403821be0aSBarry Smith   *found = 0;
7413821be0aSBarry Smith   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
7423821be0aSBarry Smith   PetscCall(PetscFree(cprefix));
7433821be0aSBarry Smith 
744f1f2ae84SBarry Smith   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
745f1f2ae84SBarry Smith   PetscCall(KSPSetFromOptions(km->ksps[0]));
746f1f2ae84SBarry Smith   PetscCall(KSPSetUp(km->ksps[0]));
747*835f2295SStefano Zampini   PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
748f1f2ae84SBarry Smith   PCMPIKSPCountsSeq++;
7499f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
7503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
751f1f2ae84SBarry Smith }
752f1f2ae84SBarry Smith 
753d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
754d71ae5a4SJacob Faibussowitsch {
755f1f2ae84SBarry Smith   PC_MPI  *km = (PC_MPI *)pc->data;
7565316cbedSBarry Smith   PetscInt its, n;
7575316cbedSBarry Smith   Mat      A;
758f1f2ae84SBarry Smith 
759f1f2ae84SBarry Smith   PetscFunctionBegin;
7609f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
761f1f2ae84SBarry Smith   PetscCall(KSPSolve(km->ksps[0], b, x));
7625316cbedSBarry Smith   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
763f1f2ae84SBarry Smith   PCMPISolveCountsSeq++;
7645316cbedSBarry Smith   PCMPIIterationsSeq += its;
7655316cbedSBarry Smith   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
7665316cbedSBarry Smith   PetscCall(MatGetSize(A, &n, NULL));
7675316cbedSBarry Smith   PCMPISizesSeq += n;
7689f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
7699f0612e4SBarry Smith   /*
7709f0612e4SBarry Smith     do not keep reference to previous rhs and solution since destroying them in the next KSPSolve()
7719f0612e4SBarry Smith     my use PetscFree() instead of PCMPIArrayDeallocate()
7729f0612e4SBarry Smith   */
7739f0612e4SBarry Smith   PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
7749f0612e4SBarry Smith   PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
7753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
776f1f2ae84SBarry Smith }
777f1f2ae84SBarry Smith 
778d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
779d71ae5a4SJacob Faibussowitsch {
780f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
781f1f2ae84SBarry Smith 
782f1f2ae84SBarry Smith   PetscFunctionBegin;
783f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
784*835f2295SStefano Zampini   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
7855316cbedSBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
7863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
787f1f2ae84SBarry Smith }
788f1f2ae84SBarry Smith 
789d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCDestroy_Seq(PC pc)
790d71ae5a4SJacob Faibussowitsch {
791f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
7929f0612e4SBarry Smith   Mat     A, B;
7939f0612e4SBarry Smith   Vec     x, b;
794f1f2ae84SBarry Smith 
795f1f2ae84SBarry Smith   PetscFunctionBegin;
7969f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
7979f0612e4SBarry Smith   /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
7989f0612e4SBarry Smith   PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
7999f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)A));
8009f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)B));
8019f0612e4SBarry Smith   PetscCall(KSPGetSolution(km->ksps[0], &x));
8029f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)x));
8039f0612e4SBarry Smith   PetscCall(KSPGetRhs(km->ksps[0], &b));
8049f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)b));
805f1f2ae84SBarry Smith   PetscCall(KSPDestroy(&km->ksps[0]));
806f1f2ae84SBarry Smith   PetscCall(PetscFree(pc->data));
8079f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
8089f0612e4SBarry Smith   PetscCall(MatDestroy(&A));
8099f0612e4SBarry Smith   PetscCall(MatDestroy(&B));
8109f0612e4SBarry Smith   PetscCall(VecDestroy(&x));
8119f0612e4SBarry Smith   PetscCall(VecDestroy(&b));
8123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
813f1f2ae84SBarry Smith }
814f1f2ae84SBarry Smith 
815f1f2ae84SBarry Smith /*
816f1f2ae84SBarry Smith      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
817dd8e379bSPierre Jolivet      right-hand side to the parallel PC
818f1f2ae84SBarry Smith */
819d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_MPI(PC pc)
820d71ae5a4SJacob Faibussowitsch {
821f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
822f1f2ae84SBarry Smith   PetscMPIInt rank, size;
823f1f2ae84SBarry Smith   PetscBool   newmatrix = PETSC_FALSE;
824f1f2ae84SBarry Smith 
825f1f2ae84SBarry Smith   PetscFunctionBegin;
826f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
827f1f2ae84SBarry Smith   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
828f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
829f1f2ae84SBarry Smith 
830f1f2ae84SBarry Smith   if (!pc->setupcalled) {
831f1f2ae84SBarry Smith     if (!km->alwaysuseserver) {
832f1f2ae84SBarry Smith       PetscInt n;
833f1f2ae84SBarry Smith       Mat      sA;
834f1f2ae84SBarry Smith       /* short circuit for small systems */
835f1f2ae84SBarry Smith       PetscCall(PCGetOperators(pc, &sA, &sA));
836f1f2ae84SBarry Smith       PetscCall(MatGetSize(sA, &n, NULL));
837f1f2ae84SBarry Smith       if (n < 2 * km->mincntperrank - 1 || size == 1) {
838f1f2ae84SBarry Smith         pc->ops->setup   = NULL;
839f1f2ae84SBarry Smith         pc->ops->apply   = PCApply_Seq;
840f1f2ae84SBarry Smith         pc->ops->destroy = PCDestroy_Seq;
841f1f2ae84SBarry Smith         pc->ops->view    = PCView_Seq;
842f1f2ae84SBarry Smith         PetscCall(PCSetUp_Seq(pc));
8433ba16761SJacob Faibussowitsch         PetscFunctionReturn(PETSC_SUCCESS);
844f1f2ae84SBarry Smith       }
845f1f2ae84SBarry Smith     }
846f1f2ae84SBarry Smith 
8479f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
848f1f2ae84SBarry Smith     PetscCall(PCMPICreate(pc));
849f1f2ae84SBarry Smith     newmatrix = PETSC_TRUE;
8509371c9d4SSatish Balay   }
8519371c9d4SSatish Balay   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
852f1f2ae84SBarry Smith 
853f1f2ae84SBarry Smith   if (newmatrix) {
854*835f2295SStefano Zampini     PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n"));
8559f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
856f1f2ae84SBarry Smith     PetscCall(PCMPISetMat(pc));
857f1f2ae84SBarry Smith   } else {
858*835f2295SStefano Zampini     PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n"));
8599f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
860f1f2ae84SBarry Smith     PetscCall(PCMPIUpdateMatValues(pc));
861f1f2ae84SBarry Smith   }
8623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
863f1f2ae84SBarry Smith }
864f1f2ae84SBarry Smith 
865d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
866d71ae5a4SJacob Faibussowitsch {
867f1f2ae84SBarry Smith   PetscFunctionBegin;
8689f0612e4SBarry Smith   PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
869f1f2ae84SBarry Smith   PetscCall(PCMPISolve(pc, b, x));
8703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
871f1f2ae84SBarry Smith }
872f1f2ae84SBarry Smith 
87366976f2fSJacob Faibussowitsch static PetscErrorCode PCDestroy_MPI(PC pc)
874d71ae5a4SJacob Faibussowitsch {
875f1f2ae84SBarry Smith   PetscFunctionBegin;
8769f0612e4SBarry Smith   PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
877f1f2ae84SBarry Smith   PetscCall(PCMPIDestroy(pc));
878f1f2ae84SBarry Smith   PetscCall(PetscFree(pc->data));
8793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
880f1f2ae84SBarry Smith }
881f1f2ae84SBarry Smith 
882f1f2ae84SBarry Smith /*
8839f0612e4SBarry Smith      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database
884f1f2ae84SBarry Smith */
88566976f2fSJacob Faibussowitsch static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
886d71ae5a4SJacob Faibussowitsch {
887f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
888f1f2ae84SBarry Smith   MPI_Comm    comm;
889f1f2ae84SBarry Smith   PetscMPIInt size;
890f1f2ae84SBarry Smith 
891f1f2ae84SBarry Smith   PetscFunctionBegin;
892f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
893f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
894f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
895*835f2295SStefano Zampini   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
8969f0612e4SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
8973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
898f1f2ae84SBarry Smith }
899f1f2ae84SBarry Smith 
90066976f2fSJacob Faibussowitsch static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
901d71ae5a4SJacob Faibussowitsch {
902f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
903f1f2ae84SBarry Smith 
904f1f2ae84SBarry Smith   PetscFunctionBegin;
905f1f2ae84SBarry Smith   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
9063821be0aSBarry Smith   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
9073821be0aSBarry Smith   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
908f1f2ae84SBarry Smith   PetscOptionsHeadEnd();
9093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
910f1f2ae84SBarry Smith }
911f1f2ae84SBarry Smith 
912f1f2ae84SBarry Smith /*MC
913f1580f4eSBarry Smith      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
914f1f2ae84SBarry Smith 
9153821be0aSBarry Smith    Options Database Keys for the Server:
916f1f2ae84SBarry Smith +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
9179f0612e4SBarry Smith .  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
9189f0612e4SBarry Smith -  -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true
919f1f2ae84SBarry Smith 
9203821be0aSBarry Smith    Options Database Keys for a specific `KSP` object
9213821be0aSBarry Smith +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
9223821be0aSBarry Smith -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)
9233821be0aSBarry Smith 
9243821be0aSBarry Smith    Level: developer
92520f4b53cSBarry Smith 
926f1f2ae84SBarry Smith    Notes:
9279f0612e4SBarry Smith    This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
9289f0612e4SBarry Smith    `MatCreateSeqAIJWithArrays()`
9299f0612e4SBarry Smith 
93046bbbc36SPierre Jolivet    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used.
931f1f2ae84SBarry Smith 
932f1f2ae84SBarry Smith    It can be particularly useful for user OpenMP code or potentially user GPU code.
933f1f2ae84SBarry Smith 
934dd8e379bSPierre Jolivet    When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
9353821be0aSBarry Smith    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
936f1f2ae84SBarry Smith 
9373821be0aSBarry Smith    The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
9380316ec64SBarry Smith    because they are not the actual solver objects.
9390316ec64SBarry Smith 
9400316ec64SBarry Smith    When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other
94168a21331SBarry Smith    stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
9420316ec64SBarry Smith 
9433821be0aSBarry Smith    Developer Note:
9443821be0aSBarry Smith    This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of
9453821be0aSBarry Smith    a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
9463821be0aSBarry Smith 
9473821be0aSBarry Smith .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
948f1f2ae84SBarry Smith M*/
949d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
950d71ae5a4SJacob Faibussowitsch {
951f1f2ae84SBarry Smith   PC_MPI *km;
952f9818f3cSJose E. Roman   char   *found = NULL;
953f1f2ae84SBarry Smith 
954f1f2ae84SBarry Smith   PetscFunctionBegin;
9553821be0aSBarry Smith   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
9563821be0aSBarry Smith   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
9573821be0aSBarry Smith 
9583821be0aSBarry Smith   /* material from PCSetType() */
9593821be0aSBarry Smith   PetscTryTypeMethod(pc, destroy);
9603821be0aSBarry Smith   pc->ops->destroy = NULL;
9613821be0aSBarry Smith   pc->data         = NULL;
9623821be0aSBarry Smith 
9633821be0aSBarry Smith   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
9643821be0aSBarry Smith   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
9653821be0aSBarry Smith   pc->modifysubmatrices  = NULL;
9663821be0aSBarry Smith   pc->modifysubmatricesP = NULL;
9673821be0aSBarry Smith   pc->setupcalled        = 0;
9683821be0aSBarry Smith 
9694dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&km));
970f1f2ae84SBarry Smith   pc->data = (void *)km;
971f1f2ae84SBarry Smith 
972f1f2ae84SBarry Smith   km->mincntperrank = 10000;
973f1f2ae84SBarry Smith 
974f1f2ae84SBarry Smith   pc->ops->setup          = PCSetUp_MPI;
975f1f2ae84SBarry Smith   pc->ops->apply          = PCApply_MPI;
976f1f2ae84SBarry Smith   pc->ops->destroy        = PCDestroy_MPI;
977f1f2ae84SBarry Smith   pc->ops->view           = PCView_MPI;
978f1f2ae84SBarry Smith   pc->ops->setfromoptions = PCSetFromOptions_MPI;
9793821be0aSBarry Smith   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
9803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
981f1f2ae84SBarry Smith }
9829f0612e4SBarry Smith 
9839f0612e4SBarry Smith /*@
9849f0612e4SBarry Smith   PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`
9859f0612e4SBarry Smith 
9869f0612e4SBarry Smith   Not Collective
9879f0612e4SBarry Smith 
9889f0612e4SBarry Smith   Input Parameter:
9899f0612e4SBarry Smith . pc - the preconditioner context
9909f0612e4SBarry Smith 
9919f0612e4SBarry Smith   Output Parameter:
9929f0612e4SBarry Smith . innerksp - the inner `KSP`
9939f0612e4SBarry Smith 
9949f0612e4SBarry Smith   Level: advanced
9959f0612e4SBarry Smith 
9969f0612e4SBarry Smith .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
9979f0612e4SBarry Smith @*/
9989f0612e4SBarry Smith PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
9999f0612e4SBarry Smith {
10009f0612e4SBarry Smith   PC_MPI *red = (PC_MPI *)pc->data;
10019f0612e4SBarry Smith 
10029f0612e4SBarry Smith   PetscFunctionBegin;
10039f0612e4SBarry Smith   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
10049f0612e4SBarry Smith   PetscAssertPointer(innerksp, 2);
10059f0612e4SBarry Smith   *innerksp = red->ksps[0];
10069f0612e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
10079f0612e4SBarry Smith }
1008