1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     The calling program may use OpenMP to compute the right-hand side and matrix for the linear system.
6 
7     The code uses MPI_COMM_WORLD below, but arguably it should use PETSC_COMM_WORLD.
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
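/*
   Example usage (a minimal sketch, not part of this file): user code written for a single MPI
   process gains a parallel solve simply by being launched on several ranks with the option
   -mpi_linear_solver_server; the main program itself stays sequential

     #include <petscksp.h>
     int main(int argc, char **argv)
     {
       Mat A;
       Vec b, x;
       KSP ksp;

       PetscCall(PetscInitialize(&argc, &argv, NULL, NULL)); // with -mpi_linear_solver_server this starts the server on ranks > 0
       // ... assemble a sequential (MATSEQAIJ) matrix A and vectors b, x on rank 0 ...
       PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
       PetscCall(KSPSetOperators(ksp, A, A));
       PetscCall(KSPSetFromOptions(ksp));
       PetscCall(KSPSolve(ksp, b, x)); // PCMPI distributes the system to the server ranks and solves in parallel
       PetscCall(KSPDestroy(&ksp));
       PetscCall(PetscFinalize()); // ends the server
       return 0;
     }
*/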
13 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
14 #include <petsc/private/kspimpl.h>
15 #include <petscts.h>
16 #include <petsctao.h>
17 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
18   #include <pthread.h>
19 #endif
20 
21 #define PC_MPI_MAX_RANKS  256
22 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
23 
24 typedef struct {
25   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
26   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
27   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
28   PetscInt    mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
29   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
30 } PC_MPI;
31 
32 typedef enum {
33   PCMPI_EXIT, /* exit the PC server loop, meaning the controlling sequential program is done */
34   PCMPI_CREATE,
35   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
36   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
37   PCMPI_SOLVE,
38   PCMPI_VIEW,
39   PCMPI_DESTROY /* destroy a PC that is no longer needed */
40 } PCMPICommand;
41 
42 static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
43 static PetscBool     PCMPICommSet = PETSC_FALSE;
44 static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
45 static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
46 static PetscLogEvent EventServerDist, EventServerDistMPI;
47 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
48 static pthread_mutex_t *PCMPIServerLocks;
49 #else
50 static void *PCMPIServerLocks;
51 #endif
52 
53 static PetscErrorCode PCMPICommsCreate(void)
54 {
55   MPI_Comm    comm = PC_MPI_COMM_WORLD;
56   PetscMPIInt size, rank, i;
57 
58   PetscFunctionBegin;
59   PetscCallMPI(MPI_Comm_size(comm, &size));
60   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
61   PetscCallMPI(MPI_Comm_rank(comm, &rank));
62   /* comm for size 1 is useful only for debugging */
63   for (i = 0; i < size; i++) {
64     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
65     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
66     PCMPISolveCounts[i] = 0;
67     PCMPIKSPCounts[i]   = 0;
68     PCMPIIterations[i]  = 0;
69     PCMPISizes[i]       = 0;
70   }
71   PCMPICommSet = PETSC_TRUE;
72   PetscFunctionReturn(PETSC_SUCCESS);
73 }
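/*
   For example (a sketch): with 4 MPI ranks, PCMPICommsCreate() produces the nested subcommunicators
     PCMPIComms[0] = {0}, PCMPIComms[1] = {0,1}, PCMPIComms[2] = {0,1,2}, PCMPIComms[3] = {0,1,2,3}
   and PCMPIComms[i] is MPI_COMM_NULL on any rank greater than i.
*/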
74 
75 static PetscErrorCode PCMPICommsDestroy(void)
76 {
77   MPI_Comm    comm = PC_MPI_COMM_WORLD;
78   PetscMPIInt size, rank, i;
79 
80   PetscFunctionBegin;
81   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
82   PetscCallMPI(MPI_Comm_size(comm, &size));
83   PetscCallMPI(MPI_Comm_rank(comm, &rank));
84   for (i = 0; i < size; i++) {
85     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
86   }
87   PCMPICommSet = PETSC_FALSE;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode PCMPICreate(PC pc)
92 {
93   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
94   MPI_Comm    comm = PC_MPI_COMM_WORLD;
95   KSP         ksp;
96   PetscInt    N[2], mincntperrank = 0;
97   PetscMPIInt size;
98   Mat         sA;
99   char       *cprefix = NULL;
100   PetscMPIInt len     = 0;
101 
102   PetscFunctionBegin;
103   PCMPIServerInSolve = PETSC_TRUE;
104   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
105   PetscCallMPI(MPI_Comm_size(comm, &size));
106   if (pc) {
107     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP of type MPI on a one-rank MPI run; this will be less efficient than not using this type\n"));
108     PetscCall(PCGetOperators(pc, &sA, &sA));
109     PetscCall(MatGetSize(sA, &N[0], &N[1]));
110   }
111   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
112 
113   /* choose a suitable sized MPI_Comm for the problem to be solved on */
114   if (km) mincntperrank = km->mincntperrank;
115   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPIU_INT, 0, comm)); /* mincntperrank is a PetscInt, so it must be broadcast with MPIU_INT */
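  /* For example (a worked sketch): with N[0] = 100000 rows, mincntperrank = 10000, and 16 ranks,
     PetscMax(1, 100000 / 10000) = 10 and PetscMin(16, 10) = 10, so the solve uses PCMPIComms[9], i.e. 10 ranks */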
116   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
117   if (comm == MPI_COMM_NULL) {
118     ksp                = NULL;
119     PCMPIServerInSolve = PETSC_FALSE;
120     PetscFunctionReturn(PETSC_SUCCESS);
121   }
122   PetscCall(PetscLogStagePush(PCMPIStage));
123   PetscCall(KSPCreate(comm, &ksp));
124   PetscCall(KSPSetNestLevel(ksp, 1));
125   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
126   PetscCall(PetscLogStagePop());
127   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
128   if (pc) {
129     size_t      slen;
130     const char *prefix = NULL;
131     char       *found  = NULL;
132 
133     PetscCallMPI(MPI_Comm_size(comm, &size));
134     PCMPIKSPCounts[size - 1]++;
135     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion, for example "xyz_mpi_linear_solver_server_" becomes "xyz_" */
136     PetscCall(PCGetOptionsPrefix(pc, &prefix));
137     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
138     PetscCall(PetscStrallocpy(prefix, &cprefix));
139     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
140     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
141     *found = 0;
142     PetscCall(PetscStrlen(cprefix, &slen));
143     PetscCall(PetscMPIIntCast(slen, &len));
144   }
145   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
146   if (len) {
147     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
148     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
149     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
150   }
151   PetscCall(PetscFree(cprefix));
152   PCMPIServerInSolve = PETSC_FALSE;
153   PetscFunctionReturn(PETSC_SUCCESS);
154 }
155 
156 static PetscErrorCode PCMPISetMat(PC pc)
157 {
158   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
159   Mat                A;
160   PetscInt           m, n, j, bs;
161   Mat                sA;
162   MPI_Comm           comm = PC_MPI_COMM_WORLD;
163   KSP                ksp;
164   PetscLayout        layout;
165   const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
166   const PetscInt    *range;
167   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
168   const PetscScalar *a                = NULL, *sa;
169   PetscInt           matproperties[8] = {0}, rstart, rend;
170   char              *cprefix;
171 
172   PetscFunctionBegin;
173   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
174   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
175   PCMPIServerInSolve = PETSC_TRUE;
176   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
177   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
178   if (pc) {
179     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
180     const char *prefix;
181     size_t      clen;
182 
183     PetscCallMPI(MPI_Comm_size(comm, &size));
184     PCMPIMatCounts[size - 1]++;
185     PetscCall(PCGetOperators(pc, &sA, &sA));
186     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
187     PetscCall(MatGetBlockSize(sA, &bs));
188     matproperties[2] = bs;
189     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
190     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
191     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
192     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
193     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
194     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
195     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
196     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
197     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
198     PetscCall(MatGetOptionsPrefix(sA, &prefix));
199     PetscCall(PetscStrallocpy(prefix, &cprefix));
200     PetscCall(PetscStrlen(cprefix, &clen));
201     matproperties[7] = (PetscInt)clen;
202   }
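  /* matproperties packs {rows, cols, block size, symmetric, hermitian, spd, structurally symmetric, prefix length},
     where each Is*Known() result is encoded as 0 = not known, 1 = known true, 2 = known false. For example (a sketch),
     a 100 x 100 matrix with bs = 1 for which only MatIsSymmetricKnown() reports true would broadcast {100, 100, 1, 1, 0, 0, 0, len} */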
203   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
204 
205   /* determine ownership ranges of matrix columns */
206   PetscCall(PetscLayoutCreate(comm, &layout));
207   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
208   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
209   PetscCall(PetscLayoutSetUp(layout));
210   PetscCall(PetscLayoutGetLocalSize(layout, &n));
211   PetscCall(PetscLayoutDestroy(&layout));
212 
213   /* determine ownership ranges of matrix rows */
214   PetscCall(PetscLayoutCreate(comm, &layout));
215   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
216   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
217   PetscCall(PetscLayoutSetUp(layout));
218   PetscCall(PetscLayoutGetLocalSize(layout, &m));
219   PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));
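  /* For example (a sketch): matproperties[0] = 10 rows on a 3 rank comm gives local sizes m = 4, 3, 3
     with ownership ranges [0,4), [4,7), and [7,10) */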
220 
221   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
222   /* copy over the matrix nonzero structure and values */
223   if (pc) {
224     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
225     if (!PCMPIServerUseShmget) {
226       NZ      = km->NZ;
227       NZdispl = km->NZdispl;
228       PetscCall(PetscLayoutGetRanges(layout, &range));
229       for (i = 0; i < size; i++) {
230         PetscCall(PetscMPIIntCast(1 + range[i + 1] - range[i], &sendcounti[i]));
231         PetscCall(PetscMPIIntCast(IA[range[i + 1]] - IA[range[i]], &NZ[i]));
232       }
233       displi[0]  = 0;
234       NZdispl[0] = 0;
235       for (j = 1; j < size; j++) {
236         displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
237         NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
238       }
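      /* For example (a sketch): 2 ranks owning 2 rows each with IA = {0,2,5,7,9} gives
         sendcounti = {3,3} and displi = {0,2} (adjacent row-offset slices share one entry),
         and NZ = {5,4} with NZdispl = {0,5} */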
239     }
240     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
241   }
242   PetscCall(PetscLayoutDestroy(&layout));
243 
244   PetscCall(MatCreate(comm, &A));
245   if (matproperties[7] > 0) {
246     PetscMPIInt ni;
247 
248     PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni));
249     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
250     PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm));
251     PetscCall(MatSetOptionsPrefix(A, cprefix));
252     PetscCall(PetscFree(cprefix));
253   }
254   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
255   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
256   PetscCall(MatSetType(A, MATMPIAIJ));
257 
258   if (!PCMPIServerUseShmget) {
259     PetscMPIInt in;
260     PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
261     PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
262     PetscCall(PetscMPIIntCast(n, &in));
263     PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, in + 1, MPIU_INT, 0, comm));
264     PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
265     PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
266   } else {
267     const void           *addr[3] = {(const void *)IA, (const void *)JA, (const void *)sa};
268     PCMPIServerAddresses *addresses;
269 
270     PetscCall(PetscNew(&addresses));
271     addresses->n = 3;
272     PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
273     ia = rstart + (PetscInt *)addresses->addr[0];
274     ja = ia[0] + (PetscInt *)addresses->addr[1];
275     a  = ia[0] + (PetscScalar *)addresses->addr[2];
276     PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy));
277   }
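  /* In the shmget path above each addresses->addr[] entry is this process's mapping of the rank 0 array, so
     ia points at this rank's slice of the row offsets (shifted by rstart) while ja and a are shifted by ia[0],
     the global index of this rank's first nonzero */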
278 
279   if (pc) {
280     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
281     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
282   }
283   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
284 
285   PetscCall(PetscLogStagePush(PCMPIStage));
286   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
287   PetscCall(MatSetBlockSize(A, matproperties[2]));
288 
289   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
290   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
291   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
292   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
293 
294   if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
295   PetscCall(KSPSetOperators(ksp, A, A));
296   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
297   PetscCall(PetscLogStagePop());
298   if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
299     const PetscInt *range;
300 
301     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
302     for (i = 0; i < size; i++) {
303       PetscCall(PetscMPIIntCast(range[i + 1] - range[i], &km->sendcount[i]));
304       PetscCall(PetscMPIIntCast(range[i], &km->displ[i]));
305     }
306   }
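  /* For example (a sketch): a 10 unknown system on 3 ranks gives sendcount = {4,3,3} and displ = {0,4,7},
     the counts and offsets later used to scatter the rhs and gather the solution */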
307   PetscCall(MatDestroy(&A));
308   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
309   PetscCall(KSPSetFromOptions(ksp));
310   PCMPIServerInSolve = PETSC_FALSE;
311   PetscFunctionReturn(PETSC_SUCCESS);
312 }
313 
314 static PetscErrorCode PCMPIUpdateMatValues(PC pc)
315 {
316   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
317   KSP                ksp;
318   Mat                sA, A;
319   MPI_Comm           comm = PC_MPI_COMM_WORLD;
320   const PetscInt    *ia, *IA;
321   const PetscScalar *a;
322   PetscCount         nz;
323   const PetscScalar *sa = NULL;
324   PetscMPIInt        size;
325   PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};
326 
327   PetscFunctionBegin;
328   if (pc) {
329     PetscCall(PCGetOperators(pc, &sA, &sA));
330     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
331     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
332   }
333   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
334   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
335   PCMPIServerInSolve = PETSC_TRUE;
336   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
337   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
338   PetscCallMPI(MPI_Comm_size(comm, &size));
339   PCMPIMatCounts[size - 1]++;
340   PetscCall(KSPGetOperators(ksp, NULL, &A));
341   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
342   if (!PCMPIServerUseShmget) {
343     PetscMPIInt mpi_nz;
344 
345     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
346     PetscCall(PetscMPIIntCast(nz, &mpi_nz));
347     PetscCall(PetscMalloc1(nz, &a));
348     PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, mpi_nz, MPIU_SCALAR, 0, comm));
349   } else {
350     PCMPIServerAddresses *addresses;
351     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
352     PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
353     ia = rstart + (PetscInt *)addresses->addr[0];
354     a  = ia[0] + (PetscScalar *)addresses->addr[2];
355   }
356   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
357   if (pc) {
358     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
359 
360     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
361     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
362 
363     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
364     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
365     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
366     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
367     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
368     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
369     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
370     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
371   }
372   PetscCall(MatUpdateMPIAIJWithArray(A, a));
373   if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
374   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
375   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
376   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
377   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
378   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
379   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
380   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
381   PCMPIServerInSolve = PETSC_FALSE;
382   PetscFunctionReturn(PETSC_SUCCESS);
383 }
384 
385 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
386 {
387   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
388   KSP                ksp;
389   MPI_Comm           comm = PC_MPI_COMM_WORLD;
390   const PetscScalar *sb   = NULL, *x;
391   PetscScalar       *b, *sx = NULL;
392   PetscInt           its, n;
393   PetscMPIInt        size;
394   void              *addr[2];
395 
396   PetscFunctionBegin;
397   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
398   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
399   PCMPIServerInSolve = PETSC_TRUE;
400   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
401   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
402 
403   /* scatterv rhs */
404   PetscCallMPI(MPI_Comm_size(comm, &size));
405   if (pc) {
406     PetscInt N;
407 
408     PCMPISolveCounts[size - 1]++;
409     PetscCall(MatGetSize(pc->pmat, &N, NULL));
410     PCMPISizes[size - 1] += N;
411   }
412   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
413   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
414   if (!PCMPIServerUseShmget) {
415     PetscMPIInt in;
416 
417     PetscCall(VecGetArray(ksp->vec_rhs, &b));
418     if (pc) PetscCall(VecGetArrayRead(B, &sb));
419     PetscCall(PetscMPIIntCast(n, &in));
420     PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, in, MPIU_SCALAR, 0, comm));
421     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
422     PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
423     // TODO: scatter initial guess if needed
424   } else {
425     PetscInt rstart;
426 
427     if (pc) PetscCall(VecGetArrayRead(B, &sb));
428     if (pc) PetscCall(VecGetArray(X, &sx));
429     const void *inaddr[2] = {(const void *)sb, (const void *)sx};
430     if (pc) PetscCall(VecRestoreArray(X, &sx));
431     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
432 
433     PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
434     PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
435     PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
436     PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
437   }
438   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
439 
440   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
441   PetscCall(PetscLogStagePush(PCMPIStage));
442   PetscCall(KSPSolve(ksp, NULL, NULL));
443   PetscCall(PetscLogStagePop());
444   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
445   PetscCall(KSPGetIterationNumber(ksp, &its));
446   PCMPIIterations[size - 1] += its;
447   // TODO: send iterations up to outer KSP
448 
449   if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));
450 
451   /* gather solution */
452   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
453   if (!PCMPIServerUseShmget) {
454     PetscMPIInt in;
455 
456     PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
457     if (pc) PetscCall(VecGetArray(X, &sx));
458     PetscCall(PetscMPIIntCast(n, &in));
459     PetscCallMPI(MPI_Gatherv(x, in, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
460     if (pc) PetscCall(VecRestoreArray(X, &sx));
461     PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
462   } else {
463     PetscCall(VecResetArray(ksp->vec_rhs));
464     PetscCall(VecResetArray(ksp->vec_sol));
465   }
466   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
467   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
468   PCMPIServerInSolve = PETSC_FALSE;
469   PetscFunctionReturn(PETSC_SUCCESS);
470 }
471 
472 static PetscErrorCode PCMPIDestroy(PC pc)
473 {
474   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
475   KSP      ksp;
476   MPI_Comm comm = PC_MPI_COMM_WORLD;
477 
478   PetscFunctionBegin;
479   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
480   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
481   PetscCall(PetscLogStagePush(PCMPIStage));
482   PCMPIServerInSolve = PETSC_TRUE;
483   PetscCall(KSPDestroy(&ksp));
484   PetscCall(PetscLogStagePop());
485   PCMPIServerInSolve = PETSC_FALSE;
486   PetscFunctionReturn(PETSC_SUCCESS);
487 }
488 
489 static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
490 {
491 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
492   PetscMPIInt dummy1 = 1, dummy2;
493 #endif
494 
495   PetscFunctionBegin;
496 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
497   if (PCMPIServerUseShmget) {
498     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
499   }
500 #endif
501   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
502   /* the MPI_Reduce() below ensures every server rank has taken its lock before rank 0 takes the locks back */
503 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
504   if (PCMPIServerUseShmget) {
505     PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
506     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
507   }
508 #endif
509   PetscFunctionReturn(PETSC_SUCCESS);
510 }
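/*
   A sketch of the handshake above when shared memory is in use: rank 0 initially holds every lock, so each
   server rank sleeps in pthread_mutex_lock() rather than busy-waiting in MPI_Bcast(). The sequence is
     rank 0:      unlock all server locks -> MPI_Bcast(request) -> MPI_Reduce() -> relock all server locks
     server rank: lock own lock           -> MPI_Bcast(request) -> MPI_Reduce() -> unlock own lock
   where the MPI_Reduce() guarantees every server rank holds its lock before rank 0 reclaims the locks.
*/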
511 
512 /*@C
513   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
514   parallel `KSP` solves and management of parallel `KSP` objects.
515 
516   Logically Collective on all MPI processes except rank 0
517 
518   Options Database Keys:
519 + -mpi_linear_solver_server                   - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
520 . -mpi_linear_solver_server_view              - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
521 - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)
522 
523   Level: developer
524 
525   Note:
526   This is normally started automatically in `PetscInitialize()` when the option is provided
527 
528   See `PCMPI` for information on using the solver with a `KSP` object
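
  Example Usage:
  A minimal sketch of the server lifecycle; normally `PetscInitialize()` and `PetscFinalize()` make these calls
  when `-mpi_linear_solver_server` is given.
.vb
  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCall(PCMPIServerBegin()); /* ranks > 0 enter the request loop here and never return; rank 0 returns immediately */
  /* only rank 0 reaches this point; its PCMPI solves are forwarded to the server ranks */
  PetscCall(PCMPIServerEnd()); /* rank 0 broadcasts PCMPI_EXIT so the server ranks finalize and exit */
  PetscCall(PetscFinalize());
.ve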
529 
530   Developer Notes:
531   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
532   written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
533   (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
534 
535   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
536 
537   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
538 
539   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
540 
541   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
542   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
543 
544   The concept could also be extended for user callbacks for `SNES`, `TS`, and `Tao` where `SNESSolve()`, for example, runs on
545   all MPI processes but the user callback only runs on one MPI process per node.
546 
547   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
548   the `MPI_Comm` argument from PETSc calls.
549 
550 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
551 @*/
552 PetscErrorCode PCMPIServerBegin(void)
553 {
554   PetscMPIInt rank;
555 
556   PetscFunctionBegin;
557   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
558   if (PetscDefined(USE_SINGLE_LIBRARY)) {
559     PetscCall(VecInitializePackage());
560     PetscCall(MatInitializePackage());
561     PetscCall(DMInitializePackage());
562     PetscCall(PCInitializePackage());
563     PetscCall(KSPInitializePackage());
564     PetscCall(SNESInitializePackage());
565     PetscCall(TSInitializePackage());
566     PetscCall(TaoInitializePackage());
567   }
568   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
569   PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
570   PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));
571 
572   if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
573   PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));
574 
575   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
576   if (PCMPIServerUseShmget) {
577 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
578     PetscMPIInt size;
579 
580     PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
581     if (size > 1) {
582       pthread_mutex_t *locks;
583 
584       if (rank == 0) {
585         PCMPIServerActive = PETSC_TRUE;
586         PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
587       }
588       PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
589       if (rank == 0) {
590         pthread_mutexattr_t attr;
591 
592         pthread_mutexattr_init(&attr);
593         pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
594 
595         for (int i = 1; i < size; i++) {
596           pthread_mutex_init(&PCMPIServerLocks[i], &attr);
597           pthread_mutex_lock(&PCMPIServerLocks[i]);
598         }
599       }
600       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
601     }
602 #endif
603   }
604   if (rank == 0) {
605     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
606     PCMPIServerActive = PETSC_TRUE;
607     PetscFunctionReturn(PETSC_SUCCESS);
608   }
609 
610   while (PETSC_TRUE) {
611     PCMPICommand request = PCMPI_CREATE;
612 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
613     PetscMPIInt dummy1 = 1, dummy2;
614 #endif
615 
616     // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
617 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
618     if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
619 #endif
620     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
621 #if defined(PETSC_HAVE_PTHREAD_MUTEX)
622     if (PCMPIServerUseShmget) {
623       /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
624       PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
625       pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
626     }
627 #endif
628     switch (request) {
629     case PCMPI_CREATE:
630       PetscCall(PCMPICreate(NULL));
631       break;
632     case PCMPI_SET_MAT:
633       PetscCall(PCMPISetMat(NULL));
634       break;
635     case PCMPI_UPDATE_MAT_VALUES:
636       PetscCall(PCMPIUpdateMatValues(NULL));
637       break;
638     case PCMPI_VIEW:
639       // PetscCall(PCMPIView(NULL)); /* viewing on the server ranks is not yet implemented */
640       break;
641     case PCMPI_SOLVE:
642       PetscCall(PCMPISolve(NULL, NULL, NULL));
643       break;
644     case PCMPI_DESTROY:
645       PetscCall(PCMPIDestroy(NULL));
646       break;
647     case PCMPI_EXIT:
648       if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
649       PetscCall(PetscFinalize());
650       exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
651       break;
652     default:
653       break;
654     }
655   }
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
659 /*@C
660   PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
661   parallel KSP solves and management of parallel `KSP` objects.
662 
663   Logically Collective on all MPI ranks except 0
664 
665   Level: developer
666 
667   Note:
668   This is normally called automatically in `PetscFinalize()`
669 
670 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
671 @*/
672 PetscErrorCode PCMPIServerEnd(void)
673 {
674   PetscFunctionBegin;
675   if (PetscGlobalRank == 0) {
676     PetscViewer       viewer = NULL;
677     PetscViewerFormat format;
678 
679     PetscCall(PetscShmgetAddressesFinalize());
680     PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
681     if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
682     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
683     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
684     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
685     PetscOptionsEnd();
686     if (viewer) {
687       PetscBool isascii;
688 
689       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
690       if (isascii) {
691         PetscMPIInt size;
692         PetscMPIInt i;
693 
694         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
695         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
696         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
697         if (PCMPISolveCountsSeq) { /* also guards the divisions below */
698           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
699         }
700         for (i = 0; i < size; i++) {
701           if (PCMPISolveCounts[i]) { /* also guards the divisions below */
702             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
703           }
704         }
705         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
706       }
707       PetscCall(PetscViewerDestroy(&viewer));
708     }
709   }
710   PetscCall(PCMPICommsDestroy());
711   PCMPIServerActive = PETSC_FALSE;
712   PetscFunctionReturn(PETSC_SUCCESS);
713 }
714 
715 /*
716     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
717     because, for example, the problem is small. This version is more efficient because it does not require copying any data
718 */
719 static PetscErrorCode PCSetUp_Seq(PC pc)
720 {
721   PC_MPI     *km = (PC_MPI *)pc->data;
722   Mat         sA;
723   const char *prefix;
724   char       *found = NULL, *cprefix;
725 
726   PetscFunctionBegin;
727   PCMPIServerInSolve = PETSC_TRUE;
728   PetscCall(PCGetOperators(pc, NULL, &sA));
729   PetscCall(PCGetOptionsPrefix(pc, &prefix));
730   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
731   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
732   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
733 
734   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
735   PetscCall(PCGetOptionsPrefix(pc, &prefix));
736   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
737   PetscCall(PetscStrallocpy(prefix, &cprefix));
738   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
739   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
740   *found = 0;
741   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
742   PetscCall(PetscFree(cprefix));
743 
744   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
745   PetscCall(KSPSetFromOptions(km->ksps[0]));
746   PetscCall(KSPSetUp(km->ksps[0]));
747   PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
748   PCMPIKSPCountsSeq++;
749   PCMPIServerInSolve = PETSC_FALSE;
750   PetscFunctionReturn(PETSC_SUCCESS);
751 }
752 
753 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
754 {
755   PC_MPI  *km = (PC_MPI *)pc->data;
756   PetscInt its, n;
757   Mat      A;
758 
759   PetscFunctionBegin;
760   PCMPIServerInSolve = PETSC_TRUE;
761   PetscCall(KSPSolve(km->ksps[0], b, x));
762   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
763   PCMPISolveCountsSeq++;
764   PCMPIIterationsSeq += its;
765   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
766   PetscCall(MatGetSize(A, &n, NULL));
767   PCMPISizesSeq += n;
768   PCMPIServerInSolve = PETSC_FALSE;
769   /*
770     do not keep references to the previous rhs and solution since destroying them in the next KSPSolve()
771     may use PetscFree() instead of PCMPIArrayDeallocate()
772   */
773   PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
774   PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
775   PetscFunctionReturn(PETSC_SUCCESS);
776 }
777 
778 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
779 {
780   PC_MPI *km = (PC_MPI *)pc->data;
781 
782   PetscFunctionBegin;
783   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
784   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
785   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
786   PetscFunctionReturn(PETSC_SUCCESS);
787 }
788 
789 static PetscErrorCode PCDestroy_Seq(PC pc)
790 {
791   PC_MPI *km = (PC_MPI *)pc->data;
792   Mat     A, B;
793   Vec     x, b;
794 
795   PetscFunctionBegin;
796   PCMPIServerInSolve = PETSC_TRUE;
797   /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
798   PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
799   PetscCall(PetscObjectReference((PetscObject)A));
800   PetscCall(PetscObjectReference((PetscObject)B));
801   PetscCall(KSPGetSolution(km->ksps[0], &x));
802   PetscCall(PetscObjectReference((PetscObject)x));
803   PetscCall(KSPGetRhs(km->ksps[0], &b));
804   PetscCall(PetscObjectReference((PetscObject)b));
805   PetscCall(KSPDestroy(&km->ksps[0]));
806   PetscCall(PetscFree(pc->data));
807   PCMPIServerInSolve = PETSC_FALSE;
808   PetscCall(MatDestroy(&A));
809   PetscCall(MatDestroy(&B));
810   PetscCall(VecDestroy(&x));
811   PetscCall(VecDestroy(&b));
812   PetscFunctionReturn(PETSC_SUCCESS);
813 }
814 
815 /*
816      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
817      right-hand side to the parallel PC
818 */
819 static PetscErrorCode PCSetUp_MPI(PC pc)
820 {
821   PC_MPI     *km = (PC_MPI *)pc->data;
822   PetscMPIInt rank, size;
823   PetscBool   newmatrix = PETSC_FALSE;
824 
825   PetscFunctionBegin;
826   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
827   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
828   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
829 
830   if (!pc->setupcalled) {
831     if (!km->alwaysuseserver) {
832       PetscInt n;
833       Mat      sA;
834       /* short circuit for small systems: with the default mincntperrank of 10000, any system with fewer than 19999 rows (or any single-rank run) is solved directly on rank 0 */
835       PetscCall(PCGetOperators(pc, &sA, &sA));
836       PetscCall(MatGetSize(sA, &n, NULL));
837       if (n < 2 * km->mincntperrank - 1 || size == 1) {
838         pc->ops->setup   = NULL;
839         pc->ops->apply   = PCApply_Seq;
840         pc->ops->destroy = PCDestroy_Seq;
841         pc->ops->view    = PCView_Seq;
842         PetscCall(PCSetUp_Seq(pc));
843         PetscFunctionReturn(PETSC_SUCCESS);
844       }
845     }
846 
847     PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
848     PetscCall(PCMPICreate(pc));
849     newmatrix = PETSC_TRUE;
850   }
851   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
852 
853   if (newmatrix) {
854     PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n"));
855     PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
856     PetscCall(PCMPISetMat(pc));
857   } else {
858     PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n"));
859     PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
860     PetscCall(PCMPIUpdateMatValues(pc));
861   }
862   PetscFunctionReturn(PETSC_SUCCESS);
863 }
864 
865 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
866 {
867   PetscFunctionBegin;
868   PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
869   PetscCall(PCMPISolve(pc, b, x));
870   PetscFunctionReturn(PETSC_SUCCESS);
871 }
872 
873 static PetscErrorCode PCDestroy_MPI(PC pc)
874 {
875   PetscFunctionBegin;
876   PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
877   PetscCall(PCMPIDestroy(pc));
878   PetscCall(PetscFree(pc->data));
879   PetscFunctionReturn(PETSC_SUCCESS);
880 }
881 
882 /*
883      PCView_MPI - Cannot call view on the MPI parallel KSP because the other ranks do not have access to the viewer; use the options database instead
884 */
885 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
886 {
887   PC_MPI     *km = (PC_MPI *)pc->data;
888   MPI_Comm    comm;
889   PetscMPIInt size;
890 
891   PetscFunctionBegin;
892   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
893   PetscCallMPI(MPI_Comm_size(comm, &size));
894   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
895   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
896   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
900 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
901 {
902   PC_MPI *km = (PC_MPI *)pc->data;
903 
904   PetscFunctionBegin;
905   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
906   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of matrix rows per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
907   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
908   PetscOptionsHeadEnd();
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 /*MC
913      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
914 
915    Options Database Keys for the Server:
916 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
917 .  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
918 -  -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute the matrix and right-hand side, defaults to true
919 
920    Options Database Keys for a specific `KSP` object
921 +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
922 -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on one process (for debugging and testing purposes)
923 
924    Level: developer
925 
926    Notes:
927    This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
928    `MatCreateSeqAIJWithArrays()`
929 
930    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
931 
932    It can be particularly useful for user OpenMP code or potentially user GPU code.
933 
934    When the program is run with a single MPI process it directly uses the provided matrix and right-hand side
935    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
936 
937    The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user level `KSP` and `PC` have no effect
938    because they are not the actual solver objects.
939 
940    When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
941    stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
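
   Example Usage:
   A sketch, with `./app` a hypothetical executable built from an otherwise sequential PETSc main program; its linear
   solves are then run on 8 ranks
.vb
   mpiexec -n 8 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view
.ve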
942 
943    Developer Note:
944    This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
945    a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
946 
947 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
948 M*/
949 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
950 {
951   PC_MPI *km;
952   char   *found = NULL;
953 
954   PetscFunctionBegin;
955   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
956   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
957 
958   /* material from PCSetType() */
959   PetscTryTypeMethod(pc, destroy);
960   pc->ops->destroy = NULL;
961   pc->data         = NULL;
962 
963   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
964   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
965   pc->modifysubmatrices  = NULL;
966   pc->modifysubmatricesP = NULL;
967   pc->setupcalled        = 0;
968 
969   PetscCall(PetscNew(&km));
970   pc->data = (void *)km;
971 
972   km->mincntperrank = 10000;
973 
974   pc->ops->setup          = PCSetUp_MPI;
975   pc->ops->apply          = PCApply_MPI;
976   pc->ops->destroy        = PCDestroy_MPI;
977   pc->ops->view           = PCView_MPI;
978   pc->ops->setfromoptions = PCSetFromOptions_MPI;
979   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
980   PetscFunctionReturn(PETSC_SUCCESS);
981 }
982 
983 /*@
984   PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`
985 
986   Not Collective
987 
988   Input Parameter:
989 . pc - the preconditioner context
990 
991   Output Parameter:
992 . innerksp - the inner `KSP`
993 
994   Level: advanced
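  Example Usage:
  A sketch of inspecting the inner solver (safe when the server short-circuits to its sequential, small-system mode,
  where the inner `KSP` lives on `PETSC_COMM_SELF`):
.vb
  KSP inner;

  PetscCall(PCMPIGetKSP(pc, &inner));
  PetscCall(KSPView(inner, PETSC_VIEWER_STDOUT_SELF));
.ve
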
995 
996 .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
997 @*/
998 PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
999 {
1000   PC_MPI *red = (PC_MPI *)pc->data;
1001 
1002   PetscFunctionBegin;
1003   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
1004   PetscAssertPointer(innerksp, 2);
1005   *innerksp = red->ksps[0];
1006   PetscFunctionReturn(PETSC_SUCCESS);
1007 }
1008