xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 5e1a0e3c6e01980e646208f9267a1b3490610ebb)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     That program may use OpenMP to compute the right hand side and matrix for the linear system
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 #include <petsc.h>
16 
17 #define PC_MPI_MAX_RANKS  256
18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
19 
20 typedef struct {
21   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
24   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26 } PC_MPI;
27 
28 typedef enum {
29   PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
30   PCMPI_CREATE,
31   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
32   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33   PCMPI_SOLVE,
34   PCMPI_VIEW,
35   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36 } PCMPICommand;
37 
38 static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
39 static PetscBool PCMPICommSet = PETSC_FALSE;
40 static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41 
42 static PetscErrorCode PCMPICommsCreate(void) {
43   MPI_Comm    comm = PC_MPI_COMM_WORLD;
44   PetscMPIInt size, rank, i;
45 
46   PetscFunctionBegin;
47   PetscCallMPI(MPI_Comm_size(comm, &size));
48   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
49   PetscCallMPI(MPI_Comm_rank(comm, &rank));
50   /* comm for size 1 is useful only for debugging */
51   for (i = 0; i < size; i++) {
52     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
53     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
54     PCMPISolveCounts[i] = 0;
55     PCMPIKSPCounts[i]   = 0;
56   }
57   PCMPICommSet = PETSC_TRUE;
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode PCMPICommsDestroy(void) {
62   MPI_Comm    comm = PC_MPI_COMM_WORLD;
63   PetscMPIInt size, rank, i;
64 
65   PetscFunctionBegin;
66   if (!PCMPICommSet) PetscFunctionReturn(0);
67   PetscCallMPI(MPI_Comm_size(comm, &size));
68   PetscCallMPI(MPI_Comm_rank(comm, &rank));
69   for (i = 0; i < size; i++) {
70     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
71   }
72   PCMPICommSet = PETSC_FALSE;
73   PetscFunctionReturn(0);
74 }
75 
76 static PetscErrorCode PCMPICreate(PC pc) {
77   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
78   MPI_Comm    comm = PC_MPI_COMM_WORLD;
79   KSP         ksp;
80   PetscInt    N[2], mincntperrank = 0;
81   PetscMPIInt size;
82   Mat         sA;
83   char       *prefix;
84   PetscMPIInt len = 0;
85 
86   PetscFunctionBegin;
87   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
88   PetscCallMPI(MPI_Comm_size(comm, &size));
89   if (pc) {
90     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
91     PetscCall(PCGetOperators(pc, &sA, &sA));
92     PetscCall(MatGetSize(sA, &N[0], &N[1]));
93   }
94   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
95 
96   /* choose a suitable sized MPI_Comm for the problem to be solved on */
97   if (km) mincntperrank = km->mincntperrank;
98   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
99   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
100   if (comm == MPI_COMM_NULL) {
101     ksp = NULL;
102     PetscFunctionReturn(0);
103   }
104   PetscCall(KSPCreate(comm, &ksp));
105   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
106   if (pc) {
107     size_t slen;
108 
109     PetscCallMPI(MPI_Comm_size(comm, &size));
110     PCMPIKSPCounts[size - 1]++;
111     PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
112     PetscCall(PetscStrlen(prefix, &slen));
113     len = (PetscMPIInt)slen;
114   }
115   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
116   if (len) {
117     if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
118     PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
119     PetscCall(KSPSetOptionsPrefix(ksp, prefix));
120   }
121   PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
122   PetscFunctionReturn(0);
123 }
124 
125 static PetscErrorCode PCMPISetMat(PC pc) {
126   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
127   Mat                A;
128   PetscInt           N[2], n, *ia, *ja, j;
129   Mat                sA;
130   MPI_Comm           comm = PC_MPI_COMM_WORLD;
131   KSP                ksp;
132   PetscLayout        layout;
133   const PetscInt    *IA = NULL, *JA = NULL;
134   const PetscInt    *range;
135   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
136   PetscScalar       *a;
137   const PetscScalar *sa = NULL;
138 
139   PetscFunctionBegin;
140   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
141   if (!ksp) PetscFunctionReturn(0);
142   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
143   if (pc) {
144     PetscCallMPI(MPI_Comm_size(comm, &size));
145     PCMPIMatCounts[size - 1]++;
146     PetscCall(PCGetOperators(pc, &sA, &sA));
147     PetscCall(MatGetSize(sA, &N[0], &N[1]));
148   }
149   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
150 
151   /* determine ownership ranges of matrix */
152   PetscCall(PetscLayoutCreate(comm, &layout));
153   PetscCall(PetscLayoutSetBlockSize(layout, 1));
154   PetscCall(PetscLayoutSetSize(layout, N[0]));
155   PetscCall(PetscLayoutSetUp(layout));
156   PetscCall(PetscLayoutGetLocalSize(layout, &n));
157 
158   /* copy over the matrix nonzero structure and values */
159   if (pc) {
160     NZ      = km->NZ;
161     NZdispl = km->NZdispl;
162     PetscCall(PetscLayoutGetRanges(layout, &range));
163     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
164     for (i = 0; i < size; i++) {
165       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
166       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
167     }
168     displi[0]  = 0;
169     NZdispl[0] = 0;
170     for (j = 1; j < size; j++) {
171       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
172       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
173     }
174     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
175   }
176   PetscCall(PetscLayoutDestroy(&layout));
177   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
178 
179   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
180   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
181   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
182   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
183 
184   if (pc) {
185     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
186     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
187   }
188 
189   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
190   ia[0] = 0;
191   PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
192   PetscCall(MatSetOptionsPrefix(A, "mpi_"));
193 
194   PetscCall(PetscFree3(ia, ja, a));
195   PetscCall(KSPSetOperators(ksp, A, A));
196   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
197   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
198     const PetscInt *range;
199 
200     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
201     for (i = 0; i < size; i++) {
202       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
203       km->displ[i]     = (PetscMPIInt)range[i];
204     }
205   }
206   PetscCall(MatDestroy(&A));
207   PetscCall(KSPSetFromOptions(ksp));
208   PetscFunctionReturn(0);
209 }
210 
211 static PetscErrorCode PCMPIUpdateMatValues(PC pc) {
212   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
213   KSP                ksp;
214   Mat                sA, A;
215   MPI_Comm           comm = PC_MPI_COMM_WORLD;
216   PetscScalar       *a;
217   PetscCount         nz;
218   const PetscScalar *sa = NULL;
219   PetscMPIInt        size;
220 
221   PetscFunctionBegin;
222   if (pc) {
223     PetscCall(PCGetOperators(pc, &sA, &sA));
224     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
225   }
226   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
227   if (!ksp) PetscFunctionReturn(0);
228   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
229   PetscCallMPI(MPI_Comm_size(comm, &size));
230   PCMPIMatCounts[size - 1]++;
231   PetscCall(KSPGetOperators(ksp, NULL, &A));
232   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
233   PetscCall(PetscMalloc1(nz, &a));
234   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
235   if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
236   PetscCall(MatUpdateMPIAIJWithArray(A, a));
237   PetscCall(PetscFree(a));
238   PetscFunctionReturn(0);
239 }
240 
241 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) {
242   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
243   KSP                ksp;
244   MPI_Comm           comm = PC_MPI_COMM_WORLD;
245   const PetscScalar *sb   = NULL, *x;
246   PetscScalar       *b, *sx = NULL;
247   PetscInt           n;
248 
249   PetscFunctionBegin;
250   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
251   if (!ksp) PetscFunctionReturn(0);
252   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
253 
254   /* TODO: optimize code to not require building counts/displ everytime */
255 
256   /* scatterv rhs */
257   if (pc) {
258     PetscMPIInt size;
259 
260     PetscCallMPI(MPI_Comm_size(comm, &size));
261     PCMPISolveCounts[size - 1]++;
262     PetscCall(VecGetArrayRead(B, &sb));
263   }
264   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
265   PetscCall(VecGetArray(ksp->vec_rhs, &b));
266   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
267   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
268   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
269 
270   PetscCall(KSPSolve(ksp, NULL, NULL));
271 
272   /* gather solution */
273   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
274   if (pc) PetscCall(VecGetArray(X, &sx));
275   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
276   if (pc) PetscCall(VecRestoreArray(X, &sx));
277   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
278   PetscFunctionReturn(0);
279 }
280 
281 static PetscErrorCode PCMPIDestroy(PC pc) {
282   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
283   KSP      ksp;
284   MPI_Comm comm = PC_MPI_COMM_WORLD;
285 
286   PetscFunctionBegin;
287   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
288   if (!ksp) PetscFunctionReturn(0);
289   PetscCall(KSPDestroy(&ksp));
290   PetscFunctionReturn(0);
291 }
292 
293 /*@C
294      PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
295      parallel KSP solves and management of parallel KSP objects.
296 
297      Logically collective on all MPI ranks except 0
298 
299    Options Database:
300 +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
301 -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
302 
303      Notes:
304       This is normally started automatically in `PetscInitialize()` when the option is provided
305 
306      Developer Notes:
307        When called on rank zero this sets PETSC_COMM_WORLD to PETSC_COMM_SELF to allow a main program
308        written with PETSC_COMM_WORLD to run correctly on the single rank while all the ranks
309        (that would normally be sharing PETSC_COMM_WORLD) to run the solver server.
310 
311        Can this be integrated into the PetscDevice abstraction that is currently being developed?
312 
313      Level: developer
314 
315 .seealso: PCMPIServerEnd(), PCMPI
316 @*/
317 PetscErrorCode PCMPIServerBegin(void) {
318   PetscMPIInt rank;
319 
320   PetscFunctionBegin;
321   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
322   if (PetscDefined(USE_SINGLE_LIBRARY)) {
323     PetscCall(VecInitializePackage());
324     PetscCall(MatInitializePackage());
325     PetscCall(DMInitializePackage());
326     PetscCall(PCInitializePackage());
327     PetscCall(KSPInitializePackage());
328     PetscCall(SNESInitializePackage());
329     PetscCall(TSInitializePackage());
330     PetscCall(TaoInitializePackage());
331   }
332 
333   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
334   if (rank == 0) {
335     PETSC_COMM_WORLD = PETSC_COMM_SELF;
336     PetscFunctionReturn(0);
337   }
338 
339   while (PETSC_TRUE) {
340     PCMPICommand request = PCMPI_CREATE;
341     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
342     switch (request) {
343     case PCMPI_CREATE: PetscCall(PCMPICreate(NULL)); break;
344     case PCMPI_SET_MAT: PetscCall(PCMPISetMat(NULL)); break;
345     case PCMPI_UPDATE_MAT_VALUES: PetscCall(PCMPIUpdateMatValues(NULL)); break;
346     case PCMPI_VIEW:
347       // PetscCall(PCMPIView(NULL));
348       break;
349     case PCMPI_SOLVE: PetscCall(PCMPISolve(NULL, NULL, NULL)); break;
350     case PCMPI_DESTROY: PetscCall(PCMPIDestroy(NULL)); break;
351     case PCMPI_EXIT:
352       PetscCall(PetscFinalize());
353       exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
354       break;
355     default: break;
356     }
357   }
358   PetscFunctionReturn(0);
359 }
360 
361 /*@C
362      PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
363      parallel KSP solves and management of parallel KSP objects.
364 
365      Logically collective on all MPI ranks except 0
366 
367      Notes:
368       This is normally ended automatically in `PetscFinalize()` when the option is provided
369 
370      Level: developer
371 
372 .seealso: PCMPIServerBegin(), PCMPI
373 @*/
374 PetscErrorCode PCMPIServerEnd(void) {
375   PCMPICommand request = PCMPI_EXIT;
376 
377   PetscFunctionBegin;
378   if (PetscGlobalRank == 0) {
379     PetscViewer       viewer = NULL;
380     PetscViewerFormat format;
381 
382     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
383     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
384     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
385     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
386     PetscOptionsEnd();
387     if (viewer) {
388       PetscBool isascii;
389 
390       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
391       if (isascii) {
392         PetscMPIInt size;
393         PetscInt    i, ksp = 0, mat = 0, solve = 0;
394 
395         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
396         for (i = 0; i < size; i++) {
397           ksp += PCMPIKSPCounts[i];
398           mat += PCMPIMatCounts[i];
399           solve += PCMPISolveCounts[i];
400         }
401         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server:\n"));
402         PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel KSPs  %" PetscInt_FMT " Mats  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", ksp, mat, solve));
403         PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential KSPs  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", PCMPIKSPCountsSeq, PCMPISolveCountsSeq));
404       }
405       PetscCall(PetscViewerDestroy(&viewer));
406     }
407   }
408   PetscCall(PCMPICommsDestroy());
409   PetscFunctionReturn(0);
410 }
411 
412 /*
413     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
414     because, for example, the problem is small. This version is more efficient because it does not require copying any data
415 */
416 static PetscErrorCode PCSetUp_Seq(PC pc) {
417   PC_MPI     *km = (PC_MPI *)pc->data;
418   Mat         sA;
419   const char *prefix;
420 
421   PetscFunctionBegin;
422   PetscCall(PCGetOperators(pc, NULL, &sA));
423   PetscCall(PCGetOptionsPrefix(pc, &prefix));
424   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
425   PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
426   PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
427   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
428   PetscCall(KSPSetFromOptions(km->ksps[0]));
429   PetscCall(KSPSetUp(km->ksps[0]));
430   PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n");
431   PCMPIKSPCountsSeq++;
432   PetscFunctionReturn(0);
433 }
434 
435 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) {
436   PC_MPI *km = (PC_MPI *)pc->data;
437 
438   PetscFunctionBegin;
439   PetscCall(KSPSolve(km->ksps[0], b, x));
440   PCMPISolveCountsSeq++;
441   PetscFunctionReturn(0);
442 }
443 
444 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) {
445   PC_MPI     *km = (PC_MPI *)pc->data;
446   const char *prefix;
447 
448   PetscFunctionBegin;
449   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
450   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
451   PetscCall(PCGetOptionsPrefix(pc, &prefix));
452   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix));
453   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
454   PetscFunctionReturn(0);
455 }
456 
457 static PetscErrorCode PCDestroy_Seq(PC pc) {
458   PC_MPI *km = (PC_MPI *)pc->data;
459 
460   PetscFunctionBegin;
461   PetscCall(KSPDestroy(&km->ksps[0]));
462   PetscCall(PetscFree(pc->data));
463   PetscFunctionReturn(0);
464 }
465 
466 /*
467      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
468      right hand side to the parallel PC
469 */
470 static PetscErrorCode PCSetUp_MPI(PC pc) {
471   PC_MPI      *km = (PC_MPI *)pc->data;
472   PetscMPIInt  rank, size;
473   PCMPICommand request;
474   PetscBool    newmatrix = PETSC_FALSE;
475 
476   PetscFunctionBegin;
477   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
478   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
479   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
480 
481   if (!pc->setupcalled) {
482     if (!km->alwaysuseserver) {
483       PetscInt n;
484       Mat      sA;
485       /* short circuit for small systems */
486       PetscCall(PCGetOperators(pc, &sA, &sA));
487       PetscCall(MatGetSize(sA, &n, NULL));
488       if (n < 2 * km->mincntperrank - 1 || size == 1) {
489         pc->ops->setup   = NULL;
490         pc->ops->apply   = PCApply_Seq;
491         pc->ops->destroy = PCDestroy_Seq;
492         pc->ops->view    = PCView_Seq;
493         PetscCall(PCSetUp_Seq(pc));
494         PetscFunctionReturn(0);
495       }
496     }
497 
498     request = PCMPI_CREATE;
499     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
500     PetscCall(PCMPICreate(pc));
501     newmatrix = PETSC_TRUE;
502   }
503   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
504 
505   if (newmatrix) {
506     PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n");
507     request = PCMPI_SET_MAT;
508     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
509     PetscCall(PCMPISetMat(pc));
510   } else {
511     PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n");
512     request = PCMPI_UPDATE_MAT_VALUES;
513     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
514     PetscCall(PCMPIUpdateMatValues(pc));
515   }
516   PetscFunctionReturn(0);
517 }
518 
519 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) {
520   PCMPICommand request = PCMPI_SOLVE;
521 
522   PetscFunctionBegin;
523   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
524   PetscCall(PCMPISolve(pc, b, x));
525   PetscFunctionReturn(0);
526 }
527 
528 PetscErrorCode PCDestroy_MPI(PC pc) {
529   PCMPICommand request = PCMPI_DESTROY;
530 
531   PetscFunctionBegin;
532   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
533   PetscCall(PCMPIDestroy(pc));
534   PetscCall(PetscFree(pc->data));
535   PetscFunctionReturn(0);
536 }
537 
538 /*
539      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
540 */
541 PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) {
542   PC_MPI     *km = (PC_MPI *)pc->data;
543   MPI_Comm    comm;
544   PetscMPIInt size;
545   const char *prefix;
546 
547   PetscFunctionBegin;
548   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
549   PetscCallMPI(MPI_Comm_size(comm, &size));
550   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
551   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
552   PetscCall(PCGetOptionsPrefix(pc, &prefix));
553   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix));
554   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n"));
555   PetscFunctionReturn(0);
556 }
557 
558 PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) {
559   PC_MPI *km = (PC_MPI *)pc->data;
560 
561   PetscFunctionBegin;
562   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
563   PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
564   PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
565   PetscOptionsHeadEnd();
566   PetscFunctionReturn(0);
567 }
568 
569 /*MC
570      PCMPI - Calls an MPI parallel KSP to solve a linear system from user code running on one process
571 
572    Level: beginner
573 
574    Options Database:
575 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
576 .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
577 .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
578 -  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes
579 
580    Notes:
581    The options database prefix for the MPI parallel KSP and PC is -mpi_
582 
583    The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
584    potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware.
585 
586    It can be particularly useful for user OpenMP code or potentially user GPU code.
587 
588    When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a KSP with the options prefix of -mpi)
589    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the KSP directly.
590 
591 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
592 
593 M*/
594 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) {
595   PC_MPI *km;
596 
597   PetscFunctionBegin;
598   PetscCall(PetscNewLog(pc, &km));
599   pc->data = (void *)km;
600 
601   km->mincntperrank = 10000;
602 
603   pc->ops->setup          = PCSetUp_MPI;
604   pc->ops->apply          = PCApply_MPI;
605   pc->ops->destroy        = PCDestroy_MPI;
606   pc->ops->view           = PCView_MPI;
607   pc->ops->setfromoptions = PCSetFromOptions_MPI;
608   PetscFunctionReturn(0);
609 }
610