/*
    This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
    It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

    The user's program may use OpenMP to compute the right hand side and matrix for the linear system.

    The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD

    The resulting KSP and PC can only be controlled via the options database, though some common commands
    could be passed through the server.

*/
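
/*
    A minimal client-side sketch (illustration only, not part of this file): the user's code is an
    ordinary sequential PETSc program; when launched with -mpi_linear_solver_server the solve below
    is farmed out to all the MPI ranks. The assembly routine name is hypothetical.

      #include <petscksp.h>
      int main(int argc, char **argv)
      {
        Mat A;
        Vec b, x;
        KSP ksp;

        PetscCall(PetscInitialize(&argc, &argv, NULL, NULL)); // starts the server loop on ranks > 0
        // ... assemble a MatSeqAIJ A and Vecs b, x on PETSC_COMM_SELF, e.g. AssembleSystem(&A, &b, &x) ...
        PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
        PetscCall(KSPSetOperators(ksp, A, A));
        PetscCall(KSPSetFromOptions(ksp)); // run with -ksp_type preonly -pc_type mpi to use this PC
        PetscCall(KSPSolve(ksp, b, x));
        PetscCall(KSPDestroy(&ksp));
        PetscCall(MatDestroy(&A));
        PetscCall(VecDestroy(&b));
        PetscCall(VecDestroy(&x));
        PetscCall(PetscFinalize()); // rank 0 tells the server ranks to exit
        return 0;
      }
*/
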
#include <petsc/private/pcimpl.h>
#include <petsc/private/kspimpl.h>
#include <petsc.h>

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;

typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;

static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE;
static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
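  /* PCMPIComms[i] contains ranks 0 through i of PC_MPI_COMM_WORLD; the remaining ranks pass
     MPI_UNDEFINED to MPI_Comm_split() and so receive MPI_COMM_NULL for that communicator */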
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(0);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *prefix;
  PetscMPIInt len = 0;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitable sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPIU_INT, 0, comm));
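  /* pick the communicator whose size is roughly N[0]/mincntperrank (at least 1, at most all ranks),
     so that each active rank holds about mincntperrank rows of the matrix */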
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(KSPCreate(comm, &ksp));
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t slen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
    PetscCall(PetscStrlen(prefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
    PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, prefix));
  }
  PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           N[2], n, *ia, *ja, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    /* need to broadcast symmetry flags etc. if set */
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Bcast(&bs, 1, MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, bs));
  PetscCall(PetscLayoutSetSize(layout, N[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

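  /* each rank allocates and receives its piece of the CSR data: n+1 row offsets, nz column indices, and nz numerical values */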
  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

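  /* the scattered row offsets are still global; shift them so each rank's ia[] starts at zero */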
  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
  PetscCall(MatSetBlockSize(A, bs));
  PetscCall(MatSetOptionsPrefix(A, "mpi_"));

  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           n;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ every time */

  /* scatterv rhs */
  if (pc) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPISolveCounts[size - 1]++;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  PetscCall(KSPSolve(ksp, NULL, NULL));

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(KSPDestroy(&ksp));
  PetscFunctionReturn(0);
}

/*@C
     PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
     parallel `KSP` solves and management of parallel `KSP` objects.

     Logically collective on all MPI ranks except 0

   Options Database Keys:
+   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
-   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server

     Note:
      This is normally started automatically in `PetscInitialize()` when the option is provided

     Developer Notes:
       When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
       written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
       (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

       Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

     Level: developer

.seealso: `PCMPIServerEnd()`, `PCMPI`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    PETSC_COMM_WORLD = PETSC_COMM_SELF;
    PetscFunctionReturn(0);
  }

  while (PETSC_TRUE) {
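    /* the initial value is only a placeholder; rank 0 broadcasts the actual command to execute */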
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(0);
}

/*@C
     PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
     parallel `KSP` solves and management of parallel `KSP` objects.

     Logically collective on all MPI ranks except 0

     Note:
      This is normally ended automatically in `PetscFinalize()` when the option is provided

     Level: developer

.seealso: `PCMPIServerBegin()`, `PCMPI`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about systems solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscInt    i, ksp = 0, mat = 0, solve = 0;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        for (i = 0; i < size; i++) {
          ksp += PCMPIKSPCounts[i];
          mat += PCMPIMatCounts[i];
          solve += PCMPISolveCounts[i];
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel KSPs  %" PetscInt_FMT " Mats  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", ksp, mat, solve));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential KSPs  %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", PCMPIKSPCountsSeq, PCMPISolveCountsSeq));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PetscFunctionReturn(0);
}

/*
    This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
    because, for example, the problem is small. This version is more efficient because it does not require copying any data
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
  PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PCMPISolveCountsSeq++;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

/*
     PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
     right hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI      *km = (PC_MPI *)pc->data;
  PetscMPIInt  rank, size;
  PCMPICommand request;
  PetscBool    newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
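      /* if there are not enough rows for at least two ranks to each hold roughly mincntperrank rows, solve directly on this rank */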
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(0);
      }
    }

    request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    request = PCMPI_SET_MAT;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    request = PCMPI_UPDATE_MAT_VALUES;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PCMPICommand request = PCMPI_SOLVE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(0);
}

PetscErrorCode PCDestroy_MPI(PC pc)
{
  PCMPICommand request = PCMPI_DESTROY;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

/*
     PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
*/
PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
  PetscFunctionReturn(0);
}

PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/*MC
     PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

   Level: beginner

   Options Database Keys:
+  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
.  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
.  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes

   Notes:
   The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_

   The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
   potentially 4 to 8 MPI ranks depending on the linear system being solved, the solver algorithm, and the hardware.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix of -mpi_)
   and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.
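
   Example Usage:
   A sketch of a launch line (the program name is hypothetical; the options are those registered in this file plus the usual `KSP`/`PC` type options with the -mpi_ prefix):
.vb
   mpiexec -n 4 ./myapp -mpi_linear_solver_server -ksp_type preonly -pc_type mpi -mpi_ksp_type cg -mpi_pc_type gamg -mpi_linear_solver_server_view
.ve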

.seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;

  PetscFunctionBegin;
  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscFunctionReturn(0);
}