xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 5316cbed53243aa6ff2aea37634c8efbf95d18cc)
/*
    This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
    It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

    The calling program may use OpenMP to compute the right-hand side and matrix for the linear system.

    The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.

    The resulting KSP and PC can only be controlled via the options database, though some common commands
    could be passed through the server.
*/
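/*
   A sketch of the intended use (hedged; the executable name and the choice of inner solver options
   are illustrative, not prescribed by this file):

       mpiexec -n 4 ./myapp -mpi_linear_solver_server -ksp_type preonly -pc_type mpi -mpi_ksp_type cg -mpi_pc_type gamg

   With -mpi_linear_solver_server only rank 0 runs the user code in myapp; each KSPSolve() it
   performs with -pc_type mpi is then carried out in parallel by the server ranks.
*/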
#include <petsc/private/pcimpl.h>
#include <petsc/private/kspimpl.h>
#include <petsc.h>

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging, use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;

typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, meaning the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set the original matrix (or one with a different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update the current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;
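
/*
   Command protocol (as used throughout this file): rank 0 broadcasts one PCMPICommand on
   PC_MPI_COMM_WORLD and then calls the matching PCMPIXXX() routine; the server ranks receive
   the command inside PCMPIServerBegin() and call the same routine with a NULL PC. For example,
   PCApply_MPI() below issues

     PCMPICommand request = PCMPI_SOLVE;
     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
     PetscCall(PCMPISolve(pc, b, x));
*/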

static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE;
static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(0);
}
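
/*
   A worked example of the splits above: with 4 ranks in PC_MPI_COMM_WORLD the loop produces the
   nested subcommunicators

     PCMPIComms[0] = {0}   PCMPIComms[1] = {0,1}   PCMPIComms[2] = {0,1,2}   PCMPIComms[3] = {0,1,2,3}

   and on each rank the entries of PCMPIComms[] for communicators it does not belong to are MPI_COMM_NULL.
*/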

PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(0);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *prefix;
  PetscMPIInt len = 0;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitably sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPIU_INT, 0, comm));
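  /* For example, with N[0] = 50,000 rows and the default mincntperrank of 10,000, the line below
     selects PCMPIComms[4], i.e. a five-rank subcommunicator (capped at the total number of ranks) */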
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(KSPCreate(comm, &ksp));
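  /* rank 0 collects the address of every server rank's new KSP (communicated as an MPI_Aint) so that
     later commands can route each server rank's own KSP address back to it with MPI_Scatter() */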
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t slen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
    PetscCall(PetscStrlen(prefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
    PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, prefix));
  }
  PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           N[2], n, *ia, *ja, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    /* need to broadcast symmetry flags etc if set */
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Bcast(&bs, 1, MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, bs));
  PetscCall(PetscLayoutSetSize(layout, N[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
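  /* A worked example of the counts above: with IA = {0,2,5,9,12} (4 rows) split as 2 rows per rank
     over 2 ranks, range = {0,2,4}, so sendcounti = {3,3} and displi = {0,2} (adjacent row-pointer
     slices share one entry), NZ = {5,7}, and NZdispl = {0,5}; rank 1 receives ia = {5,9,12}, which
     the rebasing loop below turns into {0,4,7} */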
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
  PetscCall(MatSetBlockSize(A, bs));
  PetscCall(MatSetOptionsPrefix(A, "mpi_"));

  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ every time */

  /* scatterv rhs */
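  /* B and X exist only on rank 0 (the controlling process); ksp->vec_rhs and ksp->vec_sol are their
     MPI-parallel counterparts, moved with the counts/displacements saved in PCMPISetMat() */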
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(KSPDestroy(&ksp));
  PetscFunctionReturn(0);
}

/*@C
     PCMPIServerBegin - starts a server on the MPI ranks other than rank 0 that waits to process requests for
     parallel `KSP` solves and management of parallel `KSP` objects.

     Logically collective on all MPI ranks except 0

   Options Database Keys:
+   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
-   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server

     Note:
      This is normally started automatically in `PetscInitialize()` when the option is provided

     Developer Notes:
       When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
       written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
       (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

       Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

     Level: developer

.seealso: `PCMPIServerEnd()`, `PCMPI`
@*/
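/*
   A plausible way to launch the server (a sketch; the executable name is illustrative):

       mpiexec -n 4 ./myapp -mpi_linear_solver_server -mpi_linear_solver_server_view

   Rank 0 returns from this routine immediately to run the user's sequential program; the remaining
   ranks stay in the command loop below until they receive PCMPI_EXIT.
*/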
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    PETSC_COMM_WORLD = PETSC_COMM_SELF;
    PetscFunctionReturn(0);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(0);
}

/*@C
     PCMPIServerEnd - ends the server running on the MPI ranks other than rank 0 that waits to process requests for
     parallel `KSP` solves and management of parallel `KSP` objects.

     Logically collective on all MPI ranks except 0

     Note:
      This is normally ended automatically in `PetscFinalize()` when the option is provided

     Level: developer

.seealso: `PCMPIServerBegin()`, `PCMPI`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about systems solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PetscFunctionReturn(0);
}

/*
    This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
    because, for example, the problem is small. This version is more efficient because it does not require copying any data
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
  PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running the MPI linear solver server directly on rank 0 because the system is small\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

/*
     PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
     right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI      *km = (PC_MPI *)pc->data;
  PetscMPIInt  rank, size;
  PCMPICommand request;
  PetscBool    newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from the 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
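      /* with the default mincntperrank of 10,000, systems with fewer than 19,999 rows
         (n < 2 * mincntperrank - 1) are solved directly on rank 0 */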
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(0);
      }
    }

    request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    request = PCMPI_SET_MAT;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    request = PCMPI_UPDATE_MAT_VALUES;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PCMPICommand request = PCMPI_SOLVE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(0);
}

PetscErrorCode PCDestroy_MPI(PC pc)
{
  PCMPICommand request = PCMPI_DESTROY;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

/*
     PCView_MPI - Cannot call view on the MPI parallel KSP because the other ranks do not have access to the viewer
*/
PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
  PetscFunctionReturn(0);
}

PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/*MC
     PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

   Level: beginner

   Options Database Keys:
+  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
.  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
.  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -pc_mpi_always_use_server - use the server solver code even if the particular system is being solved on a single process; this option is only for debugging and testing purposes

   Notes:
   The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_

   The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
   potentially 4 to 8 MPI ranks, depending on the linear system being solved, the solver algorithm, and the hardware.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI rank then this directly uses the provided matrix and right-hand side (still in a `KSP` with the options prefix -mpi_)
   and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.

.seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
M*/
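/*
   A sketch of the intended calling pattern from user code (hedged; the matrix/vector assembly and
   the specific inner-solver options are illustrative placeholders, not mandated by this file):

     KSP ksp;
     PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp)); // the application itself runs only on rank 0
     PetscCall(KSPSetOperators(ksp, A, A));       // A is a sequential AIJ matrix assembled by the application
     PetscCall(KSPSetFromOptions(ksp));           // e.g. -ksp_type preonly -pc_type mpi -mpi_ksp_type cg
     PetscCall(KSPSolve(ksp, b, x));              // the solve itself runs on the parallel server ranks
*/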
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;

  PetscFunctionBegin;
  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscFunctionReturn(0);
}
661