xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 0316ec648ec12c0ddd7ba3b11a3407bb7396c5f1)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     That program may use OpenMP to compute the right hand side and matrix for the linear system
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 #include <petsc.h>
16 
17 #define PC_MPI_MAX_RANKS  256
18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
19 
20 typedef struct {
21   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
24   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26 } PC_MPI;
27 
28 typedef enum {
29   PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
30   PCMPI_CREATE,
31   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
32   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33   PCMPI_SOLVE,
34   PCMPI_VIEW,
35   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36 } PCMPICommand;
37 
38 static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
39 static PetscBool PCMPICommSet = PETSC_FALSE;
40 static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41 static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS],PCMPISizes[PC_MPI_MAX_RANKS],PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
42 
43 static PetscErrorCode PCMPICommsCreate(void)
44 {
45   MPI_Comm    comm = PC_MPI_COMM_WORLD;
46   PetscMPIInt size, rank, i;
47 
48   PetscFunctionBegin;
49   PetscCallMPI(MPI_Comm_size(comm, &size));
50   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
51   PetscCallMPI(MPI_Comm_rank(comm, &rank));
52   /* comm for size 1 is useful only for debugging */
53   for (i = 0; i < size; i++) {
54     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
55     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
56     PCMPISolveCounts[i] = 0;
57     PCMPIKSPCounts[i]   = 0;
58     PCMPIIterations[i]  = 0;
59     PCMPISizes[i]       = 0;
60   }
61   PCMPICommSet = PETSC_TRUE;
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode PCMPICommsDestroy(void)
66 {
67   MPI_Comm    comm = PC_MPI_COMM_WORLD;
68   PetscMPIInt size, rank, i;
69 
70   PetscFunctionBegin;
71   if (!PCMPICommSet) PetscFunctionReturn(0);
72   PetscCallMPI(MPI_Comm_size(comm, &size));
73   PetscCallMPI(MPI_Comm_rank(comm, &rank));
74   for (i = 0; i < size; i++) {
75     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
76   }
77   PCMPICommSet = PETSC_FALSE;
78   PetscFunctionReturn(0);
79 }
80 
81 static PetscErrorCode PCMPICreate(PC pc)
82 {
83   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
84   MPI_Comm    comm = PC_MPI_COMM_WORLD;
85   KSP         ksp;
86   PetscInt    N[2], mincntperrank = 0;
87   PetscMPIInt size;
88   Mat         sA;
89   char       *prefix;
90   PetscMPIInt len = 0;
91 
92   PetscFunctionBegin;
93   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
94   PetscCallMPI(MPI_Comm_size(comm, &size));
95   if (pc) {
96     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
97     PetscCall(PCGetOperators(pc, &sA, &sA));
98     PetscCall(MatGetSize(sA, &N[0], &N[1]));
99   }
100   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
101 
102   /* choose a suitable sized MPI_Comm for the problem to be solved on */
103   if (km) mincntperrank = km->mincntperrank;
104   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
105   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
106   if (comm == MPI_COMM_NULL) {
107     ksp = NULL;
108     PetscFunctionReturn(0);
109   }
110   PetscCall(PetscLogStagePush(PCMPIStage));
111   PetscCall(KSPCreate(comm, &ksp));
112   PetscCall(PetscLogStagePop());
113   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
114   if (pc) {
115     size_t slen;
116 
117     PetscCallMPI(MPI_Comm_size(comm, &size));
118     PCMPIKSPCounts[size - 1]++;
119     PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
120     PetscCall(PetscStrlen(prefix, &slen));
121     len = (PetscMPIInt)slen;
122   }
123   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
124   if (len) {
125     if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
126     PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
127     PetscCall(KSPSetOptionsPrefix(ksp, prefix));
128   }
129   PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
130   PetscFunctionReturn(0);
131 }
132 
133 static PetscErrorCode PCMPISetMat(PC pc)
134 {
135   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
136   Mat                A;
137   PetscInt           N[2], n, *ia, *ja, j, bs;
138   Mat                sA;
139   MPI_Comm           comm = PC_MPI_COMM_WORLD;
140   KSP                ksp;
141   PetscLayout        layout;
142   const PetscInt    *IA = NULL, *JA = NULL;
143   const PetscInt    *range;
144   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
145   PetscScalar       *a;
146   const PetscScalar *sa = NULL;
147 
148   PetscFunctionBegin;
149   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
150   if (!ksp) PetscFunctionReturn(0);
151   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
152   if (pc) {
153     PetscCallMPI(MPI_Comm_size(comm, &size));
154     PCMPIMatCounts[size - 1]++;
155     PetscCall(PCGetOperators(pc, &sA, &sA));
156     PetscCall(MatGetSize(sA, &N[0], &N[1]));
157     PetscCall(MatGetBlockSize(sA, &bs));
158     /* need to broadcast symmetry flags etc if set */
159   }
160   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
161   PetscCallMPI(MPI_Bcast(&bs, 1, MPIU_INT, 0, comm));
162 
163   /* determine ownership ranges of matrix */
164   PetscCall(PetscLayoutCreate(comm, &layout));
165   PetscCall(PetscLayoutSetBlockSize(layout, bs));
166   PetscCall(PetscLayoutSetSize(layout, N[0]));
167   PetscCall(PetscLayoutSetUp(layout));
168   PetscCall(PetscLayoutGetLocalSize(layout, &n));
169 
170   /* copy over the matrix nonzero structure and values */
171   if (pc) {
172     NZ      = km->NZ;
173     NZdispl = km->NZdispl;
174     PetscCall(PetscLayoutGetRanges(layout, &range));
175     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
176     for (i = 0; i < size; i++) {
177       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
178       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
179     }
180     displi[0]  = 0;
181     NZdispl[0] = 0;
182     for (j = 1; j < size; j++) {
183       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
184       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
185     }
186     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
187   }
188   PetscCall(PetscLayoutDestroy(&layout));
189   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
190 
191   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
192   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
193   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
194   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
195 
196   if (pc) {
197     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
198     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
199   }
200 
201   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
202   ia[0] = 0;
203   PetscCall(PetscLogStagePush(PCMPIStage));
204   PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
205   PetscCall(MatSetBlockSize(A, bs));
206   PetscCall(MatSetOptionsPrefix(A, "mpi_"));
207 
208   PetscCall(PetscFree3(ia, ja, a));
209   PetscCall(KSPSetOperators(ksp, A, A));
210   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
211   PetscCall(PetscLogStagePop());
212   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
213     const PetscInt *range;
214 
215     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
216     for (i = 0; i < size; i++) {
217       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
218       km->displ[i]     = (PetscMPIInt)range[i];
219     }
220   }
221   PetscCall(MatDestroy(&A));
222   PetscCall(KSPSetFromOptions(ksp));
223   PetscFunctionReturn(0);
224 }
225 
226 static PetscErrorCode PCMPIUpdateMatValues(PC pc)
227 {
228   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
229   KSP                ksp;
230   Mat                sA, A;
231   MPI_Comm           comm = PC_MPI_COMM_WORLD;
232   PetscScalar       *a;
233   PetscCount         nz;
234   const PetscScalar *sa = NULL;
235   PetscMPIInt        size;
236 
237   PetscFunctionBegin;
238   if (pc) {
239     PetscCall(PCGetOperators(pc, &sA, &sA));
240     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
241   }
242   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
243   if (!ksp) PetscFunctionReturn(0);
244   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
245   PetscCallMPI(MPI_Comm_size(comm, &size));
246   PCMPIMatCounts[size - 1]++;
247   PetscCall(KSPGetOperators(ksp, NULL, &A));
248   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
249   PetscCall(PetscMalloc1(nz, &a));
250   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
251   if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
252   PetscCall(MatUpdateMPIAIJWithArray(A, a));
253   PetscCall(PetscFree(a));
254   PetscFunctionReturn(0);
255 }
256 
257 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
258 {
259   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
260   KSP                ksp;
261   MPI_Comm           comm = PC_MPI_COMM_WORLD;
262   const PetscScalar *sb   = NULL, *x;
263   PetscScalar       *b, *sx = NULL;
264   PetscInt           its,n;
265   PetscMPIInt        size;
266 
267   PetscFunctionBegin;
268   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
269   if (!ksp) PetscFunctionReturn(0);
270   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
271 
272   /* TODO: optimize code to not require building counts/displ everytime */
273 
274   /* scatterv rhs */
275   PetscCallMPI(MPI_Comm_size(comm, &size));
276   if (pc) {
277     PetscInt N;
278 
279     PCMPISolveCounts[size - 1]++;
280     PetscCall(MatGetSize(pc->pmat,&N,NULL));;
281     PCMPISizes[size - 1] += N;
282     PetscCall(VecGetArrayRead(B, &sb));
283   }
284   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
285   PetscCall(VecGetArray(ksp->vec_rhs, &b));
286   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
287   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
288   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
289 
290   PetscCall(PetscLogStagePush(PCMPIStage));
291   PetscCall(KSPSolve(ksp, NULL, NULL));
292   PetscCall(PetscLogStagePop());
293   PetscCall(KSPGetIterationNumber(ksp,&its));
294   PCMPIIterations[size - 1] += its;
295 
296   /* gather solution */
297   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
298   if (pc) PetscCall(VecGetArray(X, &sx));
299   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
300   if (pc) PetscCall(VecRestoreArray(X, &sx));
301   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
302   PetscFunctionReturn(0);
303 }
304 
305 static PetscErrorCode PCMPIDestroy(PC pc)
306 {
307   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
308   KSP      ksp;
309   MPI_Comm comm = PC_MPI_COMM_WORLD;
310 
311   PetscFunctionBegin;
312   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
313   if (!ksp) PetscFunctionReturn(0);
314   PetscCall(KSPDestroy(&ksp));
315   PetscFunctionReturn(0);
316 }
317 
318 /*@C
319      PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
320      parallel `KSP` solves and management of parallel `KSP` objects.
321 
322      Logically collective on all MPI ranks except 0
323 
324    Options Database Keys:
325 +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
326 -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
327 
328      Note:
329       This is normally started automatically in `PetscInitialize()` when the option is provided
330 
331      Developer Notes:
332        When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
333        written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
334        (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.
335 
336        Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
337 
338      Level: developer
339 
340 .seealso: `PCMPIServerEnd()`, `PCMPI`
341 @*/
342 PetscErrorCode PCMPIServerBegin(void)
343 {
344   PetscMPIInt rank;
345 
346   PetscFunctionBegin;
347   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
348   if (PetscDefined(USE_SINGLE_LIBRARY)) {
349     PetscCall(VecInitializePackage());
350     PetscCall(MatInitializePackage());
351     PetscCall(DMInitializePackage());
352     PetscCall(PCInitializePackage());
353     PetscCall(KSPInitializePackage());
354     PetscCall(SNESInitializePackage());
355     PetscCall(TSInitializePackage());
356     PetscCall(TaoInitializePackage());
357   }
358 
359   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
360   if (rank == 0) {
361     PETSC_COMM_WORLD = PETSC_COMM_SELF;
362     PetscFunctionReturn(0);
363   }
364 
365   while (PETSC_TRUE) {
366     PCMPICommand request = PCMPI_CREATE;
367     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
368     switch (request) {
369     case PCMPI_CREATE:
370       PetscCall(PCMPICreate(NULL));
371       break;
372     case PCMPI_SET_MAT:
373       PetscCall(PCMPISetMat(NULL));
374       break;
375     case PCMPI_UPDATE_MAT_VALUES:
376       PetscCall(PCMPIUpdateMatValues(NULL));
377       break;
378     case PCMPI_VIEW:
379       // PetscCall(PCMPIView(NULL));
380       break;
381     case PCMPI_SOLVE:
382       PetscCall(PCMPISolve(NULL, NULL, NULL));
383       break;
384     case PCMPI_DESTROY:
385       PetscCall(PCMPIDestroy(NULL));
386       break;
387     case PCMPI_EXIT:
388       PetscCall(PetscFinalize());
389       exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
390       break;
391     default:
392       break;
393     }
394   }
395   PetscFunctionReturn(0);
396 }
397 
398 /*@C
399      PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
400      parallel KSP solves and management of parallel `KSP` objects.
401 
402      Logically collective on all MPI ranks except 0
403 
404      Note:
405       This is normally ended automatically in `PetscFinalize()` when the option is provided
406 
407      Level: developer
408 
409 .seealso: `PCMPIServerBegin()`, `PCMPI`
410 @*/
411 PetscErrorCode PCMPIServerEnd(void)
412 {
413   PCMPICommand request = PCMPI_EXIT;
414 
415   PetscFunctionBegin;
416   if (PetscGlobalRank == 0) {
417     PetscViewer       viewer = NULL;
418     PetscViewerFormat format;
419 
420     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
421     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
422     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
423     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
424     PetscOptionsEnd();
425     if (viewer) {
426       PetscBool isascii;
427 
428       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
429       if (isascii) {
430         PetscMPIInt size;
431         PetscMPIInt i;
432 
433         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
434         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
435         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
436         if (PCMPIKSPCountsSeq) {
437           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq,  PCMPIKSPCountsSeq,PCMPISizesSeq/PCMPISolveCountsSeq,PCMPIIterationsSeq/PCMPISolveCountsSeq));
438         }
439         for (i = 0; i < size; i++) {
440           if (PCMPIKSPCounts[i]) {
441             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT  "            %" PetscInt_FMT  "            %" PetscInt_FMT  "\n", i+1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i]/PCMPISolveCounts[i],PCMPIIterations[i]/PCMPISolveCounts[i]));
442           }
443         }
444       }
445       PetscCall(PetscViewerDestroy(&viewer));
446     }
447   }
448   PetscCall(PCMPICommsDestroy());
449   PetscFunctionReturn(0);
450 }
451 
452 /*
453     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
454     because, for example, the problem is small. This version is more efficient because it does not require copying any data
455 */
456 static PetscErrorCode PCSetUp_Seq(PC pc)
457 {
458   PC_MPI     *km = (PC_MPI *)pc->data;
459   Mat         sA;
460   const char *prefix;
461 
462   PetscFunctionBegin;
463   PetscCall(PCGetOperators(pc, NULL, &sA));
464   PetscCall(PCGetOptionsPrefix(pc, &prefix));
465   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
466   PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
467   PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
468   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
469   PetscCall(KSPSetFromOptions(km->ksps[0]));
470   PetscCall(KSPSetUp(km->ksps[0]));
471   PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n");
472   PCMPIKSPCountsSeq++;
473   PetscFunctionReturn(0);
474 }
475 
476 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
477 {
478   PC_MPI   *km = (PC_MPI *)pc->data;
479   PetscInt its,n;
480   Mat      A;
481 
482   PetscFunctionBegin;
483   PetscCall(KSPSolve(km->ksps[0], b, x));
484   PetscCall(KSPGetIterationNumber(km->ksps[0],&its));
485   PCMPISolveCountsSeq++;
486   PCMPIIterationsSeq += its;
487   PetscCall(KSPGetOperators(km->ksps[0],NULL,&A));
488   PetscCall(MatGetSize(A,&n,NULL));
489   PCMPISizesSeq += n;
490   PetscFunctionReturn(0);
491 }
492 
493 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
494 {
495   PC_MPI     *km = (PC_MPI *)pc->data;
496   const char *prefix;
497 
498   PetscFunctionBegin;
499   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
500   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
501   PetscCall(PCGetOptionsPrefix(pc, &prefix));
502   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
503   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
504   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
505   PetscFunctionReturn(0);
506 }
507 
508 static PetscErrorCode PCDestroy_Seq(PC pc)
509 {
510   PC_MPI *km = (PC_MPI *)pc->data;
511 
512   PetscFunctionBegin;
513   PetscCall(KSPDestroy(&km->ksps[0]));
514   PetscCall(PetscFree(pc->data));
515   PetscFunctionReturn(0);
516 }
517 
518 /*
519      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
520      right hand side to the parallel PC
521 */
522 static PetscErrorCode PCSetUp_MPI(PC pc)
523 {
524   PC_MPI      *km = (PC_MPI *)pc->data;
525   PetscMPIInt  rank, size;
526   PCMPICommand request;
527   PetscBool    newmatrix = PETSC_FALSE;
528 
529   PetscFunctionBegin;
530   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
531   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
532   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
533 
534   if (!pc->setupcalled) {
535     if (!km->alwaysuseserver) {
536       PetscInt n;
537       Mat      sA;
538       /* short circuit for small systems */
539       PetscCall(PCGetOperators(pc, &sA, &sA));
540       PetscCall(MatGetSize(sA, &n, NULL));
541       if (n < 2 * km->mincntperrank - 1 || size == 1) {
542         pc->ops->setup   = NULL;
543         pc->ops->apply   = PCApply_Seq;
544         pc->ops->destroy = PCDestroy_Seq;
545         pc->ops->view    = PCView_Seq;
546         PetscCall(PCSetUp_Seq(pc));
547         PetscFunctionReturn(0);
548       }
549     }
550 
551     request = PCMPI_CREATE;
552     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
553     PetscCall(PCMPICreate(pc));
554     newmatrix = PETSC_TRUE;
555   }
556   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
557 
558   if (newmatrix) {
559     PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n");
560     request = PCMPI_SET_MAT;
561     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
562     PetscCall(PCMPISetMat(pc));
563   } else {
564     PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n");
565     request = PCMPI_UPDATE_MAT_VALUES;
566     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
567     PetscCall(PCMPIUpdateMatValues(pc));
568   }
569   PetscFunctionReturn(0);
570 }
571 
572 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
573 {
574   PCMPICommand request = PCMPI_SOLVE;
575 
576   PetscFunctionBegin;
577   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
578   PetscCall(PCMPISolve(pc, b, x));
579   PetscFunctionReturn(0);
580 }
581 
582 PetscErrorCode PCDestroy_MPI(PC pc)
583 {
584   PCMPICommand request = PCMPI_DESTROY;
585 
586   PetscFunctionBegin;
587   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
588   PetscCall(PCMPIDestroy(pc));
589   PetscCall(PetscFree(pc->data));
590   PetscFunctionReturn(0);
591 }
592 
593 /*
594      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
595 */
596 PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
597 {
598   PC_MPI     *km = (PC_MPI *)pc->data;
599   MPI_Comm    comm;
600   PetscMPIInt size;
601   const char *prefix;
602 
603   PetscFunctionBegin;
604   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
605   PetscCallMPI(MPI_Comm_size(comm, &size));
606   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
607   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
608   PetscCall(PCGetOptionsPrefix(pc, &prefix));
609   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
610   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
611   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));  PetscFunctionReturn(0);
612 }
613 
614 PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
615 {
616   PC_MPI *km = (PC_MPI *)pc->data;
617 
618   PetscFunctionBegin;
619   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
620   PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
621   PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
622   PetscOptionsHeadEnd();
623   PetscFunctionReturn(0);
624 }
625 
626 /*MC
627      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
628 
629    Level: beginner
630 
631    Options Database Keys:
632 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
633 .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
634 .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
635 -  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes
636 
637    Notes:
638    The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_
639 
640    The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
641    potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware.
642 
643    It can be particularly useful for user OpenMP code or potentially user GPU code.
644 
645    When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix of -mpi)
646    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.
647 
648    The solver options for `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
649    because they are not the actual solver objects.
650 
651    When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other
652    stages will be confusing since the event times are are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
653 
654 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
655 M*/
656 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
657 {
658   PC_MPI *km;
659 
660   PetscFunctionBegin;
661   PetscCall(PetscNew(&km));
662   pc->data = (void *)km;
663 
664   km->mincntperrank = 10000;
665 
666   pc->ops->setup          = PCSetUp_MPI;
667   pc->ops->apply          = PCApply_MPI;
668   pc->ops->destroy        = PCDestroy_MPI;
669   pc->ops->view           = PCView_MPI;
670   pc->ops->setfromoptions = PCSetFromOptions_MPI;
671   PetscFunctionReturn(0);
672 }
673