/*
    This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
    It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

    The user's program may use OpenMP to compute the right-hand side and matrix for the linear system.

    The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.

    The resulting KSP and PC can only be controlled via the options database, though some common commands
    could be passed through the server.

*/
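/*
    Example usage (a sketch; the executable name and the number of ranks below are illustrative only):
    the application remains an ordinary sequential PETSc code and the server is enabled from the command line, e.g.,

        mpiexec -n 4 ./myapp -mpi_linear_solver_server -mpi_linear_solver_server_view

    MPI rank 0 then runs the user code while the remaining ranks wait in PCMPIServerBegin() for solve requests.
*/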
#include <petsc/private/pcimpl.h>
#include <petsc/private/kspimpl.h>
#include <petscts.h>
#include <petsctao.h>

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and of nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;

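/*
     Commands broadcast (via MPI_Bcast() on PC_MPI_COMM_WORLD) from rank 0 to the server ranks waiting in
     PCMPIServerBegin(); each command is followed by a call to the matching PCMPIXXX() routine on all ranks,
     with a NULL PC argument on the server ranks
*/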
typedef enum {
  PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;

static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool     PCMPICommSet = PETSC_FALSE;
static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
static PetscLogEvent EventServerDist;

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
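  /* PCMPIComms[i] contains the first i + 1 ranks of PC_MPI_COMM_WORLD; PCMPICreate() later picks the smallest of these communicators suitable for the system being solved */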
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

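/*
     PCMPICreate - creates the MPI parallel KSP on the subset of ranks that will participate in the solve;
     called collectively on PC_MPI_COMM_WORLD with a NULL pc argument on the server (non-zero) ranks
*/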
static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *cprefix = NULL;
  PetscMPIInt len     = 0;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one-rank MPI run, this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitably sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
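  /* number of active ranks is min(size, max(1, number of rows / mincntperrank)); PCMPIComms[] is indexed by that count minus one */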
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPCreate(comm, &ksp));
  PetscCall(KSPSetNestLevel(ksp, 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
  PetscCall(PetscLogStagePop());
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t      slen;
    const char *prefix = NULL;
    char       *found  = NULL;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
    PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
    *found = 0;
    PetscCall(PetscStrlen(cprefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
  }
  PetscCall(PetscFree(cprefix));
  PetscFunctionReturn(PETSC_SUCCESS);
}

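/*
     PCMPISetMat - distributes the sequential AIJ matrix on rank 0 to the MPI parallel KSP as a MATMPIAIJ,
     carrying along its options prefix and any known symmetry/SPD properties
*/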
static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, *ia, *ja, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa               = NULL;
  PetscInt           matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  char              *cprefix;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));

  /* copy over the matrix nonzero structure and values */
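  /* rank 0 scatters its sequential AIJ matrix in CSR form: rank i receives the row-pointer segment
     IA[range[i]] .. IA[range[i + 1]] (hence sendcounti[] entries overlap by one) plus the NZ[i] column
     indices and values belonging to its rows */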
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

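  /* the scattered row pointers still use rank 0's global offsets; shift them so each rank's local CSR starts at zero */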
  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

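/*
     PCMPIUpdateMatValues - rescatters only the numerical values of the matrix, reusing the counts and
     displacements computed in PCMPISetMat(), when the nonzero structure has not changed
*/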
static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));

    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set, this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

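/*
     PCMPISolve - scatters the right-hand side from rank 0, runs the MPI parallel KSPSolve(), and gathers
     the solution back to rank 0
*/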
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ every time */

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

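/*
     PCMPIDestroy - destroys the MPI parallel KSP associated with this PC on all participating ranks
*/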
static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscLogStagePop());
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscBool PCMPIServerActive = PETSC_FALSE;

/*@C
  PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI processes except rank 0

  Options Database Keys:
+ -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
- -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program

  Level: developer

  Note:
  This is normally started automatically in `PetscInitialize()` when the option is provided

  See `PCMPI` for information on using the solver with a `KSP` object
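
  Example Usage:
  The executable name below is purely illustrative; the server is enabled entirely from the command line
.vb
  mpiexec -n 4 ./myapp -mpi_linear_solver_server -mpi_linear_solver_server_view
.ve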

  Developer Notes:
  When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

  This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

  The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
  nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

  The concept could also be extended for user callbacks for `SNES`, `TS`, and `Tao` where, for example, the `SNESSolve()` runs on
  all MPI processes but the user callback only runs on one MPI process per node.

  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
  the `MPI_Comm` argument from PETSc calls.

.seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
  PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI ranks except 0

  Level: developer

  Note:
  This is normally ended automatically in `PetscFinalize()` when the option is provided

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about the systems solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PCMPIServerActive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
    because, for example, the problem is small. This version is more efficient because it does not require copying any data
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;
  char       *found = NULL, *cprefix;

  PetscFunctionBegin;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetNestLevel(km->ksps[0], 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

  /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
  PetscCall(PetscStrallocpy(prefix, &cprefix));
  PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
  *found = 0;
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
  PetscCall(PetscFree(cprefix));

  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     PCSetUp_MPI - Triggers the creation of the MPI parallel PC and copies parts of the matrix and
     right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI      *km = (PC_MPI *)pc->data;
  PetscMPIInt  rank, size;
  PCMPICommand request;
  PetscBool    newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short-circuit for small systems: solve directly on rank 0 if the system is too small for even two ranks to each hold roughly mincntperrank rows, or if only one rank is available */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    request = PCMPI_SET_MAT;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    request = PCMPI_UPDATE_MAT_VALUES;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PCMPICommand request = PCMPI_SOLVE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_MPI(PC pc)
{
  PCMPICommand request = PCMPI_DESTROY;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
*/
static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
     PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

   Options Database Keys for the Server:
+  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
-  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server

   Options Database Keys for a specific `KSP` object:
+  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

   Level: developer

   Notes:
   The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
   and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

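   Example Usage:
   The user application remains an ordinary sequential PETSc code; the server is enabled entirely from the command line (the executable name and rank count here are purely illustrative)
.vb
   mpiexec -n 4 ./myapp -mpi_linear_solver_server -mpi_linear_solver_server_view
.ve
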
   The solver options for the `KSP` and `PC` that actually perform the solve must be controlled via the options database; calls to set options directly on the user level `KSP` and `PC` have no effect
   because they are not the actual solver objects.

   When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
   stages will be confusing since the event times are only recorded on the 0th MPI rank, so the percentage of time in those events will be misleading.

   Developer Note:
   This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
   a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.

.seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;
  char   *found = NULL;

  PetscFunctionBegin;
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType() */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = 0;

  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}