xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 8a08536730f649b5abc2c3834e482e04e1316646)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     The calling program may use OpenMP to compute the right-hand side and matrix for the linear system.
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
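/*
    Control flow (as implemented below): the controlling (client) code on MPI rank 0 broadcasts a PCMPICommand to the
    server ranks; the other ranks wait in the loop inside PCMPIServerBegin() and dispatch each command to
    PCMPICreate(), PCMPISetMat(), PCMPIUpdateMatValues(), PCMPISolve(), or PCMPIDestroy() with a NULL pc argument.
*/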
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 #include <petscts.h>
16 #include <petsctao.h>
17 
18 #define PC_MPI_MAX_RANKS  256
19 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
20 
21 typedef struct {
22   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
23   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
24   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
25   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
26   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
27 } PC_MPI;
28 
29 typedef enum {
30   PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
31   PCMPI_CREATE,
32   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
33   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
34   PCMPI_SOLVE,
35   PCMPI_VIEW,
36   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
37 } PCMPICommand;
38 
39 static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
40 static PetscBool     PCMPICommSet = PETSC_FALSE;
41 static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
42 static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
43 static PetscLogEvent EventServerDist;
44 
45 static PetscErrorCode PCMPICommsCreate(void)
46 {
47   MPI_Comm    comm = PC_MPI_COMM_WORLD;
48   PetscMPIInt size, rank, i;
49 
50   PetscFunctionBegin;
51   PetscCallMPI(MPI_Comm_size(comm, &size));
52   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
53   PetscCallMPI(MPI_Comm_rank(comm, &rank));
54   /* comm for size 1 is useful only for debugging */
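  /* PCMPIComms[i] contains MPI ranks 0 through i; ranks above i receive MPI_COMM_NULL from the split */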
55   for (i = 0; i < size; i++) {
56     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
57     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
58     PCMPISolveCounts[i] = 0;
59     PCMPIKSPCounts[i]   = 0;
60     PCMPIIterations[i]  = 0;
61     PCMPISizes[i]       = 0;
62   }
63   PCMPICommSet = PETSC_TRUE;
64   PetscFunctionReturn(PETSC_SUCCESS);
65 }
66 
67 PetscErrorCode PCMPICommsDestroy(void)
68 {
69   MPI_Comm    comm = PC_MPI_COMM_WORLD;
70   PetscMPIInt size, rank, i;
71 
72   PetscFunctionBegin;
73   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
74   PetscCallMPI(MPI_Comm_size(comm, &size));
75   PetscCallMPI(MPI_Comm_rank(comm, &rank));
76   for (i = 0; i < size; i++) {
77     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
78   }
79   PCMPICommSet = PETSC_FALSE;
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode PCMPICreate(PC pc)
84 {
85   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
86   MPI_Comm    comm = PC_MPI_COMM_WORLD;
87   KSP         ksp;
88   PetscInt    N[2], mincntperrank = 0;
89   PetscMPIInt size;
90   Mat         sA;
91   char       *cprefix = NULL;
92   PetscMPIInt len     = 0;
93 
94   PetscFunctionBegin;
95   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
96   PetscCallMPI(MPI_Comm_size(comm, &size));
97   if (pc) {
98     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP of type MPI on a one-rank MPI run; this will be less efficient than not using this type\n"));
99     PetscCall(PCGetOperators(pc, &sA, &sA));
100     PetscCall(MatGetSize(sA, &N[0], &N[1]));
101   }
102   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
103 
104   /* choose a suitable sized MPI_Comm for the problem to be solved on */
105   if (km) mincntperrank = km->mincntperrank;
106   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
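  /* the number of active ranks is min(size, max(1, global rows / mincntperrank)); the remaining ranks get MPI_COMM_NULL and return below */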
107   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
108   if (comm == MPI_COMM_NULL) {
109     ksp = NULL;
110     PetscFunctionReturn(PETSC_SUCCESS);
111   }
112   PetscCall(PetscLogStagePush(PCMPIStage));
113   PetscCall(KSPCreate(comm, &ksp));
114   PetscCall(KSPSetNestLevel(ksp, 1));
115   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
116   PetscCall(PetscLogStagePop());
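  /* gather the address of the KSP created on each active rank to rank 0 so later commands can be scattered back to the matching KSP */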
117   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
118   if (pc) {
119     size_t      slen;
120     const char *prefix = NULL;
121     char       *found  = NULL;
122 
123     PetscCallMPI(MPI_Comm_size(comm, &size));
124     PCMPIKSPCounts[size - 1]++;
125     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
126     PetscCall(PCGetOptionsPrefix(pc, &prefix));
127     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
128     PetscCall(PetscStrallocpy(prefix, &cprefix));
129     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
130     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
131     *found = 0;
132     PetscCall(PetscStrlen(cprefix, &slen));
133     len = (PetscMPIInt)slen;
134   }
135   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
136   if (len) {
137     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
138     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
139     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
140   }
141   PetscCall(PetscFree(cprefix));
142   PetscFunctionReturn(PETSC_SUCCESS);
143 }
144 
145 static PetscErrorCode PCMPISetMat(PC pc)
146 {
147   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
148   Mat                A;
149   PetscInt           m, n, *ia, *ja, j, bs;
150   Mat                sA;
151   MPI_Comm           comm = PC_MPI_COMM_WORLD;
152   KSP                ksp;
153   PetscLayout        layout;
154   const PetscInt    *IA = NULL, *JA = NULL;
155   const PetscInt    *range;
156   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
157   PetscScalar       *a;
158   const PetscScalar *sa               = NULL;
159   PetscInt           matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
160   char              *cprefix;
161 
162   PetscFunctionBegin;
163   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
164   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
165   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
166   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
167   if (pc) {
168     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
169     const char *prefix;
170     size_t      clen;
171 
172     PetscCallMPI(MPI_Comm_size(comm, &size));
173     PCMPIMatCounts[size - 1]++;
174     PetscCall(PCGetOperators(pc, &sA, &sA));
175     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
176     PetscCall(MatGetBlockSize(sA, &bs));
177     matproperties[2] = bs;
178     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
179     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
180     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
181     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
182     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
183     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
184     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
185     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
186     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
187     PetscCall(MatGetOptionsPrefix(sA, &prefix));
188     PetscCall(PetscStrallocpy(prefix, &cprefix));
189     PetscCall(PetscStrlen(cprefix, &clen));
190     matproperties[7] = (PetscInt)clen;
191   }
192   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
193 
194   /* determine ownership ranges of matrix columns */
195   PetscCall(PetscLayoutCreate(comm, &layout));
196   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
197   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
198   PetscCall(PetscLayoutSetUp(layout));
199   PetscCall(PetscLayoutGetLocalSize(layout, &n));
200   PetscCall(PetscLayoutDestroy(&layout));
201 
202   /* determine ownership ranges of matrix rows */
203   PetscCall(PetscLayoutCreate(comm, &layout));
204   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
205   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
206   PetscCall(PetscLayoutSetUp(layout));
207   PetscCall(PetscLayoutGetLocalSize(layout, &m));
208 
209   /* copy over the matrix nonzero structure and values */
210   if (pc) {
211     NZ      = km->NZ;
212     NZdispl = km->NZdispl;
213     PetscCall(PetscLayoutGetRanges(layout, &range));
214     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
215     for (i = 0; i < size; i++) {
216       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
217       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
218     }
219     displi[0]  = 0;
220     NZdispl[0] = 0;
221     for (j = 1; j < size; j++) {
222       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
223       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
224     }
225     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
226   }
227   PetscCall(PetscLayoutDestroy(&layout));
228   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
229 
230   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
231   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
232   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
233   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
234 
235   if (pc) {
236     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
237     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
238   }
239 
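  /* the scattered row offsets point into the global nonzero array; shift them so each rank's ia[] is zero-based for its local rows */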
240   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
241   ia[0] = 0;
242   PetscCall(PetscLogStagePush(PCMPIStage));
243   PetscCall(MatCreate(comm, &A));
244   if (matproperties[7] > 0) {
245     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
246     PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
247     PetscCall(MatSetOptionsPrefix(A, cprefix));
248     PetscCall(PetscFree(cprefix));
249   }
250   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
251   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
252   PetscCall(MatSetType(A, MATMPIAIJ));
253   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
254   PetscCall(MatSetBlockSize(A, matproperties[2]));
255 
256   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
257   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
258   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
259   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
260 
261   PetscCall(PetscFree3(ia, ja, a));
262   PetscCall(KSPSetOperators(ksp, A, A));
263   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
264   PetscCall(PetscLogStagePop());
265   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
266     const PetscInt *range;
267 
268     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
269     for (i = 0; i < size; i++) {
270       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
271       km->displ[i]     = (PetscMPIInt)range[i];
272     }
273   }
274   PetscCall(MatDestroy(&A));
275   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
276   PetscCall(KSPSetFromOptions(ksp));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
280 static PetscErrorCode PCMPIUpdateMatValues(PC pc)
281 {
282   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
283   KSP                ksp;
284   Mat                sA, A;
285   MPI_Comm           comm = PC_MPI_COMM_WORLD;
286   PetscScalar       *a;
287   PetscCount         nz;
288   const PetscScalar *sa = NULL;
289   PetscMPIInt        size;
290   PetscInt           matproperties[4] = {0, 0, 0, 0};
291 
292   PetscFunctionBegin;
293   if (pc) {
294     PetscCall(PCGetOperators(pc, &sA, &sA));
295     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
296   }
297   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
298   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
299   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
300   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
301   PetscCallMPI(MPI_Comm_size(comm, &size));
302   PCMPIMatCounts[size - 1]++;
303   PetscCall(KSPGetOperators(ksp, NULL, &A));
304   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
305   PetscCall(PetscMalloc1(nz, &a));
306   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
307   if (pc) {
308     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
309 
310     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
311 
312     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
313     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
314     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
315     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
316     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
317     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
318     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
319     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
320   }
321   PetscCall(MatUpdateMPIAIJWithArray(A, a));
322   PetscCall(PetscFree(a));
323   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
324   /* if any of these properties was previously set and is now not set, this will result in incorrect properties in A since there is no way to unset a property */
325   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
326   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
327   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
328   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
329   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
330   PetscFunctionReturn(PETSC_SUCCESS);
331 }
332 
333 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
334 {
335   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
336   KSP                ksp;
337   MPI_Comm           comm = PC_MPI_COMM_WORLD;
338   const PetscScalar *sb   = NULL, *x;
339   PetscScalar       *b, *sx = NULL;
340   PetscInt           its, n;
341   PetscMPIInt        size;
342 
343   PetscFunctionBegin;
344   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
345   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
346   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
347   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
348 
349   /* scatterv rhs */
350   PetscCallMPI(MPI_Comm_size(comm, &size));
351   if (pc) {
352     PetscInt N;
353 
354     PCMPISolveCounts[size - 1]++;
355     PetscCall(MatGetSize(pc->pmat, &N, NULL));
356     PCMPISizes[size - 1] += N;
357     PetscCall(VecGetArrayRead(B, &sb));
358   }
359   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
360   PetscCall(VecGetArray(ksp->vec_rhs, &b));
361   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
362   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
363   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
364 
365   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
366   PetscCall(PetscLogStagePush(PCMPIStage));
367   PetscCall(KSPSolve(ksp, NULL, NULL));
368   PetscCall(PetscLogStagePop());
369   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
370   PetscCall(KSPGetIterationNumber(ksp, &its));
371   PCMPIIterations[size - 1] += its;
372 
373   /* gather solution */
374   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
375   if (pc) PetscCall(VecGetArray(X, &sx));
376   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
377   if (pc) PetscCall(VecRestoreArray(X, &sx));
378   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
379   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
380   PetscFunctionReturn(PETSC_SUCCESS);
381 }
382 
383 static PetscErrorCode PCMPIDestroy(PC pc)
384 {
385   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
386   KSP      ksp;
387   MPI_Comm comm = PC_MPI_COMM_WORLD;
388 
389   PetscFunctionBegin;
390   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
391   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
392   PetscCall(PetscLogStagePush(PCMPIStage));
393   PetscCall(KSPDestroy(&ksp));
394   PetscCall(PetscLogStagePop());
395   PetscFunctionReturn(PETSC_SUCCESS);
396 }
397 
398 PetscBool PCMPIServerActive = PETSC_FALSE;
399 
400 /*@C
401   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
402   parallel `KSP` solves and management of parallel `KSP` objects.
403 
404   Logically Collective on all MPI processes except rank 0
405 
406   Options Database Keys:
407 + -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
408 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
409 
410   Level: developer
411 
412   Note:
413   This is normally started automatically in `PetscInitialize()` when the option is provided
414 
415   See `PCMPI` for information on using the solver with a `KSP` object
416 
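  Example Usage:
  A typical launch of an application that uses the server (the executable name ./my_app is only illustrative) is
.vb
  mpiexec -n 8 ./my_app -mpi_linear_solver_server -mpi_linear_solver_server_view
.ve
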
417   Developer Notes:
418   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
419   written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
420   (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
421 
422   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
423 
424   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
425 
426   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
427 
428   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
429   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
430 
431   The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where, for example, `SNESSolve()` runs on
432   all MPI processes but the user callback only runs on one MPI process per node.
433 
434   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
435   the `MPI_Comm` argument from PETSc calls.
436 
437 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
438 @*/
439 PetscErrorCode PCMPIServerBegin(void)
440 {
441   PetscMPIInt rank;
442 
443   PetscFunctionBegin;
444   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
445   if (PetscDefined(USE_SINGLE_LIBRARY)) {
446     PetscCall(VecInitializePackage());
447     PetscCall(MatInitializePackage());
448     PetscCall(DMInitializePackage());
449     PetscCall(PCInitializePackage());
450     PetscCall(KSPInitializePackage());
451     PetscCall(SNESInitializePackage());
452     PetscCall(TSInitializePackage());
453     PetscCall(TaoInitializePackage());
454   }
455   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
456   PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
457 
458   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
459   if (rank == 0) {
460     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
461     PCMPIServerActive = PETSC_TRUE;
462     PetscFunctionReturn(PETSC_SUCCESS);
463   }
464 
465   while (PETSC_TRUE) {
466     PCMPICommand request = PCMPI_CREATE;
467     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
468     switch (request) {
469     case PCMPI_CREATE:
470       PetscCall(PCMPICreate(NULL));
471       break;
472     case PCMPI_SET_MAT:
473       PetscCall(PCMPISetMat(NULL));
474       break;
475     case PCMPI_UPDATE_MAT_VALUES:
476       PetscCall(PCMPIUpdateMatValues(NULL));
477       break;
478     case PCMPI_VIEW:
479       // PetscCall(PCMPIView(NULL));
480       break;
481     case PCMPI_SOLVE:
482       PetscCall(PCMPISolve(NULL, NULL, NULL));
483       break;
484     case PCMPI_DESTROY:
485       PetscCall(PCMPIDestroy(NULL));
486       break;
487     case PCMPI_EXIT:
488       PetscCall(PetscFinalize());
489       exit(0); /* not sure if this is a good idea, but cannot return because it will run the user's main program */
490       break;
491     default:
492       break;
493     }
494   }
495   PetscFunctionReturn(PETSC_SUCCESS);
496 }
497 
498 /*@C
499   PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
500   parallel `KSP` solves and management of parallel `KSP` objects.
501
502   Logically Collective on all MPI processes except rank 0
503 
504   Level: developer
505 
506   Note:
507   This is normally ended automatically in `PetscFinalize()` when the option is provided
508 
509 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
510 @*/
511 PetscErrorCode PCMPIServerEnd(void)
512 {
513   PCMPICommand request = PCMPI_EXIT;
514 
515   PetscFunctionBegin;
516   if (PetscGlobalRank == 0) {
517     PetscViewer       viewer = NULL;
518     PetscViewerFormat format;
519 
520     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
521     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
522     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
523     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
524     PetscOptionsEnd();
525     if (viewer) {
526       PetscBool isascii;
527 
528       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
529       if (isascii) {
530         PetscMPIInt size;
531         PetscMPIInt i;
532 
533         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
534         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
535         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
536         if (PCMPIKSPCountsSeq) {
537           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
538         }
539         for (i = 0; i < size; i++) {
540           if (PCMPIKSPCounts[i]) {
541             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
542           }
543         }
544       }
545       PetscCall(PetscViewerDestroy(&viewer));
546     }
547   }
548   PetscCall(PCMPICommsDestroy());
549   PCMPIServerActive = PETSC_FALSE;
550   PetscFunctionReturn(PETSC_SUCCESS);
551 }
552 
553 /*
554     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
555     because, for example, the problem is small. This version is more efficient because it does not require copying any data
556 */
557 static PetscErrorCode PCSetUp_Seq(PC pc)
558 {
559   PC_MPI     *km = (PC_MPI *)pc->data;
560   Mat         sA;
561   const char *prefix;
562   char       *found = NULL, *cprefix;
563 
564   PetscFunctionBegin;
565   PetscCall(PCGetOperators(pc, NULL, &sA));
566   PetscCall(PCGetOptionsPrefix(pc, &prefix));
567   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
568   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
569   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
570 
571   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
572   PetscCall(PCGetOptionsPrefix(pc, &prefix));
573   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
574   PetscCall(PetscStrallocpy(prefix, &cprefix));
575   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
576   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
577   *found = 0;
578   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
579   PetscCall(PetscFree(cprefix));
580 
581   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
582   PetscCall(KSPSetFromOptions(km->ksps[0]));
583   PetscCall(KSPSetUp(km->ksps[0]));
584   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
585   PCMPIKSPCountsSeq++;
586   PetscFunctionReturn(PETSC_SUCCESS);
587 }
588 
589 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
590 {
591   PC_MPI  *km = (PC_MPI *)pc->data;
592   PetscInt its, n;
593   Mat      A;
594 
595   PetscFunctionBegin;
596   PetscCall(KSPSolve(km->ksps[0], b, x));
597   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
598   PCMPISolveCountsSeq++;
599   PCMPIIterationsSeq += its;
600   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
601   PetscCall(MatGetSize(A, &n, NULL));
602   PCMPISizesSeq += n;
603   PetscFunctionReturn(PETSC_SUCCESS);
604 }
605 
606 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
607 {
608   PC_MPI *km = (PC_MPI *)pc->data;
609 
610   PetscFunctionBegin;
611   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
612   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
613   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
614   PetscFunctionReturn(PETSC_SUCCESS);
615 }
616 
617 static PetscErrorCode PCDestroy_Seq(PC pc)
618 {
619   PC_MPI *km = (PC_MPI *)pc->data;
620 
621   PetscFunctionBegin;
622   PetscCall(KSPDestroy(&km->ksps[0]));
623   PetscCall(PetscFree(pc->data));
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
627 /*
628      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
629      right-hand side to the parallel PC
630 */
631 static PetscErrorCode PCSetUp_MPI(PC pc)
632 {
633   PC_MPI      *km = (PC_MPI *)pc->data;
634   PetscMPIInt  rank, size;
635   PCMPICommand request;
636   PetscBool    newmatrix = PETSC_FALSE;
637 
638   PetscFunctionBegin;
639   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
640   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from the 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
641   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
642 
643   if (!pc->setupcalled) {
644     if (!km->alwaysuseserver) {
645       PetscInt n;
646       Mat      sA;
647       /* short circuit for small systems */
648       PetscCall(PCGetOperators(pc, &sA, &sA));
649       PetscCall(MatGetSize(sA, &n, NULL));
650       if (n < 2 * km->mincntperrank - 1 || size == 1) {
651         pc->ops->setup   = NULL;
652         pc->ops->apply   = PCApply_Seq;
653         pc->ops->destroy = PCDestroy_Seq;
654         pc->ops->view    = PCView_Seq;
655         PetscCall(PCSetUp_Seq(pc));
656         PetscFunctionReturn(PETSC_SUCCESS);
657       }
658     }
659 
660     request = PCMPI_CREATE;
661     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
662     PetscCall(PCMPICreate(pc));
663     newmatrix = PETSC_TRUE;
664   }
665   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
666 
667   if (newmatrix) {
668     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
669     request = PCMPI_SET_MAT;
670     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
671     PetscCall(PCMPISetMat(pc));
672   } else {
673     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
674     request = PCMPI_UPDATE_MAT_VALUES;
675     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
676     PetscCall(PCMPIUpdateMatValues(pc));
677   }
678   PetscFunctionReturn(PETSC_SUCCESS);
679 }
680 
681 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
682 {
683   PCMPICommand request = PCMPI_SOLVE;
684 
685   PetscFunctionBegin;
686   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
687   PetscCall(PCMPISolve(pc, b, x));
688   PetscFunctionReturn(PETSC_SUCCESS);
689 }
690 
691 static PetscErrorCode PCDestroy_MPI(PC pc)
692 {
693   PCMPICommand request = PCMPI_DESTROY;
694 
695   PetscFunctionBegin;
696   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
697   PetscCall(PCMPIDestroy(pc));
698   PetscCall(PetscFree(pc->data));
699   PetscFunctionReturn(PETSC_SUCCESS);
700 }
701 
702 /*
703      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
704 */
705 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
706 {
707   PC_MPI     *km = (PC_MPI *)pc->data;
708   MPI_Comm    comm;
709   PetscMPIInt size;
710 
711   PetscFunctionBegin;
712   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
713   PetscCallMPI(MPI_Comm_size(comm, &size));
714   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
715   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
716   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
717   PetscFunctionReturn(PETSC_SUCCESS);
718 }
719 
720 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
721 {
722   PC_MPI *km = (PC_MPI *)pc->data;
723 
724   PetscFunctionBegin;
725   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
726   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
727   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
728   PetscOptionsHeadEnd();
729   PetscFunctionReturn(PETSC_SUCCESS);
730 }
731 
732 /*MC
733      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
734 
735    Options Database Keys for the Server:
736 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
737 -  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
738 
739    Options Database Keys for a specific `KSP` object:
740 +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
741 -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)
742 
743    Level: developer
744 
745    Notes:
746    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
747 
748    It can be particularly useful for user OpenMP code or potentially user GPU code.
749 
750    When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
751    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
752 
753    The options for the actual solver `KSP` and `PC` must be controlled via the options database; calls that set options directly on the user-level `KSP` and `PC` have no effect
754    because they are not the actual solver objects.
755 
756    When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
757    stages will be confusing since the event times are only recorded on the 0th MPI rank, so the percentage of time in the events will be misleading.
758 
759    Developer Note:
760    This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
761    a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
762 
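   Example Usage:
   A minimal sketch of the sequential user code (assuming A is a sequential AIJ matrix and b, x are sequential vectors
   that the user's code has already assembled); with `-mpi_linear_solver_server` the solve below is performed in parallel by the server ranks
.vb
   KSP ksp;

   KSPCreate(PETSC_COMM_WORLD, &ksp); /* in server mode rank 0's PETSC_COMM_WORLD has been set to PETSC_COMM_SELF */
   KSPSetOperators(ksp, A, A);
   KSPSetFromOptions(ksp);
   KSPSolve(ksp, b, x);
   KSPDestroy(&ksp);
.ve
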
763 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
764 M*/
765 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
766 {
767   PC_MPI *km;
768   char   *found = NULL;
769 
770   PetscFunctionBegin;
771   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
772   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
773 
774   /* material from PCSetType() */
775   PetscTryTypeMethod(pc, destroy);
776   pc->ops->destroy = NULL;
777   pc->data         = NULL;
778 
779   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
780   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
781   pc->modifysubmatrices  = NULL;
782   pc->modifysubmatricesP = NULL;
783   pc->setupcalled        = 0;
784 
785   PetscCall(PetscNew(&km));
786   pc->data = (void *)km;
787 
788   km->mincntperrank = 10000;
789 
790   pc->ops->setup          = PCSetUp_MPI;
791   pc->ops->apply          = PCApply_MPI;
792   pc->ops->destroy        = PCDestroy_MPI;
793   pc->ops->view           = PCView_MPI;
794   pc->ops->setfromoptions = PCSetFromOptions_MPI;
795   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
796   PetscFunctionReturn(PETSC_SUCCESS);
797 }
798