xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 956255ef4c0066f3bac8fadf6ffb1cdd4f8366a5)
1 /*
2     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4 
5     The calling program may use OpenMP to compute the right-hand side and matrix for the linear system.
6 
7     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8 
9     The resulting KSP and PC can only be controlled via the options database, though some common commands
10     could be passed through the server.
11 
12 */
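/*
    A minimal user-side sketch (illustrative, not code from this file): the application is written as an
    ordinary sequential PETSc program. When it is launched with -mpi_linear_solver_server, only MPI rank 0
    executes the user code while the remaining ranks serve the parallel solves. The matrix A and the vectors
    b and x below are assumed to have been assembled sequentially by the user.

        KSP ksp;
        PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); // on rank 0 PETSC_COMM_WORLD has been remapped to PETSC_COMM_SELF
        PetscCall(KSPSetOperators(ksp, A, A));
        PetscCall(KSPSetFromOptions(ksp));            // the solver that actually runs is selected from the options database
        PetscCall(KSPSolve(ksp, b, x));               // executed in parallel by the server ranks when the system is large enough
        PetscCall(KSPDestroy(&ksp));
*/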
13 #include <petsc/private/pcimpl.h>
14 #include <petsc/private/kspimpl.h>
15 #include <petsc.h>
16 
17 #define PC_MPI_MAX_RANKS  256
18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
19 
20 typedef struct {
21   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and of nonzero column indices initially) */
24   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26 } PC_MPI;
27 
28 typedef enum {
29   PCMPI_EXIT, /* exit the PC server loop, meaning the controlling sequential program is done */
30   PCMPI_CREATE,
31   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
32   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33   PCMPI_SOLVE,
34   PCMPI_VIEW,
35   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36 } PCMPICommand;
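/*
    Protocol sketch (summarizing the code in this file, not additional functionality): rank 0 broadcasts one
    PCMPICommand on PC_MPI_COMM_WORLD and then calls the matching PCMPIXxx() routine with its PC, while the
    server ranks receive the command inside PCMPIServerBegin() and call the same routine with a NULL PC.
    For example, on rank 0 (see PCSetUp_MPI() below):

        PCMPICommand request = PCMPI_SET_MAT;
        PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
        PetscCall(PCMPISetMat(pc));
*/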
37 
38 static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
39 static PetscBool PCMPICommSet = PETSC_FALSE;
40 static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41 static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
42 
43 static PetscErrorCode PCMPICommsCreate(void)
44 {
45   MPI_Comm    comm = PC_MPI_COMM_WORLD;
46   PetscMPIInt size, rank, i;
47 
48   PetscFunctionBegin;
49   PetscCallMPI(MPI_Comm_size(comm, &size));
50   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
51   PetscCallMPI(MPI_Comm_rank(comm, &rank));
52   /* comm for size 1 is useful only for debugging */
53   for (i = 0; i < size; i++) {
54     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
55     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
56     PCMPISolveCounts[i] = 0;
57     PCMPIKSPCounts[i]   = 0;
58     PCMPIIterations[i]  = 0;
59     PCMPISizes[i]       = 0;
60   }
61   PCMPICommSet = PETSC_TRUE;
62   PetscFunctionReturn(PETSC_SUCCESS);
63 }
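/*
    Worked example of the split above: with 4 ranks in PC_MPI_COMM_WORLD the loop produces
        PCMPIComms[0] = {0}, PCMPIComms[1] = {0,1}, PCMPIComms[2] = {0,1,2}, PCMPIComms[3] = {0,1,2,3}
    on the ranks that belong to each communicator; on the excluded ranks the corresponding entry is MPI_COMM_NULL.
*/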
64 
65 PetscErrorCode PCMPICommsDestroy(void)
66 {
67   MPI_Comm    comm = PC_MPI_COMM_WORLD;
68   PetscMPIInt size, rank, i;
69 
70   PetscFunctionBegin;
71   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
72   PetscCallMPI(MPI_Comm_size(comm, &size));
73   PetscCallMPI(MPI_Comm_rank(comm, &rank));
74   for (i = 0; i < size; i++) {
75     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
76   }
77   PCMPICommSet = PETSC_FALSE;
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 static PetscErrorCode PCMPICreate(PC pc)
82 {
83   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
84   MPI_Comm    comm = PC_MPI_COMM_WORLD;
85   KSP         ksp;
86   PetscInt    N[2], mincntperrank = 0;
87   PetscMPIInt size;
88   Mat         sA;
89   char       *cprefix = NULL;
90   PetscMPIInt len     = 0;
91 
92   PetscFunctionBegin;
93   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
94   PetscCallMPI(MPI_Comm_size(comm, &size));
95   if (pc) {
96     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP of type MPI on a one-rank MPI run; this will be less efficient than not using this type\n"));
97     PetscCall(PCGetOperators(pc, &sA, &sA));
98     PetscCall(MatGetSize(sA, &N[0], &N[1]));
99   }
100   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
101 
102   /* choose a suitably sized MPI_Comm for the problem to be solved on */
103   if (km) mincntperrank = km->mincntperrank;
104   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
105   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
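  /* Worked example of the selection above (illustrative numbers): with N[0] = 100000 rows, mincntperrank = 10000
     (the default set in PCCreate_MPI()) and size = 16, PetscMax(1, 100000 / 10000) = 10, so PCMPIComms[9],
     i.e. the communicator containing the first 10 ranks, is used for this system */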
106   if (comm == MPI_COMM_NULL) {
107     ksp = NULL;
108     PetscFunctionReturn(PETSC_SUCCESS);
109   }
110   PetscCall(PetscLogStagePush(PCMPIStage));
111   PetscCall(KSPCreate(comm, &ksp));
112   PetscCall(KSPSetNestLevel(ksp, 1));
113   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
114   PetscCall(PetscLogStagePop());
115   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
116   if (pc) {
117     size_t      slen;
118     const char *prefix = NULL;
119     char       *found  = NULL;
120 
121     PetscCallMPI(MPI_Comm_size(comm, &size));
122     PCMPIKSPCounts[size - 1]++;
123     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
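    /* e.g., a PC prefix of "sys1_mpi_linear_solver_server_" (hypothetical) results in the KSP prefix "sys1_" */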
124     PetscCall(PCGetOptionsPrefix(pc, &prefix));
125     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
126     PetscCall(PetscStrallocpy(prefix, &cprefix));
127     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
128     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
129     *found = 0;
130     PetscCall(PetscStrlen(cprefix, &slen));
131     len = (PetscMPIInt)slen;
132   }
133   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
134   if (len) {
135     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
136     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
137     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
138   }
139   PetscCall(PetscFree(cprefix));
140   PetscFunctionReturn(PETSC_SUCCESS);
141 }
142 
143 static PetscErrorCode PCMPISetMat(PC pc)
144 {
145   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
146   Mat                A;
147   PetscInt           m, n, *ia, *ja, j, bs;
148   Mat                sA;
149   MPI_Comm           comm = PC_MPI_COMM_WORLD;
150   KSP                ksp;
151   PetscLayout        layout;
152   const PetscInt    *IA = NULL, *JA = NULL;
153   const PetscInt    *range;
154   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
155   PetscScalar       *a;
156   const PetscScalar *sa               = NULL;
157   PetscInt           matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
158   char              *cprefix;
159 
160   PetscFunctionBegin;
161   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
162   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
163   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
164   if (pc) {
165     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
166     const char *prefix;
167     size_t      clen;
168 
169     PetscCallMPI(MPI_Comm_size(comm, &size));
170     PCMPIMatCounts[size - 1]++;
171     PetscCall(PCGetOperators(pc, &sA, &sA));
172     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
173     PetscCall(MatGetBlockSize(sA, &bs));
174     matproperties[2] = bs;
175     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
176     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
177     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
178     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
179     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
180     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
181     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
182     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
183     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
184     PetscCall(MatGetOptionsPrefix(sA, &prefix));
185     PetscCall(PetscStrallocpy(prefix, &cprefix));
186     PetscCall(PetscStrlen(cprefix, &clen));
187     matproperties[7] = (PetscInt)clen;
188   }
189   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
190 
191   /* determine ownership ranges of matrix columns */
192   PetscCall(PetscLayoutCreate(comm, &layout));
193   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
194   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
195   PetscCall(PetscLayoutSetUp(layout));
196   PetscCall(PetscLayoutGetLocalSize(layout, &n));
197   PetscCall(PetscLayoutDestroy(&layout));
198 
199   /* determine ownership ranges of matrix rows */
200   PetscCall(PetscLayoutCreate(comm, &layout));
201   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
202   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
203   PetscCall(PetscLayoutSetUp(layout));
204   PetscCall(PetscLayoutGetLocalSize(layout, &m));
205 
206   /* copy over the matrix nonzero structure and values */
207   if (pc) {
208     NZ      = km->NZ;
209     NZdispl = km->NZdispl;
210     PetscCall(PetscLayoutGetRanges(layout, &range));
211     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
212     for (i = 0; i < size; i++) {
213       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
214       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
215     }
216     displi[0]  = 0;
217     NZdispl[0] = 0;
218     for (j = 1; j < size; j++) {
219       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
220       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
221     }
222     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
223   }
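  /*
     Worked example of the counts above (illustrative): with 2 ranks and row ownership ranges {0, 2, 4} for a
     4 x 4 matrix with row pointers IA = {0, 2, 5, 7, 9}, one gets
         sendcounti = {3, 3}, displi  = {0, 2}   (row pointers; adjacent pieces overlap by one entry)
         NZ         = {5, 4}, NZdispl = {0, 5}   (nonzero values and column indices)
     After the scatters below, each rank shifts its received row pointers so that ia[0] = 0.
  */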
224   PetscCall(PetscLayoutDestroy(&layout));
225   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
226 
227   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
228   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
229   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
230   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
231 
232   if (pc) {
233     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
234     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
235   }
236 
237   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
238   ia[0] = 0;
239   PetscCall(PetscLogStagePush(PCMPIStage));
240   PetscCall(MatCreate(comm, &A));
241   if (matproperties[7] > 0) {
242     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
243     PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
244     PetscCall(MatSetOptionsPrefix(A, cprefix));
245     PetscCall(PetscFree(cprefix));
246   }
247   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
248   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
249   PetscCall(MatSetType(A, MATMPIAIJ));
250   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
251   PetscCall(MatSetBlockSize(A, matproperties[2]));
252 
253   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
254   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
255   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
256   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
257 
258   PetscCall(PetscFree3(ia, ja, a));
259   PetscCall(KSPSetOperators(ksp, A, A));
260   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
261   PetscCall(PetscLogStagePop());
262   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
263     const PetscInt *range;
264 
265     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
266     for (i = 0; i < size; i++) {
267       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
268       km->displ[i]     = (PetscMPIInt)range[i];
269     }
270   }
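  /* For example (illustrative), with vector ownership ranges {0, 3, 5} on 2 ranks the loop above produces
     sendcount = {3, 2} and displ = {0, 3}, which are reused for every subsequent PCMPISolve() */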
271   PetscCall(MatDestroy(&A));
272   PetscCall(KSPSetFromOptions(ksp));
273   PetscFunctionReturn(PETSC_SUCCESS);
274 }
275 
276 static PetscErrorCode PCMPIUpdateMatValues(PC pc)
277 {
278   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
279   KSP                ksp;
280   Mat                sA, A;
281   MPI_Comm           comm = PC_MPI_COMM_WORLD;
282   PetscScalar       *a;
283   PetscCount         nz;
284   const PetscScalar *sa = NULL;
285   PetscMPIInt        size;
286   PetscInt           matproperties[4] = {0, 0, 0, 0};
287 
288   PetscFunctionBegin;
289   if (pc) {
290     PetscCall(PCGetOperators(pc, &sA, &sA));
291     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
292   }
293   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
294   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
295   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
296   PetscCallMPI(MPI_Comm_size(comm, &size));
297   PCMPIMatCounts[size - 1]++;
298   PetscCall(KSPGetOperators(ksp, NULL, &A));
299   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
300   PetscCall(PetscMalloc1(nz, &a));
301   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
302   if (pc) {
303     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
304 
305     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
306 
307     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
308     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
309     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
310     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
311     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
312     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
313     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
314     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
315   }
316   PetscCall(MatUpdateMPIAIJWithArray(A, a));
317   PetscCall(PetscFree(a));
318   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
319   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
320   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
321   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
322   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
323   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
328 {
329   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
330   KSP                ksp;
331   MPI_Comm           comm = PC_MPI_COMM_WORLD;
332   const PetscScalar *sb   = NULL, *x;
333   PetscScalar       *b, *sx = NULL;
334   PetscInt           its, n;
335   PetscMPIInt        size;
336 
337   PetscFunctionBegin;
338   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
339   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
340   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
341 
342   /* TODO: optimize code to not require building counts/displ every time */
343 
344   /* scatterv rhs */
345   PetscCallMPI(MPI_Comm_size(comm, &size));
346   if (pc) {
347     PetscInt N;
348 
349     PCMPISolveCounts[size - 1]++;
350     PetscCall(MatGetSize(pc->pmat, &N, NULL));
351     PCMPISizes[size - 1] += N;
352     PetscCall(VecGetArrayRead(B, &sb));
353   }
354   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
355   PetscCall(VecGetArray(ksp->vec_rhs, &b));
356   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
357   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
358   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
359 
360   PetscCall(PetscLogStagePush(PCMPIStage));
361   PetscCall(KSPSolve(ksp, NULL, NULL));
362   PetscCall(PetscLogStagePop());
363   PetscCall(KSPGetIterationNumber(ksp, &its));
364   PCMPIIterations[size - 1] += its;
365 
366   /* gather solution */
367   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
368   if (pc) PetscCall(VecGetArray(X, &sx));
369   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
370   if (pc) PetscCall(VecRestoreArray(X, &sx));
371   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
372   PetscFunctionReturn(PETSC_SUCCESS);
373 }
374 
375 static PetscErrorCode PCMPIDestroy(PC pc)
376 {
377   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
378   KSP      ksp;
379   MPI_Comm comm = PC_MPI_COMM_WORLD;
380 
381   PetscFunctionBegin;
382   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
383   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
384   PetscCall(KSPDestroy(&ksp));
385   PetscFunctionReturn(PETSC_SUCCESS);
386 }
387 
388 PetscBool PCMPIServerActive = PETSC_FALSE;
389 
390 /*@C
391   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
392   parallel `KSP` solves and management of parallel `KSP` objects.
393 
394   Logically Collective on all MPI processes except rank 0
395 
396   Options Database Keys:
397 + -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
398 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
399 
400   Level: developer
401 
402   Note:
403   This is normally started automatically in `PetscInitialize()` when the option is provided
404 
405   See `PCMPI` for information on using the solver with a `KSP` object
406 
407   Developer Notes:
408   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
409   written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
410   (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
411 
412   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
413 
414   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
415 
416   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
417 
418   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
419   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
420 
421   The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao`, where `SNESSolve()`, for example, runs on
422   all MPI processes but the user callback only runs on one MPI process per node.
423 
424   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
425   the `MPI_Comm` argument from PETSc calls.
426 
427 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
428 @*/
429 PetscErrorCode PCMPIServerBegin(void)
430 {
431   PetscMPIInt rank;
432 
433   PetscFunctionBegin;
434   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
435   if (PetscDefined(USE_SINGLE_LIBRARY)) {
436     PetscCall(VecInitializePackage());
437     PetscCall(MatInitializePackage());
438     PetscCall(DMInitializePackage());
439     PetscCall(PCInitializePackage());
440     PetscCall(KSPInitializePackage());
441     PetscCall(SNESInitializePackage());
442     PetscCall(TSInitializePackage());
443     PetscCall(TaoInitializePackage());
444   }
445   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
446 
447   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
448   if (rank == 0) {
449     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
450     PCMPIServerActive = PETSC_TRUE;
451     PetscFunctionReturn(PETSC_SUCCESS);
452   }
453 
454   while (PETSC_TRUE) {
455     PCMPICommand request = PCMPI_CREATE;
456     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
457     switch (request) {
458     case PCMPI_CREATE:
459       PetscCall(PCMPICreate(NULL));
460       break;
461     case PCMPI_SET_MAT:
462       PetscCall(PCMPISetMat(NULL));
463       break;
464     case PCMPI_UPDATE_MAT_VALUES:
465       PetscCall(PCMPIUpdateMatValues(NULL));
466       break;
467     case PCMPI_VIEW:
468       // PetscCall(PCMPIView(NULL));
469       break;
470     case PCMPI_SOLVE:
471       PetscCall(PCMPISolve(NULL, NULL, NULL));
472       break;
473     case PCMPI_DESTROY:
474       PetscCall(PCMPIDestroy(NULL));
475       break;
476     case PCMPI_EXIT:
477       PetscCall(PetscFinalize());
478       exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
479       break;
480     default:
481       break;
482     }
483   }
484   PetscFunctionReturn(PETSC_SUCCESS);
485 }
486 
487 /*@C
488   PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
489   parallel `KSP` solves and management of parallel `KSP` objects.
490 
491   Logically Collective on all MPI ranks except 0
492 
493   Level: developer
494 
495   Note:
496   This is normally ended automatically in `PetscFinalize()` when the option is provided
497 
498 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
499 @*/
500 PetscErrorCode PCMPIServerEnd(void)
501 {
502   PCMPICommand request = PCMPI_EXIT;
503 
504   PetscFunctionBegin;
505   if (PetscGlobalRank == 0) {
506     PetscViewer       viewer = NULL;
507     PetscViewerFormat format;
508 
509     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
510     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
511     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
512     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about the systems solved with the server", "PCMPI", &viewer, &format, NULL));
513     PetscOptionsEnd();
514     if (viewer) {
515       PetscBool isascii;
516 
517       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
518       if (isascii) {
519         PetscMPIInt size;
520         PetscMPIInt i;
521 
522         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
523         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
524         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
525         if (PCMPIKSPCountsSeq) {
526           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
527         }
528         for (i = 0; i < size; i++) {
529           if (PCMPIKSPCounts[i]) {
530             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
531           }
532         }
533       }
534       PetscCall(PetscViewerDestroy(&viewer));
535     }
536   }
537   PetscCall(PCMPICommsDestroy());
538   PCMPIServerActive = PETSC_FALSE;
539   PetscFunctionReturn(PETSC_SUCCESS);
540 }
541 
542 /*
543     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
544     because, for example, the problem is small. It is more efficient because it does not require copying any data.
545 */
546 static PetscErrorCode PCSetUp_Seq(PC pc)
547 {
548   PC_MPI     *km = (PC_MPI *)pc->data;
549   Mat         sA;
550   const char *prefix;
551   char       *found = NULL, *cprefix;
552 
553   PetscFunctionBegin;
554   PetscCall(PCGetOperators(pc, NULL, &sA));
555   PetscCall(PCGetOptionsPrefix(pc, &prefix));
556   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
557   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
558   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
559 
560   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
561   PetscCall(PCGetOptionsPrefix(pc, &prefix));
562   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
563   PetscCall(PetscStrallocpy(prefix, &cprefix));
564   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
565   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
566   *found = 0;
567   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
568   PetscCall(PetscFree(cprefix));
569 
570   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
571   PetscCall(KSPSetFromOptions(km->ksps[0]));
572   PetscCall(KSPSetUp(km->ksps[0]));
573   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
574   PCMPIKSPCountsSeq++;
575   PetscFunctionReturn(PETSC_SUCCESS);
576 }
577 
578 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
579 {
580   PC_MPI  *km = (PC_MPI *)pc->data;
581   PetscInt its, n;
582   Mat      A;
583 
584   PetscFunctionBegin;
585   PetscCall(KSPSolve(km->ksps[0], b, x));
586   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
587   PCMPISolveCountsSeq++;
588   PCMPIIterationsSeq += its;
589   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
590   PetscCall(MatGetSize(A, &n, NULL));
591   PCMPISizesSeq += n;
592   PetscFunctionReturn(PETSC_SUCCESS);
593 }
594 
595 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
596 {
597   PC_MPI *km = (PC_MPI *)pc->data;
598 
599   PetscFunctionBegin;
600   PetscCall(PetscViewerASCIIPrintf(viewer, "Running the MPI linear solver server directly on rank 0 due to the small size of the system\n"));
601   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
602   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
603   PetscFunctionReturn(PETSC_SUCCESS);
604 }
605 
606 static PetscErrorCode PCDestroy_Seq(PC pc)
607 {
608   PC_MPI *km = (PC_MPI *)pc->data;
609 
610   PetscFunctionBegin;
611   PetscCall(KSPDestroy(&km->ksps[0]));
612   PetscCall(PetscFree(pc->data));
613   PetscFunctionReturn(PETSC_SUCCESS);
614 }
615 
616 /*
617      PCSetUp_MPI - Triggers the creation of the MPI parallel PC and copies parts of the matrix and
618      right-hand side to the parallel PC
619 */
620 static PetscErrorCode PCSetUp_MPI(PC pc)
621 {
622   PC_MPI      *km = (PC_MPI *)pc->data;
623   PetscMPIInt  rank, size;
624   PCMPICommand request;
625   PetscBool    newmatrix = PETSC_FALSE;
626 
627   PetscFunctionBegin;
628   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
629   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
630   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
631 
632   if (!pc->setupcalled) {
633     if (!km->alwaysuseserver) {
634       PetscInt n;
635       Mat      sA;
636       /* short circuit for small systems */
637       PetscCall(PCGetOperators(pc, &sA, &sA));
638       PetscCall(MatGetSize(sA, &n, NULL));
639       if (n < 2 * km->mincntperrank - 1 || size == 1) {
640         pc->ops->setup   = NULL;
641         pc->ops->apply   = PCApply_Seq;
642         pc->ops->destroy = PCDestroy_Seq;
643         pc->ops->view    = PCView_Seq;
644         PetscCall(PCSetUp_Seq(pc));
645         PetscFunctionReturn(PETSC_SUCCESS);
646       }
647     }
648 
649     request = PCMPI_CREATE;
650     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
651     PetscCall(PCMPICreate(pc));
652     newmatrix = PETSC_TRUE;
653   }
654   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
655 
656   if (newmatrix) {
657     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
658     request = PCMPI_SET_MAT;
659     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
660     PetscCall(PCMPISetMat(pc));
661   } else {
662     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
663     request = PCMPI_UPDATE_MAT_VALUES;
664     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
665     PetscCall(PCMPIUpdateMatValues(pc));
666   }
667   PetscFunctionReturn(PETSC_SUCCESS);
668 }
669 
670 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
671 {
672   PCMPICommand request = PCMPI_SOLVE;
673 
674   PetscFunctionBegin;
675   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
676   PetscCall(PCMPISolve(pc, b, x));
677   PetscFunctionReturn(PETSC_SUCCESS);
678 }
679 
680 static PetscErrorCode PCDestroy_MPI(PC pc)
681 {
682   PCMPICommand request = PCMPI_DESTROY;
683 
684   PetscFunctionBegin;
685   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
686   PetscCall(PCMPIDestroy(pc));
687   PetscCall(PetscFree(pc->data));
688   PetscFunctionReturn(PETSC_SUCCESS);
689 }
690 
691 /*
692      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
693 */
694 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
695 {
696   PC_MPI     *km = (PC_MPI *)pc->data;
697   MPI_Comm    comm;
698   PetscMPIInt size;
699 
700   PetscFunctionBegin;
701   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
702   PetscCallMPI(MPI_Comm_size(comm, &size));
703   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
704   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
705   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
706   PetscFunctionReturn(PETSC_SUCCESS);
707 }
708 
709 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
710 {
711   PC_MPI *km = (PC_MPI *)pc->data;
712 
713   PetscFunctionBegin;
714   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
715   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
716   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
717   PetscOptionsHeadEnd();
718   PetscFunctionReturn(PETSC_SUCCESS);
719 }
720 
721 /*MC
722      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
723 
724    Options Database Keys for the Server:
725 +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
726 -  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
727 
728    Options Database Keys for a specific `KSP` object
729 +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
730 -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on one process (for debugging and testing purposes)
731 
732    Level: developer
733 
734    Notes:
735    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
736 
737    The server approach can be particularly useful for user OpenMP code, or potentially for user GPU code.
738 
739    When the program is running with a single MPI process then it directly uses the provided matrix and right hand side
740    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
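
   Example launch (an illustrative sketch; the executable name is hypothetical):
.vb
   mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type cg -ksp_monitor -mpi_linear_solver_server_view
.ve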
741 
742    The solver options for the actual solver `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
743    because they are not the actual solver objects.
744 
745    When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
746    stages will be confusing since the event times are only recorded on the 0th MPI rank, so the percentage of time in the events will be misleading.
747 
748    Developer Note:
749    This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
750    a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
751 
752 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
753 M*/
754 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
755 {
756   PC_MPI *km;
757   char   *found = NULL;
758 
759   PetscFunctionBegin;
760   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
761   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
762 
763   /* material from PCSetType() */
764   PetscTryTypeMethod(pc, destroy);
765   pc->ops->destroy = NULL;
766   pc->data         = NULL;
767 
768   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
769   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
770   pc->modifysubmatrices  = NULL;
771   pc->modifysubmatricesP = NULL;
772   pc->setupcalled        = 0;
773 
774   PetscCall(PetscNew(&km));
775   pc->data = (void *)km;
776 
777   km->mincntperrank = 10000;
778 
779   pc->ops->setup          = PCSetUp_MPI;
780   pc->ops->apply          = PCApply_MPI;
781   pc->ops->destroy        = PCDestroy_MPI;
782   pc->ops->view           = PCView_MPI;
783   pc->ops->setfromoptions = PCSetFromOptions_MPI;
784   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
785   PetscFunctionReturn(PETSC_SUCCESS);
786 }
787