xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 5316cbed53243aa6ff2aea37634c8efbf95d18cc)
1f1f2ae84SBarry Smith /*
2f1f2ae84SBarry Smith     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3f1f2ae84SBarry Smith     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4f1f2ae84SBarry Smith 
5f1f2ae84SBarry Smith     The user's program may use OpenMP to compute the right hand side and matrix for the linear system.
6f1f2ae84SBarry Smith 
7f1f2ae84SBarry Smith     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8f1f2ae84SBarry Smith 
9f1f2ae84SBarry Smith     The resulting KSP and PC can only be controlled via the options database, though some common commands
10f1f2ae84SBarry Smith     could be passed through the server.
11f1f2ae84SBarry Smith 
12f1f2ae84SBarry Smith */
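/*
    A sketch of the intended client-side usage (hypothetical user code, not part of this file; the solver options
    shown are illustrative):

       PetscCall(PetscInitialize(&argc, &argv, NULL, NULL)); // with -mpi_linear_solver_server only rank 0 continues running user code
       // ... assemble a sequential (MATSEQAIJ) matrix A and Vec b on this rank, possibly using OpenMP ...
       PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));         // PETSC_COMM_WORLD has been reduced to this single rank
       PetscCall(KSPSetOperators(ksp, A, A));
       PetscCall(KSPSetFromOptions(ksp));                    // e.g. -ksp_type preonly -pc_type mpi to route the solve through this PC
       PetscCall(KSPSolve(ksp, b, x));                       // the matrix and right hand side are distributed and solved in parallel by the server
       PetscCall(PetscFinalize());                           // shuts down the server via PCMPIServerEnd()
*/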
13f1f2ae84SBarry Smith #include <petsc/private/pcimpl.h>
14f1f2ae84SBarry Smith #include <petsc/private/kspimpl.h>
155e1a0e3cSBarry Smith #include <petsc.h>
16f1f2ae84SBarry Smith 
17f1f2ae84SBarry Smith #define PC_MPI_MAX_RANKS  256
18f1f2ae84SBarry Smith #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
19f1f2ae84SBarry Smith 
20f1f2ae84SBarry Smith typedef struct {
21f1f2ae84SBarry Smith   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22f1f2ae84SBarry Smith   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23f1f2ae84SBarry Smith   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and the nonzero column indices initially) */
24f1f2ae84SBarry Smith   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25f1f2ae84SBarry Smith   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26f1f2ae84SBarry Smith } PC_MPI;
27f1f2ae84SBarry Smith 
289371c9d4SSatish Balay typedef enum {
299371c9d4SSatish Balay   PCMPI_EXIT, /* exit the PC server loop, meaning the controlling sequential program is done */
30f1f2ae84SBarry Smith   PCMPI_CREATE,
31f1f2ae84SBarry Smith   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
32f1f2ae84SBarry Smith   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33f1f2ae84SBarry Smith   PCMPI_SOLVE,
34f1f2ae84SBarry Smith   PCMPI_VIEW,
35f1f2ae84SBarry Smith   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36f1f2ae84SBarry Smith } PCMPICommand;
37f1f2ae84SBarry Smith 
38f1f2ae84SBarry Smith static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
39f1f2ae84SBarry Smith static PetscBool PCMPICommSet = PETSC_FALSE;
40f1f2ae84SBarry Smith static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41*5316cbedSBarry Smith static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
42f1f2ae84SBarry Smith 
43d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICommsCreate(void)
44d71ae5a4SJacob Faibussowitsch {
45f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
46f1f2ae84SBarry Smith   PetscMPIInt size, rank, i;
47f1f2ae84SBarry Smith 
48f1f2ae84SBarry Smith   PetscFunctionBegin;
49f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
50f1f2ae84SBarry Smith   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
51f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
52f1f2ae84SBarry Smith   /* comm for size 1 is useful only for debugging */
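  /* PCMPIComms[i] contains MPI ranks 0 through i; higher ranks pass MPI_UNDEFINED and receive MPI_COMM_NULL */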
53f1f2ae84SBarry Smith   for (i = 0; i < size; i++) {
54f1f2ae84SBarry Smith     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
55f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
56f1f2ae84SBarry Smith     PCMPISolveCounts[i] = 0;
57f1f2ae84SBarry Smith     PCMPIKSPCounts[i]   = 0;
58*5316cbedSBarry Smith     PCMPIIterations[i]  = 0;
59*5316cbedSBarry Smith     PCMPISizes[i]       = 0;
60f1f2ae84SBarry Smith   }
61f1f2ae84SBarry Smith   PCMPICommSet = PETSC_TRUE;
62f1f2ae84SBarry Smith   PetscFunctionReturn(0);
63f1f2ae84SBarry Smith }
64f1f2ae84SBarry Smith 
65d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPICommsDestroy(void)
66d71ae5a4SJacob Faibussowitsch {
67f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
68f1f2ae84SBarry Smith   PetscMPIInt size, rank, i;
69f1f2ae84SBarry Smith 
70f1f2ae84SBarry Smith   PetscFunctionBegin;
71f1f2ae84SBarry Smith   if (!PCMPICommSet) PetscFunctionReturn(0);
72f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
73f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
74f1f2ae84SBarry Smith   for (i = 0; i < size; i++) {
75f1f2ae84SBarry Smith     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
76f1f2ae84SBarry Smith   }
77f1f2ae84SBarry Smith   PCMPICommSet = PETSC_FALSE;
78f1f2ae84SBarry Smith   PetscFunctionReturn(0);
79f1f2ae84SBarry Smith }
80f1f2ae84SBarry Smith 
81d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICreate(PC pc)
82d71ae5a4SJacob Faibussowitsch {
83f1f2ae84SBarry Smith   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
84f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
85f1f2ae84SBarry Smith   KSP         ksp;
86f1f2ae84SBarry Smith   PetscInt    N[2], mincntperrank = 0;
87f1f2ae84SBarry Smith   PetscMPIInt size;
88f1f2ae84SBarry Smith   Mat         sA;
89f1f2ae84SBarry Smith   char       *prefix;
90f1f2ae84SBarry Smith   PetscMPIInt len = 0;
91f1f2ae84SBarry Smith 
92f1f2ae84SBarry Smith   PetscFunctionBegin;
93f1f2ae84SBarry Smith   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
94f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
95f1f2ae84SBarry Smith   if (pc) {
96f1f2ae84SBarry Smith     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running a KSP of type MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
97f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
98f1f2ae84SBarry Smith     PetscCall(MatGetSize(sA, &N[0], &N[1]));
99f1f2ae84SBarry Smith   }
100f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
101f1f2ae84SBarry Smith 
102f1f2ae84SBarry Smith   /* choose a suitably sized MPI_Comm for the problem to be solved on */
103f1f2ae84SBarry Smith   if (km) mincntperrank = km->mincntperrank;
104f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPIU_INT, 0, comm)); /* mincntperrank is a PetscInt, so use MPIU_INT as in the other PetscInt broadcasts */
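  /* activate enough ranks that each holds roughly mincntperrank rows, capped by the number of ranks available */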
105f1f2ae84SBarry Smith   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
106f1f2ae84SBarry Smith   if (comm == MPI_COMM_NULL) {
107f1f2ae84SBarry Smith     ksp = NULL;
108f1f2ae84SBarry Smith     PetscFunctionReturn(0);
109f1f2ae84SBarry Smith   }
110f1f2ae84SBarry Smith   PetscCall(KSPCreate(comm, &ksp));
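  /* rank 0 gathers the address of every rank's KSP so later commands (set matrix, solve, destroy) can be routed back to them */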
111f1f2ae84SBarry Smith   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
112f1f2ae84SBarry Smith   if (pc) {
113f1f2ae84SBarry Smith     size_t slen;
114f1f2ae84SBarry Smith 
115f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_size(comm, &size));
116dad3da8eSBarry Smith     PCMPIKSPCounts[size - 1]++;
117f1f2ae84SBarry Smith     PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
118f1f2ae84SBarry Smith     PetscCall(PetscStrlen(prefix, &slen));
119f1f2ae84SBarry Smith     len = (PetscMPIInt)slen;
120f1f2ae84SBarry Smith   }
121f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
122f1f2ae84SBarry Smith   if (len) {
12348a46eb9SPierre Jolivet     if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
124f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
125f1f2ae84SBarry Smith     PetscCall(KSPSetOptionsPrefix(ksp, prefix));
126f1f2ae84SBarry Smith   }
127f1f2ae84SBarry Smith   PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
128f1f2ae84SBarry Smith   PetscFunctionReturn(0);
129f1f2ae84SBarry Smith }
130f1f2ae84SBarry Smith 
131d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISetMat(PC pc)
132d71ae5a4SJacob Faibussowitsch {
133f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
134f1f2ae84SBarry Smith   Mat                A;
135dd0d27b1SBarry Smith   PetscInt           N[2], n, *ia, *ja, j, bs;
136f1f2ae84SBarry Smith   Mat                sA;
137f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
138f1f2ae84SBarry Smith   KSP                ksp;
139f1f2ae84SBarry Smith   PetscLayout        layout;
140f1f2ae84SBarry Smith   const PetscInt    *IA = NULL, *JA = NULL;
141f1f2ae84SBarry Smith   const PetscInt    *range;
142f1f2ae84SBarry Smith   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
143f1f2ae84SBarry Smith   PetscScalar       *a;
144f1f2ae84SBarry Smith   const PetscScalar *sa = NULL;
145f1f2ae84SBarry Smith 
146f1f2ae84SBarry Smith   PetscFunctionBegin;
147f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
148f1f2ae84SBarry Smith   if (!ksp) PetscFunctionReturn(0);
149f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
150f1f2ae84SBarry Smith   if (pc) {
151f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_size(comm, &size));
152dad3da8eSBarry Smith     PCMPIMatCounts[size - 1]++;
153f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
154f1f2ae84SBarry Smith     PetscCall(MatGetSize(sA, &N[0], &N[1]));
155dd0d27b1SBarry Smith     PetscCall(MatGetBlockSize(sA, &bs));
156dd0d27b1SBarry Smith     /* need to broadcast symmetry flags etc if set */
157f1f2ae84SBarry Smith   }
158f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
159dd0d27b1SBarry Smith   PetscCallMPI(MPI_Bcast(&bs, 1, MPIU_INT, 0, comm));
160f1f2ae84SBarry Smith 
161f1f2ae84SBarry Smith   /* determine ownership ranges of matrix */
162f1f2ae84SBarry Smith   PetscCall(PetscLayoutCreate(comm, &layout));
163dd0d27b1SBarry Smith   PetscCall(PetscLayoutSetBlockSize(layout, bs));
164f1f2ae84SBarry Smith   PetscCall(PetscLayoutSetSize(layout, N[0]));
165f1f2ae84SBarry Smith   PetscCall(PetscLayoutSetUp(layout));
166f1f2ae84SBarry Smith   PetscCall(PetscLayoutGetLocalSize(layout, &n));
167f1f2ae84SBarry Smith 
168f1f2ae84SBarry Smith   /* copy over the matrix nonzero structure and values */
169f1f2ae84SBarry Smith   if (pc) {
170f1f2ae84SBarry Smith     NZ      = km->NZ;
171f1f2ae84SBarry Smith     NZdispl = km->NZdispl;
172f1f2ae84SBarry Smith     PetscCall(PetscLayoutGetRanges(layout, &range));
173f1f2ae84SBarry Smith     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
174f1f2ae84SBarry Smith     for (i = 0; i < size; i++) {
175f1f2ae84SBarry Smith       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
176f1f2ae84SBarry Smith       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
177f1f2ae84SBarry Smith     }
178f1f2ae84SBarry Smith     displi[0]  = 0;
179f1f2ae84SBarry Smith     NZdispl[0] = 0;
180f1f2ae84SBarry Smith     for (j = 1; j < size; j++) {
181f1f2ae84SBarry Smith       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
182f1f2ae84SBarry Smith       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
183f1f2ae84SBarry Smith     }
184f1f2ae84SBarry Smith     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
185f1f2ae84SBarry Smith   }
186f1f2ae84SBarry Smith   PetscCall(PetscLayoutDestroy(&layout));
187f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
188f1f2ae84SBarry Smith 
189f1f2ae84SBarry Smith   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
190f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
191f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
192f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
193f1f2ae84SBarry Smith 
194f1f2ae84SBarry Smith   if (pc) {
195f1f2ae84SBarry Smith     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
196f1f2ae84SBarry Smith     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
197f1f2ae84SBarry Smith   }
198f1f2ae84SBarry Smith 
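  /* the scattered row starts are still global; shift them so each rank's row pointer begins at zero */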
199ad540459SPierre Jolivet   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
200f1f2ae84SBarry Smith   ia[0] = 0;
201f1f2ae84SBarry Smith   PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
202dd0d27b1SBarry Smith   PetscCall(MatSetBlockSize(A, bs));
203f1f2ae84SBarry Smith   PetscCall(MatSetOptionsPrefix(A, "mpi_"));
204f1f2ae84SBarry Smith 
205f1f2ae84SBarry Smith   PetscCall(PetscFree3(ia, ja, a));
206f1f2ae84SBarry Smith   PetscCall(KSPSetOperators(ksp, A, A));
207f1f2ae84SBarry Smith   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
208f1f2ae84SBarry Smith   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
209f1f2ae84SBarry Smith     const PetscInt *range;
210f1f2ae84SBarry Smith 
211f1f2ae84SBarry Smith     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
212f1f2ae84SBarry Smith     for (i = 0; i < size; i++) {
213f1f2ae84SBarry Smith       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
214f1f2ae84SBarry Smith       km->displ[i]     = (PetscMPIInt)range[i];
215f1f2ae84SBarry Smith     }
216f1f2ae84SBarry Smith   }
217f1f2ae84SBarry Smith   PetscCall(MatDestroy(&A));
218f1f2ae84SBarry Smith   PetscCall(KSPSetFromOptions(ksp));
219f1f2ae84SBarry Smith   PetscFunctionReturn(0);
220f1f2ae84SBarry Smith }
221f1f2ae84SBarry Smith 
222d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIUpdateMatValues(PC pc)
223d71ae5a4SJacob Faibussowitsch {
224f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
225f1f2ae84SBarry Smith   KSP                ksp;
226f1f2ae84SBarry Smith   Mat                sA, A;
227f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
228f1f2ae84SBarry Smith   PetscScalar       *a;
229f1f2ae84SBarry Smith   PetscCount         nz;
230f1f2ae84SBarry Smith   const PetscScalar *sa = NULL;
231dad3da8eSBarry Smith   PetscMPIInt        size;
232f1f2ae84SBarry Smith 
233f1f2ae84SBarry Smith   PetscFunctionBegin;
234f1f2ae84SBarry Smith   if (pc) {
235f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
236f1f2ae84SBarry Smith     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
237f1f2ae84SBarry Smith   }
238f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
239f1f2ae84SBarry Smith   if (!ksp) PetscFunctionReturn(0);
240f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
241dad3da8eSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
242dad3da8eSBarry Smith   PCMPIMatCounts[size - 1]++;
243f1f2ae84SBarry Smith   PetscCall(KSPGetOperators(ksp, NULL, &A));
244f1f2ae84SBarry Smith   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
245f1f2ae84SBarry Smith   PetscCall(PetscMalloc1(nz, &a));
246f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
247f1f2ae84SBarry Smith   if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
248f1f2ae84SBarry Smith   PetscCall(MatUpdateMPIAIJWithArray(A, a));
249f1f2ae84SBarry Smith   PetscCall(PetscFree(a));
250f1f2ae84SBarry Smith   PetscFunctionReturn(0);
251f1f2ae84SBarry Smith }
252f1f2ae84SBarry Smith 
253d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
254d71ae5a4SJacob Faibussowitsch {
255f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
256f1f2ae84SBarry Smith   KSP                ksp;
257f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
258f1f2ae84SBarry Smith   const PetscScalar *sb   = NULL, *x;
259f1f2ae84SBarry Smith   PetscScalar       *b, *sx = NULL;
260*5316cbedSBarry Smith   PetscInt           its, n;
261*5316cbedSBarry Smith   PetscMPIInt        size;
262f1f2ae84SBarry Smith 
263f1f2ae84SBarry Smith   PetscFunctionBegin;
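  /* rank 0 scatters the per-rank KSP addresses gathered in PCMPICreate() so each server rank retrieves its own KSP */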
264f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
265f1f2ae84SBarry Smith   if (!ksp) PetscFunctionReturn(0);
266f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
267f1f2ae84SBarry Smith 
268f1f2ae84SBarry Smith   /* TODO: optimize code to not require building counts/displ every time */
269f1f2ae84SBarry Smith 
270f1f2ae84SBarry Smith   /* scatterv rhs */
271dad3da8eSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
272*5316cbedSBarry Smith   if (pc) {
273*5316cbedSBarry Smith     PetscInt N;
274*5316cbedSBarry Smith 
275dad3da8eSBarry Smith     PCMPISolveCounts[size - 1]++;
276*5316cbedSBarry Smith     PetscCall(MatGetSize(pc->pmat, &N, NULL));
277*5316cbedSBarry Smith     PCMPISizes[size - 1] += N;
278f1f2ae84SBarry Smith     PetscCall(VecGetArrayRead(B, &sb));
279f1f2ae84SBarry Smith   }
280f1f2ae84SBarry Smith   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
281f1f2ae84SBarry Smith   PetscCall(VecGetArray(ksp->vec_rhs, &b));
282f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
283f1f2ae84SBarry Smith   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
284f1f2ae84SBarry Smith   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
285f1f2ae84SBarry Smith 
286f1f2ae84SBarry Smith   PetscCall(KSPSolve(ksp, NULL, NULL));
287*5316cbedSBarry Smith   PetscCall(KSPGetIterationNumber(ksp, &its));
288*5316cbedSBarry Smith   PCMPIIterations[size - 1] += its;
289f1f2ae84SBarry Smith 
290f1f2ae84SBarry Smith   /* gather solution */
291f1f2ae84SBarry Smith   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
292f1f2ae84SBarry Smith   if (pc) PetscCall(VecGetArray(X, &sx));
293f1f2ae84SBarry Smith   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
294f1f2ae84SBarry Smith   if (pc) PetscCall(VecRestoreArray(X, &sx));
295f1f2ae84SBarry Smith   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
296f1f2ae84SBarry Smith   PetscFunctionReturn(0);
297f1f2ae84SBarry Smith }
298f1f2ae84SBarry Smith 
299d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIDestroy(PC pc)
300d71ae5a4SJacob Faibussowitsch {
301f1f2ae84SBarry Smith   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
302f1f2ae84SBarry Smith   KSP      ksp;
303f1f2ae84SBarry Smith   MPI_Comm comm = PC_MPI_COMM_WORLD;
304f1f2ae84SBarry Smith 
305f1f2ae84SBarry Smith   PetscFunctionBegin;
306f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
307f1f2ae84SBarry Smith   if (!ksp) PetscFunctionReturn(0);
308f1f2ae84SBarry Smith   PetscCall(KSPDestroy(&ksp));
309f1f2ae84SBarry Smith   PetscFunctionReturn(0);
310f1f2ae84SBarry Smith }
311f1f2ae84SBarry Smith 
312f1f2ae84SBarry Smith /*@C
313f1f2ae84SBarry Smith      PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
314f1580f4eSBarry Smith      parallel `KSP` solves and management of parallel `KSP` objects.
315f1f2ae84SBarry Smith 
316f1f2ae84SBarry Smith      Logically collective on all MPI ranks except 0
317f1f2ae84SBarry Smith 
318f1580f4eSBarry Smith    Options Database Keys:
319f1f2ae84SBarry Smith +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
320f1f2ae84SBarry Smith -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
321f1f2ae84SBarry Smith 
322f1580f4eSBarry Smith      Note:
323f1f2ae84SBarry Smith       This is normally started automatically in `PetscInitialize()` when the option is provided
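
      A typical launch (a sketch; "./app" is a hypothetical user executable) is
         mpiexec -n 4 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view
      so that only MPI rank 0 runs the user's main program while the remaining ranks wait in this routine for requests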
324f1f2ae84SBarry Smith 
325f1f2ae84SBarry Smith      Developer Notes:
326f1580f4eSBarry Smith        When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
327f1580f4eSBarry Smith        written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the other ranks
328f1580f4eSBarry Smith        (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
329f1f2ae84SBarry Smith 
330f1580f4eSBarry Smith        Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
331f1f2ae84SBarry Smith 
332f1f2ae84SBarry Smith      Level: developer
333f1f2ae84SBarry Smith 
334f1580f4eSBarry Smith .seealso: `PCMPIServerEnd()`, `PCMPI`
335f1f2ae84SBarry Smith @*/
336d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerBegin(void)
337d71ae5a4SJacob Faibussowitsch {
338f1f2ae84SBarry Smith   PetscMPIInt rank;
339f1f2ae84SBarry Smith 
340f1f2ae84SBarry Smith   PetscFunctionBegin;
341f1f2ae84SBarry Smith   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
3425e1a0e3cSBarry Smith   if (PetscDefined(USE_SINGLE_LIBRARY)) {
3435e1a0e3cSBarry Smith     PetscCall(VecInitializePackage());
3445e1a0e3cSBarry Smith     PetscCall(MatInitializePackage());
3455e1a0e3cSBarry Smith     PetscCall(DMInitializePackage());
3465e1a0e3cSBarry Smith     PetscCall(PCInitializePackage());
3475e1a0e3cSBarry Smith     PetscCall(KSPInitializePackage());
3485e1a0e3cSBarry Smith     PetscCall(SNESInitializePackage());
3495e1a0e3cSBarry Smith     PetscCall(TSInitializePackage());
3505e1a0e3cSBarry Smith     PetscCall(TaoInitializePackage());
3515e1a0e3cSBarry Smith   }
3525e1a0e3cSBarry Smith 
353f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
354f1f2ae84SBarry Smith   if (rank == 0) {
355f1f2ae84SBarry Smith     PETSC_COMM_WORLD = PETSC_COMM_SELF;
356f1f2ae84SBarry Smith     PetscFunctionReturn(0);
357f1f2ae84SBarry Smith   }
358f1f2ae84SBarry Smith 
359f1f2ae84SBarry Smith   while (PETSC_TRUE) {
360f1f2ae84SBarry Smith     PCMPICommand request = PCMPI_CREATE;
361f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
362f1f2ae84SBarry Smith     switch (request) {
363d71ae5a4SJacob Faibussowitsch     case PCMPI_CREATE:
364d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPICreate(NULL));
365d71ae5a4SJacob Faibussowitsch       break;
366d71ae5a4SJacob Faibussowitsch     case PCMPI_SET_MAT:
367d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPISetMat(NULL));
368d71ae5a4SJacob Faibussowitsch       break;
369d71ae5a4SJacob Faibussowitsch     case PCMPI_UPDATE_MAT_VALUES:
370d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPIUpdateMatValues(NULL));
371d71ae5a4SJacob Faibussowitsch       break;
372f1f2ae84SBarry Smith     case PCMPI_VIEW:
373f1f2ae84SBarry Smith       // PetscCall(PCMPIView(NULL));
374f1f2ae84SBarry Smith       break;
375d71ae5a4SJacob Faibussowitsch     case PCMPI_SOLVE:
376d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPISolve(NULL, NULL, NULL));
377d71ae5a4SJacob Faibussowitsch       break;
378d71ae5a4SJacob Faibussowitsch     case PCMPI_DESTROY:
379d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPIDestroy(NULL));
380d71ae5a4SJacob Faibussowitsch       break;
381f1f2ae84SBarry Smith     case PCMPI_EXIT:
382f1f2ae84SBarry Smith       PetscCall(PetscFinalize());
383f1f2ae84SBarry Smith       exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
384f1f2ae84SBarry Smith       break;
385d71ae5a4SJacob Faibussowitsch     default:
386d71ae5a4SJacob Faibussowitsch       break;
387f1f2ae84SBarry Smith     }
388f1f2ae84SBarry Smith   }
389f1f2ae84SBarry Smith   PetscFunctionReturn(0);
390f1f2ae84SBarry Smith }
391f1f2ae84SBarry Smith 
392f1f2ae84SBarry Smith /*@C
393f1f2ae84SBarry Smith      PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
394f1580f4eSBarry Smith      parallel `KSP` solves and management of parallel `KSP` objects.
395f1f2ae84SBarry Smith 
396f1f2ae84SBarry Smith      Logically collective on all MPI ranks except 0
397f1f2ae84SBarry Smith 
398f1580f4eSBarry Smith      Note:
399f1f2ae84SBarry Smith       This is normally ended automatically in `PetscFinalize()` when the option is provided
400f1f2ae84SBarry Smith 
401f1f2ae84SBarry Smith      Level: developer
402f1f2ae84SBarry Smith 
403f1580f4eSBarry Smith .seealso: `PCMPIServerBegin()`, `PCMPI`
404f1f2ae84SBarry Smith @*/
405d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerEnd(void)
406d71ae5a4SJacob Faibussowitsch {
407f1f2ae84SBarry Smith   PCMPICommand request = PCMPI_EXIT;
408f1f2ae84SBarry Smith 
409f1f2ae84SBarry Smith   PetscFunctionBegin;
410f1f2ae84SBarry Smith   if (PetscGlobalRank == 0) {
411f1f2ae84SBarry Smith     PetscViewer       viewer = NULL;
412f1f2ae84SBarry Smith     PetscViewerFormat format;
413f1f2ae84SBarry Smith 
414f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
415f1f2ae84SBarry Smith     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
416f1f2ae84SBarry Smith     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
417f1f2ae84SBarry Smith     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
418f1f2ae84SBarry Smith     PetscOptionsEnd();
419f1f2ae84SBarry Smith     if (viewer) {
420f1f2ae84SBarry Smith       PetscBool isascii;
421f1f2ae84SBarry Smith 
422f1f2ae84SBarry Smith       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
423f1f2ae84SBarry Smith       if (isascii) {
424f1f2ae84SBarry Smith         PetscMPIInt size;
425*5316cbedSBarry Smith         PetscMPIInt i;
426f1f2ae84SBarry Smith 
427f1f2ae84SBarry Smith         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
428*5316cbedSBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
429*5316cbedSBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
430*5316cbedSBarry Smith         if (PCMPIKSPCountsSeq) {
431*5316cbedSBarry Smith           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
432f1f2ae84SBarry Smith         }
433*5316cbedSBarry Smith         for (i = 0; i < size; i++) {
434*5316cbedSBarry Smith           if (PCMPIKSPCounts[i]) {
435*5316cbedSBarry Smith             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
436*5316cbedSBarry Smith           }
437*5316cbedSBarry Smith         }
438f1f2ae84SBarry Smith       }
439f1f2ae84SBarry Smith       PetscCall(PetscViewerDestroy(&viewer));
440f1f2ae84SBarry Smith     }
441f1f2ae84SBarry Smith   }
442f1f2ae84SBarry Smith   PetscCall(PCMPICommsDestroy());
443f1f2ae84SBarry Smith   PetscFunctionReturn(0);
444f1f2ae84SBarry Smith }
445f1f2ae84SBarry Smith 
446f1f2ae84SBarry Smith /*
447f1f2ae84SBarry Smith     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
448f1f2ae84SBarry Smith     because, for example, the problem is small. This version is more efficient because it does not require copying any data
449f1f2ae84SBarry Smith */
450d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_Seq(PC pc)
451d71ae5a4SJacob Faibussowitsch {
452f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
453f1f2ae84SBarry Smith   Mat         sA;
454f1f2ae84SBarry Smith   const char *prefix;
455f1f2ae84SBarry Smith 
456f1f2ae84SBarry Smith   PetscFunctionBegin;
457f1f2ae84SBarry Smith   PetscCall(PCGetOperators(pc, NULL, &sA));
458f1f2ae84SBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
459f1f2ae84SBarry Smith   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
460f1f2ae84SBarry Smith   PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
461f1f2ae84SBarry Smith   PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
462f1f2ae84SBarry Smith   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
463f1f2ae84SBarry Smith   PetscCall(KSPSetFromOptions(km->ksps[0]));
464f1f2ae84SBarry Smith   PetscCall(KSPSetUp(km->ksps[0]));
465f1f2ae84SBarry Smith   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
466f1f2ae84SBarry Smith   PCMPIKSPCountsSeq++;
467f1f2ae84SBarry Smith   PetscFunctionReturn(0);
468f1f2ae84SBarry Smith }
469f1f2ae84SBarry Smith 
470d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
471d71ae5a4SJacob Faibussowitsch {
472f1f2ae84SBarry Smith   PC_MPI   *km = (PC_MPI *)pc->data;
473*5316cbedSBarry Smith   PetscInt its, n;
474*5316cbedSBarry Smith   Mat      A;
475f1f2ae84SBarry Smith 
476f1f2ae84SBarry Smith   PetscFunctionBegin;
477f1f2ae84SBarry Smith   PetscCall(KSPSolve(km->ksps[0], b, x));
478*5316cbedSBarry Smith   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
479f1f2ae84SBarry Smith   PCMPISolveCountsSeq++;
480*5316cbedSBarry Smith   PCMPIIterationsSeq += its;
481*5316cbedSBarry Smith   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
482*5316cbedSBarry Smith   PetscCall(MatGetSize(A, &n, NULL));
483*5316cbedSBarry Smith   PCMPISizesSeq += n;
484f1f2ae84SBarry Smith   PetscFunctionReturn(0);
485f1f2ae84SBarry Smith }
486f1f2ae84SBarry Smith 
487d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
488d71ae5a4SJacob Faibussowitsch {
489f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
490f1f2ae84SBarry Smith   const char *prefix;
491f1f2ae84SBarry Smith 
492f1f2ae84SBarry Smith   PetscFunctionBegin;
493f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
494f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
495f1f2ae84SBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
496f1f2ae84SBarry Smith   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
497f1f2ae84SBarry Smith   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
498*5316cbedSBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
499f1f2ae84SBarry Smith   PetscFunctionReturn(0);
500f1f2ae84SBarry Smith }
501f1f2ae84SBarry Smith 
502d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCDestroy_Seq(PC pc)
503d71ae5a4SJacob Faibussowitsch {
504f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
505f1f2ae84SBarry Smith 
506f1f2ae84SBarry Smith   PetscFunctionBegin;
507f1f2ae84SBarry Smith   PetscCall(KSPDestroy(&km->ksps[0]));
508f1f2ae84SBarry Smith   PetscCall(PetscFree(pc->data));
509f1f2ae84SBarry Smith   PetscFunctionReturn(0);
510f1f2ae84SBarry Smith }
511f1f2ae84SBarry Smith 
512f1f2ae84SBarry Smith /*
513f1f2ae84SBarry Smith      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
514f1f2ae84SBarry Smith      right hand side to the parallel PC
515f1f2ae84SBarry Smith */
516d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_MPI(PC pc)
517d71ae5a4SJacob Faibussowitsch {
518f1f2ae84SBarry Smith   PC_MPI      *km = (PC_MPI *)pc->data;
519f1f2ae84SBarry Smith   PetscMPIInt  rank, size;
520f1f2ae84SBarry Smith   PCMPICommand request;
521f1f2ae84SBarry Smith   PetscBool    newmatrix = PETSC_FALSE;
522f1f2ae84SBarry Smith 
523f1f2ae84SBarry Smith   PetscFunctionBegin;
524f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
525f1f2ae84SBarry Smith   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
526f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
527f1f2ae84SBarry Smith 
528f1f2ae84SBarry Smith   if (!pc->setupcalled) {
529f1f2ae84SBarry Smith     if (!km->alwaysuseserver) {
530f1f2ae84SBarry Smith       PetscInt n;
531f1f2ae84SBarry Smith       Mat      sA;
532f1f2ae84SBarry Smith       /* short circuit for small systems */
533f1f2ae84SBarry Smith       PetscCall(PCGetOperators(pc, &sA, &sA));
534f1f2ae84SBarry Smith       PetscCall(MatGetSize(sA, &n, NULL));
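      /* with fewer than roughly two ranks' worth of rows (2*mincntperrank) the server gives no benefit, so solve directly on this rank */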
535f1f2ae84SBarry Smith       if (n < 2 * km->mincntperrank - 1 || size == 1) {
536f1f2ae84SBarry Smith         pc->ops->setup   = NULL;
537f1f2ae84SBarry Smith         pc->ops->apply   = PCApply_Seq;
538f1f2ae84SBarry Smith         pc->ops->destroy = PCDestroy_Seq;
539f1f2ae84SBarry Smith         pc->ops->view    = PCView_Seq;
540f1f2ae84SBarry Smith         PetscCall(PCSetUp_Seq(pc));
541f1f2ae84SBarry Smith         PetscFunctionReturn(0);
542f1f2ae84SBarry Smith       }
543f1f2ae84SBarry Smith     }
544f1f2ae84SBarry Smith 
545f1f2ae84SBarry Smith     request = PCMPI_CREATE;
546f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
547f1f2ae84SBarry Smith     PetscCall(PCMPICreate(pc));
548f1f2ae84SBarry Smith     newmatrix = PETSC_TRUE;
5499371c9d4SSatish Balay   }
5509371c9d4SSatish Balay   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
551f1f2ae84SBarry Smith 
552f1f2ae84SBarry Smith   if (newmatrix) {
553f1f2ae84SBarry Smith     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
554f1f2ae84SBarry Smith     request = PCMPI_SET_MAT;
555f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
556f1f2ae84SBarry Smith     PetscCall(PCMPISetMat(pc));
557f1f2ae84SBarry Smith   } else {
558f1f2ae84SBarry Smith     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
559f1f2ae84SBarry Smith     request = PCMPI_UPDATE_MAT_VALUES;
560f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
561f1f2ae84SBarry Smith     PetscCall(PCMPIUpdateMatValues(pc));
562f1f2ae84SBarry Smith   }
563f1f2ae84SBarry Smith   PetscFunctionReturn(0);
564f1f2ae84SBarry Smith }
565f1f2ae84SBarry Smith 
566d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
567d71ae5a4SJacob Faibussowitsch {
568f1f2ae84SBarry Smith   PCMPICommand request = PCMPI_SOLVE;
569f1f2ae84SBarry Smith 
570f1f2ae84SBarry Smith   PetscFunctionBegin;
571f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
572f1f2ae84SBarry Smith   PetscCall(PCMPISolve(pc, b, x));
573f1f2ae84SBarry Smith   PetscFunctionReturn(0);
574f1f2ae84SBarry Smith }
575f1f2ae84SBarry Smith 
576d71ae5a4SJacob Faibussowitsch PetscErrorCode PCDestroy_MPI(PC pc)
577d71ae5a4SJacob Faibussowitsch {
578f1f2ae84SBarry Smith   PCMPICommand request = PCMPI_DESTROY;
579f1f2ae84SBarry Smith 
580f1f2ae84SBarry Smith   PetscFunctionBegin;
581f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
582f1f2ae84SBarry Smith   PetscCall(PCMPIDestroy(pc));
583f1f2ae84SBarry Smith   PetscCall(PetscFree(pc->data));
584f1f2ae84SBarry Smith   PetscFunctionReturn(0);
585f1f2ae84SBarry Smith }
586f1f2ae84SBarry Smith 
587f1f2ae84SBarry Smith /*
588f1f2ae84SBarry Smith      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
589f1f2ae84SBarry Smith */
590d71ae5a4SJacob Faibussowitsch PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
591d71ae5a4SJacob Faibussowitsch {
592f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
593f1f2ae84SBarry Smith   MPI_Comm    comm;
594f1f2ae84SBarry Smith   PetscMPIInt size;
595f1f2ae84SBarry Smith   const char *prefix;
596f1f2ae84SBarry Smith 
597f1f2ae84SBarry Smith   PetscFunctionBegin;
598f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
599f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
600f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
601f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
602f1f2ae84SBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
603f1f2ae84SBarry Smith   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
604f1f2ae84SBarry Smith   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
605*5316cbedSBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
605*5316cbedSBarry Smith   PetscFunctionReturn(0);
606f1f2ae84SBarry Smith }
607f1f2ae84SBarry Smith 
608d71ae5a4SJacob Faibussowitsch PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
609d71ae5a4SJacob Faibussowitsch {
610f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
611f1f2ae84SBarry Smith 
612f1f2ae84SBarry Smith   PetscFunctionBegin;
613f1f2ae84SBarry Smith   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
614f1f2ae84SBarry Smith   PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
615f1f2ae84SBarry Smith   PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
616f1f2ae84SBarry Smith   PetscOptionsHeadEnd();
617f1f2ae84SBarry Smith   PetscFunctionReturn(0);
618f1f2ae84SBarry Smith }
619f1f2ae84SBarry Smith 
620f1f2ae84SBarry Smith /*MC
621f1580f4eSBarry Smith      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
622f1f2ae84SBarry Smith 
623f1f2ae84SBarry Smith    Level: beginner
624f1f2ae84SBarry Smith 
625f1580f4eSBarry Smith    Options Database Keys:
626f1f2ae84SBarry Smith +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
627f1f2ae84SBarry Smith .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
628f1f2ae84SBarry Smith .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
629f1f2ae84SBarry Smith -  -pc_mpi_always_use_server - use the server solver code even if the particular system is only being solved on one process; this option is only for debugging and testing purposes
630f1f2ae84SBarry Smith 
631f1f2ae84SBarry Smith    Notes:
632f1580f4eSBarry Smith    The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_
633f1f2ae84SBarry Smith 
634f1f2ae84SBarry Smith    The MPI linear solver server is not intended to scale user code to extremely large numbers of MPI ranks, but it should give reasonable speedup for
635f1f2ae84SBarry Smith    roughly 4 to 8 MPI ranks depending on the linear system being solved, the solver algorithm, and the hardware.
636f1f2ae84SBarry Smith 
637f1f2ae84SBarry Smith    It can be particularly useful for user OpenMP code or potentially user GPU code.
638f1f2ae84SBarry Smith 
639f1580f4eSBarry Smith    When the program is run with a single MPI rank, this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix -mpi_)
640f1580f4eSBarry Smith    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.
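
   For example (a sketch; the inner solver choices are illustrative), running with
      -mpi_linear_solver_server -ksp_type preonly -pc_type mpi -mpi_ksp_type cg -mpi_pc_type gamg
   makes the user's `KSP` forward each solve to the server, which solves it in parallel with CG preconditioned by GAMG.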
641f1f2ae84SBarry Smith 
642f1f2ae84SBarry Smith .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
643f1f2ae84SBarry Smith M*/
644d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
645d71ae5a4SJacob Faibussowitsch {
646f1f2ae84SBarry Smith   PC_MPI *km;
647f1f2ae84SBarry Smith 
648f1f2ae84SBarry Smith   PetscFunctionBegin;
6494dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&km));
650f1f2ae84SBarry Smith   pc->data = (void *)km;
651f1f2ae84SBarry Smith 
652f1f2ae84SBarry Smith   km->mincntperrank = 10000;
653f1f2ae84SBarry Smith 
654f1f2ae84SBarry Smith   pc->ops->setup          = PCSetUp_MPI;
655f1f2ae84SBarry Smith   pc->ops->apply          = PCApply_MPI;
656f1f2ae84SBarry Smith   pc->ops->destroy        = PCDestroy_MPI;
657f1f2ae84SBarry Smith   pc->ops->view           = PCView_MPI;
658f1f2ae84SBarry Smith   pc->ops->setfromoptions = PCSetFromOptions_MPI;
659f1f2ae84SBarry Smith   PetscFunctionReturn(0);
660f1f2ae84SBarry Smith }
661