xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 0316ec648ec12c0ddd7ba3b11a3407bb7396c5f1)
1f1f2ae84SBarry Smith /*
2f1f2ae84SBarry Smith     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3f1f2ae84SBarry Smith     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4f1f2ae84SBarry Smith 
5f1f2ae84SBarry Smith     That program may use OpenMP to compute the right hand side and matrix for the linear system
6f1f2ae84SBarry Smith 
7f1f2ae84SBarry Smith     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8f1f2ae84SBarry Smith 
9f1f2ae84SBarry Smith     The resulting KSP and PC can only be controlled via the options database, though some common commands
10f1f2ae84SBarry Smith     could be passed through the server.
11f1f2ae84SBarry Smith 
12f1f2ae84SBarry Smith */
13f1f2ae84SBarry Smith #include <petsc/private/pcimpl.h>
14f1f2ae84SBarry Smith #include <petsc/private/kspimpl.h>
155e1a0e3cSBarry Smith #include <petsc.h>
16f1f2ae84SBarry Smith 
17f1f2ae84SBarry Smith #define PC_MPI_MAX_RANKS  256
18f1f2ae84SBarry Smith #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
19f1f2ae84SBarry Smith 
20f1f2ae84SBarry Smith typedef struct {
21f1f2ae84SBarry Smith   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22f1f2ae84SBarry Smith   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23f1f2ae84SBarry Smith   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
24f1f2ae84SBarry Smith   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25f1f2ae84SBarry Smith   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26f1f2ae84SBarry Smith } PC_MPI;
27f1f2ae84SBarry Smith 
289371c9d4SSatish Balay typedef enum {
299371c9d4SSatish Balay   PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
30f1f2ae84SBarry Smith   PCMPI_CREATE,
31f1f2ae84SBarry Smith   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
32f1f2ae84SBarry Smith   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33f1f2ae84SBarry Smith   PCMPI_SOLVE,
34f1f2ae84SBarry Smith   PCMPI_VIEW,
35f1f2ae84SBarry Smith   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36f1f2ae84SBarry Smith } PCMPICommand;
37f1f2ae84SBarry Smith 
38f1f2ae84SBarry Smith static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
39f1f2ae84SBarry Smith static PetscBool PCMPICommSet = PETSC_FALSE;
40f1f2ae84SBarry Smith static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
415316cbedSBarry Smith static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS],PCMPISizes[PC_MPI_MAX_RANKS],PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
42f1f2ae84SBarry Smith 
43d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICommsCreate(void)
44d71ae5a4SJacob Faibussowitsch {
45f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
46f1f2ae84SBarry Smith   PetscMPIInt size, rank, i;
47f1f2ae84SBarry Smith 
48f1f2ae84SBarry Smith   PetscFunctionBegin;
49f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
50f1f2ae84SBarry Smith   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
51f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
52f1f2ae84SBarry Smith   /* comm for size 1 is useful only for debugging */
53f1f2ae84SBarry Smith   for (i = 0; i < size; i++) {
54f1f2ae84SBarry Smith     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
55f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
56f1f2ae84SBarry Smith     PCMPISolveCounts[i] = 0;
57f1f2ae84SBarry Smith     PCMPIKSPCounts[i]   = 0;
585316cbedSBarry Smith     PCMPIIterations[i]  = 0;
595316cbedSBarry Smith     PCMPISizes[i]       = 0;
60f1f2ae84SBarry Smith   }
61f1f2ae84SBarry Smith   PCMPICommSet = PETSC_TRUE;
62f1f2ae84SBarry Smith   PetscFunctionReturn(0);
63f1f2ae84SBarry Smith }
64f1f2ae84SBarry Smith 
65d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPICommsDestroy(void)
66d71ae5a4SJacob Faibussowitsch {
67f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
68f1f2ae84SBarry Smith   PetscMPIInt size, rank, i;
69f1f2ae84SBarry Smith 
70f1f2ae84SBarry Smith   PetscFunctionBegin;
71f1f2ae84SBarry Smith   if (!PCMPICommSet) PetscFunctionReturn(0);
72f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
73f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
74f1f2ae84SBarry Smith   for (i = 0; i < size; i++) {
75f1f2ae84SBarry Smith     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
76f1f2ae84SBarry Smith   }
77f1f2ae84SBarry Smith   PCMPICommSet = PETSC_FALSE;
78f1f2ae84SBarry Smith   PetscFunctionReturn(0);
79f1f2ae84SBarry Smith }
80f1f2ae84SBarry Smith 
81d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICreate(PC pc)
82d71ae5a4SJacob Faibussowitsch {
83f1f2ae84SBarry Smith   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
84f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
85f1f2ae84SBarry Smith   KSP         ksp;
86f1f2ae84SBarry Smith   PetscInt    N[2], mincntperrank = 0;
87f1f2ae84SBarry Smith   PetscMPIInt size;
88f1f2ae84SBarry Smith   Mat         sA;
89f1f2ae84SBarry Smith   char       *prefix;
90f1f2ae84SBarry Smith   PetscMPIInt len = 0;
91f1f2ae84SBarry Smith 
92f1f2ae84SBarry Smith   PetscFunctionBegin;
93f1f2ae84SBarry Smith   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
94f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
95f1f2ae84SBarry Smith   if (pc) {
96f1f2ae84SBarry Smith     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
97f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
98f1f2ae84SBarry Smith     PetscCall(MatGetSize(sA, &N[0], &N[1]));
99f1f2ae84SBarry Smith   }
100f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
101f1f2ae84SBarry Smith 
102f1f2ae84SBarry Smith   /* choose a suitable sized MPI_Comm for the problem to be solved on */
103f1f2ae84SBarry Smith   if (km) mincntperrank = km->mincntperrank;
104f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
105f1f2ae84SBarry Smith   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
106f1f2ae84SBarry Smith   if (comm == MPI_COMM_NULL) {
107f1f2ae84SBarry Smith     ksp = NULL;
108f1f2ae84SBarry Smith     PetscFunctionReturn(0);
109f1f2ae84SBarry Smith   }
110*0316ec64SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
111f1f2ae84SBarry Smith   PetscCall(KSPCreate(comm, &ksp));
112*0316ec64SBarry Smith   PetscCall(PetscLogStagePop());
113f1f2ae84SBarry Smith   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
114f1f2ae84SBarry Smith   if (pc) {
115f1f2ae84SBarry Smith     size_t slen;
116f1f2ae84SBarry Smith 
117f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_size(comm, &size));
118dad3da8eSBarry Smith     PCMPIKSPCounts[size - 1]++;
119f1f2ae84SBarry Smith     PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
120f1f2ae84SBarry Smith     PetscCall(PetscStrlen(prefix, &slen));
121f1f2ae84SBarry Smith     len = (PetscMPIInt)slen;
122f1f2ae84SBarry Smith   }
123f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
124f1f2ae84SBarry Smith   if (len) {
12548a46eb9SPierre Jolivet     if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
126f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
127f1f2ae84SBarry Smith     PetscCall(KSPSetOptionsPrefix(ksp, prefix));
128f1f2ae84SBarry Smith   }
129f1f2ae84SBarry Smith   PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
130f1f2ae84SBarry Smith   PetscFunctionReturn(0);
131f1f2ae84SBarry Smith }
132f1f2ae84SBarry Smith 
133d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISetMat(PC pc)
134d71ae5a4SJacob Faibussowitsch {
135f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
136f1f2ae84SBarry Smith   Mat                A;
137dd0d27b1SBarry Smith   PetscInt           N[2], n, *ia, *ja, j, bs;
138f1f2ae84SBarry Smith   Mat                sA;
139f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
140f1f2ae84SBarry Smith   KSP                ksp;
141f1f2ae84SBarry Smith   PetscLayout        layout;
142f1f2ae84SBarry Smith   const PetscInt    *IA = NULL, *JA = NULL;
143f1f2ae84SBarry Smith   const PetscInt    *range;
144f1f2ae84SBarry Smith   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
145f1f2ae84SBarry Smith   PetscScalar       *a;
146f1f2ae84SBarry Smith   const PetscScalar *sa = NULL;
147f1f2ae84SBarry Smith 
148f1f2ae84SBarry Smith   PetscFunctionBegin;
149f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
150f1f2ae84SBarry Smith   if (!ksp) PetscFunctionReturn(0);
151f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
152f1f2ae84SBarry Smith   if (pc) {
153f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_size(comm, &size));
154dad3da8eSBarry Smith     PCMPIMatCounts[size - 1]++;
155f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
156f1f2ae84SBarry Smith     PetscCall(MatGetSize(sA, &N[0], &N[1]));
157dd0d27b1SBarry Smith     PetscCall(MatGetBlockSize(sA, &bs));
158dd0d27b1SBarry Smith     /* need to broadcast symmetry flags etc if set */
159f1f2ae84SBarry Smith   }
160f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
161dd0d27b1SBarry Smith   PetscCallMPI(MPI_Bcast(&bs, 1, MPIU_INT, 0, comm));
162f1f2ae84SBarry Smith 
163f1f2ae84SBarry Smith   /* determine ownership ranges of matrix */
164f1f2ae84SBarry Smith   PetscCall(PetscLayoutCreate(comm, &layout));
165dd0d27b1SBarry Smith   PetscCall(PetscLayoutSetBlockSize(layout, bs));
166f1f2ae84SBarry Smith   PetscCall(PetscLayoutSetSize(layout, N[0]));
167f1f2ae84SBarry Smith   PetscCall(PetscLayoutSetUp(layout));
168f1f2ae84SBarry Smith   PetscCall(PetscLayoutGetLocalSize(layout, &n));
169f1f2ae84SBarry Smith 
170f1f2ae84SBarry Smith   /* copy over the matrix nonzero structure and values */
171f1f2ae84SBarry Smith   if (pc) {
172f1f2ae84SBarry Smith     NZ      = km->NZ;
173f1f2ae84SBarry Smith     NZdispl = km->NZdispl;
174f1f2ae84SBarry Smith     PetscCall(PetscLayoutGetRanges(layout, &range));
175f1f2ae84SBarry Smith     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
176f1f2ae84SBarry Smith     for (i = 0; i < size; i++) {
177f1f2ae84SBarry Smith       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
178f1f2ae84SBarry Smith       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
179f1f2ae84SBarry Smith     }
180f1f2ae84SBarry Smith     displi[0]  = 0;
181f1f2ae84SBarry Smith     NZdispl[0] = 0;
182f1f2ae84SBarry Smith     for (j = 1; j < size; j++) {
183f1f2ae84SBarry Smith       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
184f1f2ae84SBarry Smith       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
185f1f2ae84SBarry Smith     }
186f1f2ae84SBarry Smith     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
187f1f2ae84SBarry Smith   }
188f1f2ae84SBarry Smith   PetscCall(PetscLayoutDestroy(&layout));
189f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
190f1f2ae84SBarry Smith 
191f1f2ae84SBarry Smith   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
192f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
193f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
194f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
195f1f2ae84SBarry Smith 
196f1f2ae84SBarry Smith   if (pc) {
197f1f2ae84SBarry Smith     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
198f1f2ae84SBarry Smith     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
199f1f2ae84SBarry Smith   }
200f1f2ae84SBarry Smith 
201ad540459SPierre Jolivet   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
202f1f2ae84SBarry Smith   ia[0] = 0;
203*0316ec64SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
204f1f2ae84SBarry Smith   PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
205dd0d27b1SBarry Smith   PetscCall(MatSetBlockSize(A, bs));
206f1f2ae84SBarry Smith   PetscCall(MatSetOptionsPrefix(A, "mpi_"));
207f1f2ae84SBarry Smith 
208f1f2ae84SBarry Smith   PetscCall(PetscFree3(ia, ja, a));
209f1f2ae84SBarry Smith   PetscCall(KSPSetOperators(ksp, A, A));
210f1f2ae84SBarry Smith   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
211*0316ec64SBarry Smith   PetscCall(PetscLogStagePop());
212f1f2ae84SBarry Smith   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
213f1f2ae84SBarry Smith     const PetscInt *range;
214f1f2ae84SBarry Smith 
215f1f2ae84SBarry Smith     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
216f1f2ae84SBarry Smith     for (i = 0; i < size; i++) {
217f1f2ae84SBarry Smith       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
218f1f2ae84SBarry Smith       km->displ[i]     = (PetscMPIInt)range[i];
219f1f2ae84SBarry Smith     }
220f1f2ae84SBarry Smith   }
221f1f2ae84SBarry Smith   PetscCall(MatDestroy(&A));
222f1f2ae84SBarry Smith   PetscCall(KSPSetFromOptions(ksp));
223f1f2ae84SBarry Smith   PetscFunctionReturn(0);
224f1f2ae84SBarry Smith }
225f1f2ae84SBarry Smith 
226d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIUpdateMatValues(PC pc)
227d71ae5a4SJacob Faibussowitsch {
228f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
229f1f2ae84SBarry Smith   KSP                ksp;
230f1f2ae84SBarry Smith   Mat                sA, A;
231f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
232f1f2ae84SBarry Smith   PetscScalar       *a;
233f1f2ae84SBarry Smith   PetscCount         nz;
234f1f2ae84SBarry Smith   const PetscScalar *sa = NULL;
235dad3da8eSBarry Smith   PetscMPIInt        size;
236f1f2ae84SBarry Smith 
237f1f2ae84SBarry Smith   PetscFunctionBegin;
238f1f2ae84SBarry Smith   if (pc) {
239f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
240f1f2ae84SBarry Smith     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
241f1f2ae84SBarry Smith   }
242f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
243f1f2ae84SBarry Smith   if (!ksp) PetscFunctionReturn(0);
244f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
245dad3da8eSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
246dad3da8eSBarry Smith   PCMPIMatCounts[size - 1]++;
247f1f2ae84SBarry Smith   PetscCall(KSPGetOperators(ksp, NULL, &A));
248f1f2ae84SBarry Smith   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
249f1f2ae84SBarry Smith   PetscCall(PetscMalloc1(nz, &a));
250f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
251f1f2ae84SBarry Smith   if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
252f1f2ae84SBarry Smith   PetscCall(MatUpdateMPIAIJWithArray(A, a));
253f1f2ae84SBarry Smith   PetscCall(PetscFree(a));
254f1f2ae84SBarry Smith   PetscFunctionReturn(0);
255f1f2ae84SBarry Smith }
256f1f2ae84SBarry Smith 
257d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
258d71ae5a4SJacob Faibussowitsch {
259f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
260f1f2ae84SBarry Smith   KSP                ksp;
261f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
262f1f2ae84SBarry Smith   const PetscScalar *sb   = NULL, *x;
263f1f2ae84SBarry Smith   PetscScalar       *b, *sx = NULL;
2645316cbedSBarry Smith   PetscInt           its,n;
2655316cbedSBarry Smith   PetscMPIInt        size;
266f1f2ae84SBarry Smith 
267f1f2ae84SBarry Smith   PetscFunctionBegin;
268f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
269f1f2ae84SBarry Smith   if (!ksp) PetscFunctionReturn(0);
270f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
271f1f2ae84SBarry Smith 
272f1f2ae84SBarry Smith   /* TODO: optimize code to not require building counts/displ everytime */
273f1f2ae84SBarry Smith 
274f1f2ae84SBarry Smith   /* scatterv rhs */
275dad3da8eSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
2765316cbedSBarry Smith   if (pc) {
2775316cbedSBarry Smith     PetscInt N;
2785316cbedSBarry Smith 
279dad3da8eSBarry Smith     PCMPISolveCounts[size - 1]++;
2805316cbedSBarry Smith     PetscCall(MatGetSize(pc->pmat,&N,NULL));;
2815316cbedSBarry Smith     PCMPISizes[size - 1] += N;
282f1f2ae84SBarry Smith     PetscCall(VecGetArrayRead(B, &sb));
283f1f2ae84SBarry Smith   }
284f1f2ae84SBarry Smith   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
285f1f2ae84SBarry Smith   PetscCall(VecGetArray(ksp->vec_rhs, &b));
286f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
287f1f2ae84SBarry Smith   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
288f1f2ae84SBarry Smith   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
289f1f2ae84SBarry Smith 
290*0316ec64SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
291f1f2ae84SBarry Smith   PetscCall(KSPSolve(ksp, NULL, NULL));
292*0316ec64SBarry Smith   PetscCall(PetscLogStagePop());
2935316cbedSBarry Smith   PetscCall(KSPGetIterationNumber(ksp,&its));
2945316cbedSBarry Smith   PCMPIIterations[size - 1] += its;
295f1f2ae84SBarry Smith 
296f1f2ae84SBarry Smith   /* gather solution */
297f1f2ae84SBarry Smith   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
298f1f2ae84SBarry Smith   if (pc) PetscCall(VecGetArray(X, &sx));
299f1f2ae84SBarry Smith   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
300f1f2ae84SBarry Smith   if (pc) PetscCall(VecRestoreArray(X, &sx));
301f1f2ae84SBarry Smith   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
302f1f2ae84SBarry Smith   PetscFunctionReturn(0);
303f1f2ae84SBarry Smith }
304f1f2ae84SBarry Smith 
305d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIDestroy(PC pc)
306d71ae5a4SJacob Faibussowitsch {
307f1f2ae84SBarry Smith   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
308f1f2ae84SBarry Smith   KSP      ksp;
309f1f2ae84SBarry Smith   MPI_Comm comm = PC_MPI_COMM_WORLD;
310f1f2ae84SBarry Smith 
311f1f2ae84SBarry Smith   PetscFunctionBegin;
312f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
313f1f2ae84SBarry Smith   if (!ksp) PetscFunctionReturn(0);
314f1f2ae84SBarry Smith   PetscCall(KSPDestroy(&ksp));
315f1f2ae84SBarry Smith   PetscFunctionReturn(0);
316f1f2ae84SBarry Smith }
317f1f2ae84SBarry Smith 
318f1f2ae84SBarry Smith /*@C
319f1f2ae84SBarry Smith      PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
320f1580f4eSBarry Smith      parallel `KSP` solves and management of parallel `KSP` objects.
321f1f2ae84SBarry Smith 
322f1f2ae84SBarry Smith      Logically collective on all MPI ranks except 0
323f1f2ae84SBarry Smith 
324f1580f4eSBarry Smith    Options Database Keys:
325f1f2ae84SBarry Smith +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
326f1f2ae84SBarry Smith -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
327f1f2ae84SBarry Smith 
328f1580f4eSBarry Smith      Note:
329f1f2ae84SBarry Smith       This is normally started automatically in `PetscInitialize()` when the option is provided
330f1f2ae84SBarry Smith 
331f1f2ae84SBarry Smith      Developer Notes:
332f1580f4eSBarry Smith        When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
333f1580f4eSBarry Smith        written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
334f1580f4eSBarry Smith        (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.
335f1f2ae84SBarry Smith 
336f1580f4eSBarry Smith        Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
337f1f2ae84SBarry Smith 
338f1f2ae84SBarry Smith      Level: developer
339f1f2ae84SBarry Smith 
340f1580f4eSBarry Smith .seealso: `PCMPIServerEnd()`, `PCMPI`
341f1f2ae84SBarry Smith @*/
342d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerBegin(void)
343d71ae5a4SJacob Faibussowitsch {
344f1f2ae84SBarry Smith   PetscMPIInt rank;
345f1f2ae84SBarry Smith 
346f1f2ae84SBarry Smith   PetscFunctionBegin;
347f1f2ae84SBarry Smith   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
3485e1a0e3cSBarry Smith   if (PetscDefined(USE_SINGLE_LIBRARY)) {
3495e1a0e3cSBarry Smith     PetscCall(VecInitializePackage());
3505e1a0e3cSBarry Smith     PetscCall(MatInitializePackage());
3515e1a0e3cSBarry Smith     PetscCall(DMInitializePackage());
3525e1a0e3cSBarry Smith     PetscCall(PCInitializePackage());
3535e1a0e3cSBarry Smith     PetscCall(KSPInitializePackage());
3545e1a0e3cSBarry Smith     PetscCall(SNESInitializePackage());
3555e1a0e3cSBarry Smith     PetscCall(TSInitializePackage());
3565e1a0e3cSBarry Smith     PetscCall(TaoInitializePackage());
3575e1a0e3cSBarry Smith   }
3585e1a0e3cSBarry Smith 
359f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
360f1f2ae84SBarry Smith   if (rank == 0) {
361f1f2ae84SBarry Smith     PETSC_COMM_WORLD = PETSC_COMM_SELF;
362f1f2ae84SBarry Smith     PetscFunctionReturn(0);
363f1f2ae84SBarry Smith   }
364f1f2ae84SBarry Smith 
365f1f2ae84SBarry Smith   while (PETSC_TRUE) {
366f1f2ae84SBarry Smith     PCMPICommand request = PCMPI_CREATE;
367f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
368f1f2ae84SBarry Smith     switch (request) {
369d71ae5a4SJacob Faibussowitsch     case PCMPI_CREATE:
370d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPICreate(NULL));
371d71ae5a4SJacob Faibussowitsch       break;
372d71ae5a4SJacob Faibussowitsch     case PCMPI_SET_MAT:
373d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPISetMat(NULL));
374d71ae5a4SJacob Faibussowitsch       break;
375d71ae5a4SJacob Faibussowitsch     case PCMPI_UPDATE_MAT_VALUES:
376d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPIUpdateMatValues(NULL));
377d71ae5a4SJacob Faibussowitsch       break;
378f1f2ae84SBarry Smith     case PCMPI_VIEW:
379f1f2ae84SBarry Smith       // PetscCall(PCMPIView(NULL));
380f1f2ae84SBarry Smith       break;
381d71ae5a4SJacob Faibussowitsch     case PCMPI_SOLVE:
382d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPISolve(NULL, NULL, NULL));
383d71ae5a4SJacob Faibussowitsch       break;
384d71ae5a4SJacob Faibussowitsch     case PCMPI_DESTROY:
385d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPIDestroy(NULL));
386d71ae5a4SJacob Faibussowitsch       break;
387f1f2ae84SBarry Smith     case PCMPI_EXIT:
388f1f2ae84SBarry Smith       PetscCall(PetscFinalize());
389f1f2ae84SBarry Smith       exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
390f1f2ae84SBarry Smith       break;
391d71ae5a4SJacob Faibussowitsch     default:
392d71ae5a4SJacob Faibussowitsch       break;
393f1f2ae84SBarry Smith     }
394f1f2ae84SBarry Smith   }
395f1f2ae84SBarry Smith   PetscFunctionReturn(0);
396f1f2ae84SBarry Smith }
397f1f2ae84SBarry Smith 
398f1f2ae84SBarry Smith /*@C
399f1f2ae84SBarry Smith      PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
400f1580f4eSBarry Smith      parallel KSP solves and management of parallel `KSP` objects.
401f1f2ae84SBarry Smith 
402f1f2ae84SBarry Smith      Logically collective on all MPI ranks except 0
403f1f2ae84SBarry Smith 
404f1580f4eSBarry Smith      Note:
405f1f2ae84SBarry Smith       This is normally ended automatically in `PetscFinalize()` when the option is provided
406f1f2ae84SBarry Smith 
407f1f2ae84SBarry Smith      Level: developer
408f1f2ae84SBarry Smith 
409f1580f4eSBarry Smith .seealso: `PCMPIServerBegin()`, `PCMPI`
410f1f2ae84SBarry Smith @*/
411d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerEnd(void)
412d71ae5a4SJacob Faibussowitsch {
413f1f2ae84SBarry Smith   PCMPICommand request = PCMPI_EXIT;
414f1f2ae84SBarry Smith 
415f1f2ae84SBarry Smith   PetscFunctionBegin;
416f1f2ae84SBarry Smith   if (PetscGlobalRank == 0) {
417f1f2ae84SBarry Smith     PetscViewer       viewer = NULL;
418f1f2ae84SBarry Smith     PetscViewerFormat format;
419f1f2ae84SBarry Smith 
420f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
421f1f2ae84SBarry Smith     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
422f1f2ae84SBarry Smith     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
423f1f2ae84SBarry Smith     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
424f1f2ae84SBarry Smith     PetscOptionsEnd();
425f1f2ae84SBarry Smith     if (viewer) {
426f1f2ae84SBarry Smith       PetscBool isascii;
427f1f2ae84SBarry Smith 
428f1f2ae84SBarry Smith       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
429f1f2ae84SBarry Smith       if (isascii) {
430f1f2ae84SBarry Smith         PetscMPIInt size;
4315316cbedSBarry Smith         PetscMPIInt i;
432f1f2ae84SBarry Smith 
433f1f2ae84SBarry Smith         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
4345316cbedSBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
4355316cbedSBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
4365316cbedSBarry Smith         if (PCMPIKSPCountsSeq) {
4375316cbedSBarry Smith           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq,  PCMPIKSPCountsSeq,PCMPISizesSeq/PCMPISolveCountsSeq,PCMPIIterationsSeq/PCMPISolveCountsSeq));
438f1f2ae84SBarry Smith         }
4395316cbedSBarry Smith         for (i = 0; i < size; i++) {
4405316cbedSBarry Smith           if (PCMPIKSPCounts[i]) {
4415316cbedSBarry Smith             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT  "            %" PetscInt_FMT  "            %" PetscInt_FMT  "\n", i+1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i]/PCMPISolveCounts[i],PCMPIIterations[i]/PCMPISolveCounts[i]));
4425316cbedSBarry Smith           }
4435316cbedSBarry Smith         }
444f1f2ae84SBarry Smith       }
445f1f2ae84SBarry Smith       PetscCall(PetscViewerDestroy(&viewer));
446f1f2ae84SBarry Smith     }
447f1f2ae84SBarry Smith   }
448f1f2ae84SBarry Smith   PetscCall(PCMPICommsDestroy());
449f1f2ae84SBarry Smith   PetscFunctionReturn(0);
450f1f2ae84SBarry Smith }
451f1f2ae84SBarry Smith 
452f1f2ae84SBarry Smith /*
453f1f2ae84SBarry Smith     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
454f1f2ae84SBarry Smith     because, for example, the problem is small. This version is more efficient because it does not require copying any data
455f1f2ae84SBarry Smith */
456d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_Seq(PC pc)
457d71ae5a4SJacob Faibussowitsch {
458f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
459f1f2ae84SBarry Smith   Mat         sA;
460f1f2ae84SBarry Smith   const char *prefix;
461f1f2ae84SBarry Smith 
462f1f2ae84SBarry Smith   PetscFunctionBegin;
463f1f2ae84SBarry Smith   PetscCall(PCGetOperators(pc, NULL, &sA));
464f1f2ae84SBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
465f1f2ae84SBarry Smith   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
466f1f2ae84SBarry Smith   PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
467f1f2ae84SBarry Smith   PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
468f1f2ae84SBarry Smith   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
469f1f2ae84SBarry Smith   PetscCall(KSPSetFromOptions(km->ksps[0]));
470f1f2ae84SBarry Smith   PetscCall(KSPSetUp(km->ksps[0]));
471f1f2ae84SBarry Smith   PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n");
472f1f2ae84SBarry Smith   PCMPIKSPCountsSeq++;
473f1f2ae84SBarry Smith   PetscFunctionReturn(0);
474f1f2ae84SBarry Smith }
475f1f2ae84SBarry Smith 
476d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
477d71ae5a4SJacob Faibussowitsch {
478f1f2ae84SBarry Smith   PC_MPI   *km = (PC_MPI *)pc->data;
4795316cbedSBarry Smith   PetscInt its,n;
4805316cbedSBarry Smith   Mat      A;
481f1f2ae84SBarry Smith 
482f1f2ae84SBarry Smith   PetscFunctionBegin;
483f1f2ae84SBarry Smith   PetscCall(KSPSolve(km->ksps[0], b, x));
4845316cbedSBarry Smith   PetscCall(KSPGetIterationNumber(km->ksps[0],&its));
485f1f2ae84SBarry Smith   PCMPISolveCountsSeq++;
4865316cbedSBarry Smith   PCMPIIterationsSeq += its;
4875316cbedSBarry Smith   PetscCall(KSPGetOperators(km->ksps[0],NULL,&A));
4885316cbedSBarry Smith   PetscCall(MatGetSize(A,&n,NULL));
4895316cbedSBarry Smith   PCMPISizesSeq += n;
490f1f2ae84SBarry Smith   PetscFunctionReturn(0);
491f1f2ae84SBarry Smith }
492f1f2ae84SBarry Smith 
493d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
494d71ae5a4SJacob Faibussowitsch {
495f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
496f1f2ae84SBarry Smith   const char *prefix;
497f1f2ae84SBarry Smith 
498f1f2ae84SBarry Smith   PetscFunctionBegin;
499f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
500f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
501f1f2ae84SBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
502f1f2ae84SBarry Smith   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
503f1f2ae84SBarry Smith   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
5045316cbedSBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
505f1f2ae84SBarry Smith   PetscFunctionReturn(0);
506f1f2ae84SBarry Smith }
507f1f2ae84SBarry Smith 
508d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCDestroy_Seq(PC pc)
509d71ae5a4SJacob Faibussowitsch {
510f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
511f1f2ae84SBarry Smith 
512f1f2ae84SBarry Smith   PetscFunctionBegin;
513f1f2ae84SBarry Smith   PetscCall(KSPDestroy(&km->ksps[0]));
514f1f2ae84SBarry Smith   PetscCall(PetscFree(pc->data));
515f1f2ae84SBarry Smith   PetscFunctionReturn(0);
516f1f2ae84SBarry Smith }
517f1f2ae84SBarry Smith 
518f1f2ae84SBarry Smith /*
519f1f2ae84SBarry Smith      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
520f1f2ae84SBarry Smith      right hand side to the parallel PC
521f1f2ae84SBarry Smith */
522d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_MPI(PC pc)
523d71ae5a4SJacob Faibussowitsch {
524f1f2ae84SBarry Smith   PC_MPI      *km = (PC_MPI *)pc->data;
525f1f2ae84SBarry Smith   PetscMPIInt  rank, size;
526f1f2ae84SBarry Smith   PCMPICommand request;
527f1f2ae84SBarry Smith   PetscBool    newmatrix = PETSC_FALSE;
528f1f2ae84SBarry Smith 
529f1f2ae84SBarry Smith   PetscFunctionBegin;
530f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
531f1f2ae84SBarry Smith   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
532f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
533f1f2ae84SBarry Smith 
534f1f2ae84SBarry Smith   if (!pc->setupcalled) {
535f1f2ae84SBarry Smith     if (!km->alwaysuseserver) {
536f1f2ae84SBarry Smith       PetscInt n;
537f1f2ae84SBarry Smith       Mat      sA;
538f1f2ae84SBarry Smith       /* short circuit for small systems */
539f1f2ae84SBarry Smith       PetscCall(PCGetOperators(pc, &sA, &sA));
540f1f2ae84SBarry Smith       PetscCall(MatGetSize(sA, &n, NULL));
541f1f2ae84SBarry Smith       if (n < 2 * km->mincntperrank - 1 || size == 1) {
542f1f2ae84SBarry Smith         pc->ops->setup   = NULL;
543f1f2ae84SBarry Smith         pc->ops->apply   = PCApply_Seq;
544f1f2ae84SBarry Smith         pc->ops->destroy = PCDestroy_Seq;
545f1f2ae84SBarry Smith         pc->ops->view    = PCView_Seq;
546f1f2ae84SBarry Smith         PetscCall(PCSetUp_Seq(pc));
547f1f2ae84SBarry Smith         PetscFunctionReturn(0);
548f1f2ae84SBarry Smith       }
549f1f2ae84SBarry Smith     }
550f1f2ae84SBarry Smith 
551f1f2ae84SBarry Smith     request = PCMPI_CREATE;
552f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
553f1f2ae84SBarry Smith     PetscCall(PCMPICreate(pc));
554f1f2ae84SBarry Smith     newmatrix = PETSC_TRUE;
5559371c9d4SSatish Balay   }
5569371c9d4SSatish Balay   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
557f1f2ae84SBarry Smith 
558f1f2ae84SBarry Smith   if (newmatrix) {
559f1f2ae84SBarry Smith     PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n");
560f1f2ae84SBarry Smith     request = PCMPI_SET_MAT;
561f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
562f1f2ae84SBarry Smith     PetscCall(PCMPISetMat(pc));
563f1f2ae84SBarry Smith   } else {
564f1f2ae84SBarry Smith     PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n");
565f1f2ae84SBarry Smith     request = PCMPI_UPDATE_MAT_VALUES;
566f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
567f1f2ae84SBarry Smith     PetscCall(PCMPIUpdateMatValues(pc));
568f1f2ae84SBarry Smith   }
569f1f2ae84SBarry Smith   PetscFunctionReturn(0);
570f1f2ae84SBarry Smith }
571f1f2ae84SBarry Smith 
572d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
573d71ae5a4SJacob Faibussowitsch {
574f1f2ae84SBarry Smith   PCMPICommand request = PCMPI_SOLVE;
575f1f2ae84SBarry Smith 
576f1f2ae84SBarry Smith   PetscFunctionBegin;
577f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
578f1f2ae84SBarry Smith   PetscCall(PCMPISolve(pc, b, x));
579f1f2ae84SBarry Smith   PetscFunctionReturn(0);
580f1f2ae84SBarry Smith }
581f1f2ae84SBarry Smith 
582d71ae5a4SJacob Faibussowitsch PetscErrorCode PCDestroy_MPI(PC pc)
583d71ae5a4SJacob Faibussowitsch {
584f1f2ae84SBarry Smith   PCMPICommand request = PCMPI_DESTROY;
585f1f2ae84SBarry Smith 
586f1f2ae84SBarry Smith   PetscFunctionBegin;
587f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
588f1f2ae84SBarry Smith   PetscCall(PCMPIDestroy(pc));
589f1f2ae84SBarry Smith   PetscCall(PetscFree(pc->data));
590f1f2ae84SBarry Smith   PetscFunctionReturn(0);
591f1f2ae84SBarry Smith }
592f1f2ae84SBarry Smith 
593f1f2ae84SBarry Smith /*
594f1f2ae84SBarry Smith      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
595f1f2ae84SBarry Smith */
596d71ae5a4SJacob Faibussowitsch PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
597d71ae5a4SJacob Faibussowitsch {
598f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
599f1f2ae84SBarry Smith   MPI_Comm    comm;
600f1f2ae84SBarry Smith   PetscMPIInt size;
601f1f2ae84SBarry Smith   const char *prefix;
602f1f2ae84SBarry Smith 
603f1f2ae84SBarry Smith   PetscFunctionBegin;
604f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
605f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
606f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
607f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
608f1f2ae84SBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
609f1f2ae84SBarry Smith   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
610f1f2ae84SBarry Smith   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
6115316cbedSBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));  PetscFunctionReturn(0);
612f1f2ae84SBarry Smith }
613f1f2ae84SBarry Smith 
614d71ae5a4SJacob Faibussowitsch PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
615d71ae5a4SJacob Faibussowitsch {
616f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
617f1f2ae84SBarry Smith 
618f1f2ae84SBarry Smith   PetscFunctionBegin;
619f1f2ae84SBarry Smith   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
620f1f2ae84SBarry Smith   PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
621f1f2ae84SBarry Smith   PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
622f1f2ae84SBarry Smith   PetscOptionsHeadEnd();
623f1f2ae84SBarry Smith   PetscFunctionReturn(0);
624f1f2ae84SBarry Smith }
625f1f2ae84SBarry Smith 
626f1f2ae84SBarry Smith /*MC
627f1580f4eSBarry Smith      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
628f1f2ae84SBarry Smith 
629f1f2ae84SBarry Smith    Level: beginner
630f1f2ae84SBarry Smith 
631f1580f4eSBarry Smith    Options Database Keys:
632f1f2ae84SBarry Smith +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
633f1f2ae84SBarry Smith .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
634f1f2ae84SBarry Smith .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
635f1f2ae84SBarry Smith -  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes
636f1f2ae84SBarry Smith 
637f1f2ae84SBarry Smith    Notes:
638f1580f4eSBarry Smith    The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_
639f1f2ae84SBarry Smith 
640f1f2ae84SBarry Smith    The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
641f1f2ae84SBarry Smith    potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware.
642f1f2ae84SBarry Smith 
643f1f2ae84SBarry Smith    It can be particularly useful for user OpenMP code or potentially user GPU code.
644f1f2ae84SBarry Smith 
645f1580f4eSBarry Smith    When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix of -mpi)
646f1580f4eSBarry Smith    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.
647f1f2ae84SBarry Smith 
648*0316ec64SBarry Smith    The solver options for `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
649*0316ec64SBarry Smith    because they are not the actual solver objects.
650*0316ec64SBarry Smith 
651*0316ec64SBarry Smith    When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other
652*0316ec64SBarry Smith    stages will be confusing since the event times are are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
653*0316ec64SBarry Smith 
654f1f2ae84SBarry Smith .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
655f1f2ae84SBarry Smith M*/
656d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
657d71ae5a4SJacob Faibussowitsch {
658f1f2ae84SBarry Smith   PC_MPI *km;
659f1f2ae84SBarry Smith 
660f1f2ae84SBarry Smith   PetscFunctionBegin;
6614dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&km));
662f1f2ae84SBarry Smith   pc->data = (void *)km;
663f1f2ae84SBarry Smith 
664f1f2ae84SBarry Smith   km->mincntperrank = 10000;
665f1f2ae84SBarry Smith 
666f1f2ae84SBarry Smith   pc->ops->setup          = PCSetUp_MPI;
667f1f2ae84SBarry Smith   pc->ops->apply          = PCApply_MPI;
668f1f2ae84SBarry Smith   pc->ops->destroy        = PCDestroy_MPI;
669f1f2ae84SBarry Smith   pc->ops->view           = PCView_MPI;
670f1f2ae84SBarry Smith   pc->ops->setfromoptions = PCSetFromOptions_MPI;
671f1f2ae84SBarry Smith   PetscFunctionReturn(0);
672f1f2ae84SBarry Smith }
673