xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 9f0612e409f6220a780be6348417bea34ef34962)
1f1f2ae84SBarry Smith /*
2f1f2ae84SBarry Smith     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3f1f2ae84SBarry Smith     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4f1f2ae84SBarry Smith 
5dd8e379bSPierre Jolivet     That program may use OpenMP to compute the right-hand side and matrix for the linear system
6f1f2ae84SBarry Smith 
7f1f2ae84SBarry Smith     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8f1f2ae84SBarry Smith 
9f1f2ae84SBarry Smith     The resulting KSP and PC can only be controlled via the options database, though some common commands
10f1f2ae84SBarry Smith     could be passed through the server.
11f1f2ae84SBarry Smith 
12f1f2ae84SBarry Smith */
13*9f0612e4SBarry Smith #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
14f1f2ae84SBarry Smith #include <petsc/private/kspimpl.h>
15789afff4SPierre Jolivet #include <petscts.h>
16789afff4SPierre Jolivet #include <petsctao.h>
17*9f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
18*9f0612e4SBarry Smith   #include <pthread.h>
19*9f0612e4SBarry Smith #endif
20f1f2ae84SBarry Smith 
21f1f2ae84SBarry Smith #define PC_MPI_MAX_RANKS  256
22f1f2ae84SBarry Smith #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
23f1f2ae84SBarry Smith 
24f1f2ae84SBarry Smith typedef struct {
25*9f0612e4SBarry Smith   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
26f1f2ae84SBarry Smith   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
27f1f2ae84SBarry Smith   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
28*9f0612e4SBarry Smith   PetscInt    mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
29*9f0612e4SBarry Smith   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
30f1f2ae84SBarry Smith } PC_MPI;
31f1f2ae84SBarry Smith 
329371c9d4SSatish Balay typedef enum {
339371c9d4SSatish Balay   PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
34f1f2ae84SBarry Smith   PCMPI_CREATE,
35f1f2ae84SBarry Smith   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
36f1f2ae84SBarry Smith   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
37f1f2ae84SBarry Smith   PCMPI_SOLVE,
38f1f2ae84SBarry Smith   PCMPI_VIEW,
39*9f0612e4SBarry Smith   PCMPI_DESTROY /* destroy a PC that is no longer needed */
40f1f2ae84SBarry Smith } PCMPICommand;
41f1f2ae84SBarry Smith 
42f1f2ae84SBarry Smith static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
43f1f2ae84SBarry Smith static PetscBool     PCMPICommSet = PETSC_FALSE;
44f1f2ae84SBarry Smith static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
455316cbedSBarry Smith static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
46*9f0612e4SBarry Smith static PetscLogEvent EventServerDist, EventServerDistMPI;
47*9f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
48*9f0612e4SBarry Smith static pthread_mutex_t *PCMPIServerLocks;
49*9f0612e4SBarry Smith #else
50*9f0612e4SBarry Smith static void *PCMPIServerLocks;
51*9f0612e4SBarry Smith #endif
52f1f2ae84SBarry Smith 
53d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICommsCreate(void)
54d71ae5a4SJacob Faibussowitsch {
55f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
56f1f2ae84SBarry Smith   PetscMPIInt size, rank, i;
57f1f2ae84SBarry Smith 
58f1f2ae84SBarry Smith   PetscFunctionBegin;
59f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
60f1f2ae84SBarry Smith   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
61f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
62f1f2ae84SBarry Smith   /* comm for size 1 is useful only for debugging */
63f1f2ae84SBarry Smith   for (i = 0; i < size; i++) {
64f1f2ae84SBarry Smith     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
65f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
66f1f2ae84SBarry Smith     PCMPISolveCounts[i] = 0;
67f1f2ae84SBarry Smith     PCMPIKSPCounts[i]   = 0;
685316cbedSBarry Smith     PCMPIIterations[i]  = 0;
695316cbedSBarry Smith     PCMPISizes[i]       = 0;
70f1f2ae84SBarry Smith   }
71f1f2ae84SBarry Smith   PCMPICommSet = PETSC_TRUE;
723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
73f1f2ae84SBarry Smith }
74f1f2ae84SBarry Smith 
75*9f0612e4SBarry Smith static PetscErrorCode PCMPICommsDestroy(void)
76d71ae5a4SJacob Faibussowitsch {
77f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
78f1f2ae84SBarry Smith   PetscMPIInt size, rank, i;
79f1f2ae84SBarry Smith 
80f1f2ae84SBarry Smith   PetscFunctionBegin;
813ba16761SJacob Faibussowitsch   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
82f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
83f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
84f1f2ae84SBarry Smith   for (i = 0; i < size; i++) {
85f1f2ae84SBarry Smith     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
86f1f2ae84SBarry Smith   }
87f1f2ae84SBarry Smith   PCMPICommSet = PETSC_FALSE;
883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
89f1f2ae84SBarry Smith }
90f1f2ae84SBarry Smith 
91d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICreate(PC pc)
92d71ae5a4SJacob Faibussowitsch {
93f1f2ae84SBarry Smith   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
94f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
95f1f2ae84SBarry Smith   KSP         ksp;
96f1f2ae84SBarry Smith   PetscInt    N[2], mincntperrank = 0;
97f1f2ae84SBarry Smith   PetscMPIInt size;
98f1f2ae84SBarry Smith   Mat         sA;
993821be0aSBarry Smith   char       *cprefix = NULL;
100f1f2ae84SBarry Smith   PetscMPIInt len     = 0;
101f1f2ae84SBarry Smith 
102f1f2ae84SBarry Smith   PetscFunctionBegin;
103*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
104f1f2ae84SBarry Smith   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
105f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
106f1f2ae84SBarry Smith   if (pc) {
107f1f2ae84SBarry Smith     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
108f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
109f1f2ae84SBarry Smith     PetscCall(MatGetSize(sA, &N[0], &N[1]));
110f1f2ae84SBarry Smith   }
111f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
112f1f2ae84SBarry Smith 
113f1f2ae84SBarry Smith   /* choose a suitable sized MPI_Comm for the problem to be solved on */
114f1f2ae84SBarry Smith   if (km) mincntperrank = km->mincntperrank;
115f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
116f1f2ae84SBarry Smith   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
117f1f2ae84SBarry Smith   if (comm == MPI_COMM_NULL) {
118f1f2ae84SBarry Smith     ksp                = NULL;
119*9f0612e4SBarry Smith     PCMPIServerInSolve = PETSC_FALSE;
1203ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
121f1f2ae84SBarry Smith   }
1220316ec64SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
123f1f2ae84SBarry Smith   PetscCall(KSPCreate(comm, &ksp));
1243821be0aSBarry Smith   PetscCall(KSPSetNestLevel(ksp, 1));
1253821be0aSBarry Smith   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
1260316ec64SBarry Smith   PetscCall(PetscLogStagePop());
127f1f2ae84SBarry Smith   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
128f1f2ae84SBarry Smith   if (pc) {
129f1f2ae84SBarry Smith     size_t      slen;
1303821be0aSBarry Smith     const char *prefix = NULL;
131f9818f3cSJose E. Roman     char       *found  = NULL;
132f1f2ae84SBarry Smith 
133f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_size(comm, &size));
134dad3da8eSBarry Smith     PCMPIKSPCounts[size - 1]++;
1353821be0aSBarry Smith     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
1363821be0aSBarry Smith     PetscCall(PCGetOptionsPrefix(pc, &prefix));
1373821be0aSBarry Smith     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
1383821be0aSBarry Smith     PetscCall(PetscStrallocpy(prefix, &cprefix));
1393821be0aSBarry Smith     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
1403821be0aSBarry Smith     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
1413821be0aSBarry Smith     *found = 0;
1423821be0aSBarry Smith     PetscCall(PetscStrlen(cprefix, &slen));
143f1f2ae84SBarry Smith     len = (PetscMPIInt)slen;
144f1f2ae84SBarry Smith   }
145f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
146f1f2ae84SBarry Smith   if (len) {
1473821be0aSBarry Smith     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
1483821be0aSBarry Smith     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
1493821be0aSBarry Smith     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
150f1f2ae84SBarry Smith   }
1513821be0aSBarry Smith   PetscCall(PetscFree(cprefix));
152*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
1533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
154f1f2ae84SBarry Smith }
155f1f2ae84SBarry Smith 
156d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISetMat(PC pc)
157d71ae5a4SJacob Faibussowitsch {
158f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
159f1f2ae84SBarry Smith   Mat                A;
160*9f0612e4SBarry Smith   PetscInt           m, n, j, bs;
161f1f2ae84SBarry Smith   Mat                sA;
162f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
163f1f2ae84SBarry Smith   KSP                ksp;
164f1f2ae84SBarry Smith   PetscLayout        layout;
165*9f0612e4SBarry Smith   const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
166f1f2ae84SBarry Smith   const PetscInt    *range;
167f1f2ae84SBarry Smith   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
168*9f0612e4SBarry Smith   const PetscScalar *a                = NULL, *sa;
169*9f0612e4SBarry Smith   PetscInt           matproperties[8] = {0}, rstart, rend;
1703821be0aSBarry Smith   char              *cprefix;
171f1f2ae84SBarry Smith 
172f1f2ae84SBarry Smith   PetscFunctionBegin;
173f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
1743ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
175*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
17645682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
177f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
178f1f2ae84SBarry Smith   if (pc) {
17968a21331SBarry Smith     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
1803821be0aSBarry Smith     const char *prefix;
1813821be0aSBarry Smith     size_t      clen;
18268a21331SBarry Smith 
183f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_size(comm, &size));
184dad3da8eSBarry Smith     PCMPIMatCounts[size - 1]++;
185f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
18668a21331SBarry Smith     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
187dd0d27b1SBarry Smith     PetscCall(MatGetBlockSize(sA, &bs));
18868a21331SBarry Smith     matproperties[2] = bs;
18968a21331SBarry Smith     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
19068a21331SBarry Smith     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
19168a21331SBarry Smith     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
19268a21331SBarry Smith     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
19368a21331SBarry Smith     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
19468a21331SBarry Smith     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
19568a21331SBarry Smith     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
19668a21331SBarry Smith     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
1977a99bfcaSBarry Smith     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
1983821be0aSBarry Smith     PetscCall(MatGetOptionsPrefix(sA, &prefix));
1993821be0aSBarry Smith     PetscCall(PetscStrallocpy(prefix, &cprefix));
2003821be0aSBarry Smith     PetscCall(PetscStrlen(cprefix, &clen));
2013821be0aSBarry Smith     matproperties[7] = (PetscInt)clen;
202f1f2ae84SBarry Smith   }
2033821be0aSBarry Smith   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
204f1f2ae84SBarry Smith 
20568a21331SBarry Smith   /* determine ownership ranges of matrix columns */
206f1f2ae84SBarry Smith   PetscCall(PetscLayoutCreate(comm, &layout));
20768a21331SBarry Smith   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
20868a21331SBarry Smith   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
209f1f2ae84SBarry Smith   PetscCall(PetscLayoutSetUp(layout));
210f1f2ae84SBarry Smith   PetscCall(PetscLayoutGetLocalSize(layout, &n));
21168a21331SBarry Smith   PetscCall(PetscLayoutDestroy(&layout));
21268a21331SBarry Smith 
21368a21331SBarry Smith   /* determine ownership ranges of matrix rows */
21468a21331SBarry Smith   PetscCall(PetscLayoutCreate(comm, &layout));
21568a21331SBarry Smith   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
21668a21331SBarry Smith   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
21768a21331SBarry Smith   PetscCall(PetscLayoutSetUp(layout));
21868a21331SBarry Smith   PetscCall(PetscLayoutGetLocalSize(layout, &m));
219*9f0612e4SBarry Smith   PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));
220f1f2ae84SBarry Smith 
221*9f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
222f1f2ae84SBarry Smith   /* copy over the matrix nonzero structure and values */
223f1f2ae84SBarry Smith   if (pc) {
224*9f0612e4SBarry Smith     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
225*9f0612e4SBarry Smith     if (!PCMPIServerUseShmget) {
226f1f2ae84SBarry Smith       NZ      = km->NZ;
227f1f2ae84SBarry Smith       NZdispl = km->NZdispl;
228f1f2ae84SBarry Smith       PetscCall(PetscLayoutGetRanges(layout, &range));
229f1f2ae84SBarry Smith       for (i = 0; i < size; i++) {
230f1f2ae84SBarry Smith         sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
231f1f2ae84SBarry Smith         NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
232f1f2ae84SBarry Smith       }
233f1f2ae84SBarry Smith       displi[0]  = 0;
234f1f2ae84SBarry Smith       NZdispl[0] = 0;
235f1f2ae84SBarry Smith       for (j = 1; j < size; j++) {
236f1f2ae84SBarry Smith         displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
237f1f2ae84SBarry Smith         NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
238f1f2ae84SBarry Smith       }
239*9f0612e4SBarry Smith     }
240f1f2ae84SBarry Smith     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
241f1f2ae84SBarry Smith   }
242f1f2ae84SBarry Smith   PetscCall(PetscLayoutDestroy(&layout));
243f1f2ae84SBarry Smith 
2443821be0aSBarry Smith   PetscCall(MatCreate(comm, &A));
2453821be0aSBarry Smith   if (matproperties[7] > 0) {
2463821be0aSBarry Smith     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
2473821be0aSBarry Smith     PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
2483821be0aSBarry Smith     PetscCall(MatSetOptionsPrefix(A, cprefix));
2493821be0aSBarry Smith     PetscCall(PetscFree(cprefix));
2503821be0aSBarry Smith   }
2513821be0aSBarry Smith   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
2523821be0aSBarry Smith   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
2533821be0aSBarry Smith   PetscCall(MatSetType(A, MATMPIAIJ));
254*9f0612e4SBarry Smith 
255*9f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
256*9f0612e4SBarry Smith     PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
257*9f0612e4SBarry Smith     PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
258*9f0612e4SBarry Smith     PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm));
259*9f0612e4SBarry Smith     PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
260*9f0612e4SBarry Smith     PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
261*9f0612e4SBarry Smith   } else {
262*9f0612e4SBarry Smith     const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
263*9f0612e4SBarry Smith     PCMPIServerAddresses *addresses;
264*9f0612e4SBarry Smith 
265*9f0612e4SBarry Smith     PetscCall(PetscNew(&addresses));
266*9f0612e4SBarry Smith     addresses->n = 3;
267*9f0612e4SBarry Smith     PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
268*9f0612e4SBarry Smith     ia = rstart + (PetscInt *)addresses->addr[0];
269*9f0612e4SBarry Smith     ja = ia[0] + (PetscInt *)addresses->addr[1];
270*9f0612e4SBarry Smith     a  = ia[0] + (PetscScalar *)addresses->addr[2];
271*9f0612e4SBarry Smith     PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, (PetscErrorCode(*)(void *))PCMPIServerAddressesDestroy));
272*9f0612e4SBarry Smith   }
273*9f0612e4SBarry Smith 
274*9f0612e4SBarry Smith   if (pc) {
275*9f0612e4SBarry Smith     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
276*9f0612e4SBarry Smith     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
277*9f0612e4SBarry Smith   }
278*9f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
279*9f0612e4SBarry Smith 
280*9f0612e4SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
2813821be0aSBarry Smith   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
28268a21331SBarry Smith   PetscCall(MatSetBlockSize(A, matproperties[2]));
2833821be0aSBarry Smith 
28468a21331SBarry Smith   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
28568a21331SBarry Smith   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
28668a21331SBarry Smith   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
28768a21331SBarry Smith   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
288f1f2ae84SBarry Smith 
289*9f0612e4SBarry Smith   if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
290f1f2ae84SBarry Smith   PetscCall(KSPSetOperators(ksp, A, A));
291f1f2ae84SBarry Smith   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
2920316ec64SBarry Smith   PetscCall(PetscLogStagePop());
293*9f0612e4SBarry Smith   if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
294f1f2ae84SBarry Smith     const PetscInt *range;
295f1f2ae84SBarry Smith 
296f1f2ae84SBarry Smith     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
297f1f2ae84SBarry Smith     for (i = 0; i < size; i++) {
298f1f2ae84SBarry Smith       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
299f1f2ae84SBarry Smith       km->displ[i]     = (PetscMPIInt)range[i];
300f1f2ae84SBarry Smith     }
301f1f2ae84SBarry Smith   }
302f1f2ae84SBarry Smith   PetscCall(MatDestroy(&A));
30345682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
304f1f2ae84SBarry Smith   PetscCall(KSPSetFromOptions(ksp));
305*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
3063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
307f1f2ae84SBarry Smith }
308f1f2ae84SBarry Smith 
309d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIUpdateMatValues(PC pc)
310d71ae5a4SJacob Faibussowitsch {
311f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
312f1f2ae84SBarry Smith   KSP                ksp;
313f1f2ae84SBarry Smith   Mat                sA, A;
314f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
315*9f0612e4SBarry Smith   const PetscInt    *ia, *IA;
316*9f0612e4SBarry Smith   const PetscScalar *a;
317f1f2ae84SBarry Smith   PetscCount         nz;
318f1f2ae84SBarry Smith   const PetscScalar *sa = NULL;
319dad3da8eSBarry Smith   PetscMPIInt        size;
320*9f0612e4SBarry Smith   PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};
321f1f2ae84SBarry Smith 
322f1f2ae84SBarry Smith   PetscFunctionBegin;
323f1f2ae84SBarry Smith   if (pc) {
324f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
325f1f2ae84SBarry Smith     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
326*9f0612e4SBarry Smith     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
327f1f2ae84SBarry Smith   }
328f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
3293ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
330*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
33145682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
332f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
333dad3da8eSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
334dad3da8eSBarry Smith   PCMPIMatCounts[size - 1]++;
335f1f2ae84SBarry Smith   PetscCall(KSPGetOperators(ksp, NULL, &A));
336*9f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
337*9f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
338f1f2ae84SBarry Smith     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
339f1f2ae84SBarry Smith     PetscCall(PetscMalloc1(nz, &a));
340*9f0612e4SBarry Smith     PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
341*9f0612e4SBarry Smith   } else {
342*9f0612e4SBarry Smith     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
343*9f0612e4SBarry Smith     PCMPIServerAddresses *addresses;
344*9f0612e4SBarry Smith     PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
345*9f0612e4SBarry Smith     ia = rstart + (PetscInt *)addresses->addr[0];
346*9f0612e4SBarry Smith     a  = ia[0] + (PetscScalar *)addresses->addr[2];
347*9f0612e4SBarry Smith   }
348*9f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
34968a21331SBarry Smith   if (pc) {
35068a21331SBarry Smith     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
35168a21331SBarry Smith 
35268a21331SBarry Smith     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
353*9f0612e4SBarry Smith     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
35468a21331SBarry Smith 
35568a21331SBarry Smith     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
35668a21331SBarry Smith     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
35768a21331SBarry Smith     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
35868a21331SBarry Smith     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
35968a21331SBarry Smith     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
36068a21331SBarry Smith     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
36168a21331SBarry Smith     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
36268a21331SBarry Smith     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
36368a21331SBarry Smith   }
364f1f2ae84SBarry Smith   PetscCall(MatUpdateMPIAIJWithArray(A, a));
365*9f0612e4SBarry Smith   if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
36668a21331SBarry Smith   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
36768a21331SBarry Smith   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
36868a21331SBarry Smith   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
36968a21331SBarry Smith   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
37068a21331SBarry Smith   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
37168a21331SBarry Smith   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
37245682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
373*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
3743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
375f1f2ae84SBarry Smith }
376f1f2ae84SBarry Smith 
377d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
378d71ae5a4SJacob Faibussowitsch {
379f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
380f1f2ae84SBarry Smith   KSP                ksp;
381f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
382f1f2ae84SBarry Smith   const PetscScalar *sb   = NULL, *x;
383f1f2ae84SBarry Smith   PetscScalar       *b, *sx = NULL;
3845316cbedSBarry Smith   PetscInt           its, n;
3855316cbedSBarry Smith   PetscMPIInt        size;
386*9f0612e4SBarry Smith   void              *addr[2];
387f1f2ae84SBarry Smith 
388f1f2ae84SBarry Smith   PetscFunctionBegin;
389f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
3903ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
391*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
39245682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
393f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
394f1f2ae84SBarry Smith 
395f1f2ae84SBarry Smith   /* scatterv rhs */
396dad3da8eSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
3975316cbedSBarry Smith   if (pc) {
3985316cbedSBarry Smith     PetscInt N;
3995316cbedSBarry Smith 
400dad3da8eSBarry Smith     PCMPISolveCounts[size - 1]++;
40168a21331SBarry Smith     PetscCall(MatGetSize(pc->pmat, &N, NULL));
4025316cbedSBarry Smith     PCMPISizes[size - 1] += N;
403f1f2ae84SBarry Smith   }
404f1f2ae84SBarry Smith   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
405*9f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
406*9f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
407f1f2ae84SBarry Smith     PetscCall(VecGetArray(ksp->vec_rhs, &b));
408*9f0612e4SBarry Smith     if (pc) PetscCall(VecGetArrayRead(B, &sb));
409f1f2ae84SBarry Smith     PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
410f1f2ae84SBarry Smith     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
411*9f0612e4SBarry Smith     PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
412*9f0612e4SBarry Smith     // TODO: scatter initial guess if needed
413*9f0612e4SBarry Smith   } else {
414*9f0612e4SBarry Smith     PetscInt rstart;
415*9f0612e4SBarry Smith 
416*9f0612e4SBarry Smith     if (pc) PetscCall(VecGetArrayRead(B, &sb));
417*9f0612e4SBarry Smith     if (pc) PetscCall(VecGetArray(X, &sx));
418*9f0612e4SBarry Smith     const void *inaddr[2] = {(const void **)sb, (const void **)sx};
419*9f0612e4SBarry Smith     if (pc) PetscCall(VecRestoreArray(X, &sx));
420*9f0612e4SBarry Smith     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
421*9f0612e4SBarry Smith 
422*9f0612e4SBarry Smith     PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
423*9f0612e4SBarry Smith     PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
424*9f0612e4SBarry Smith     PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
425*9f0612e4SBarry Smith     PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
426*9f0612e4SBarry Smith   }
427*9f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
428f1f2ae84SBarry Smith 
42945682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
4300316ec64SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
431f1f2ae84SBarry Smith   PetscCall(KSPSolve(ksp, NULL, NULL));
4320316ec64SBarry Smith   PetscCall(PetscLogStagePop());
43345682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
4345316cbedSBarry Smith   PetscCall(KSPGetIterationNumber(ksp, &its));
4355316cbedSBarry Smith   PCMPIIterations[size - 1] += its;
436*9f0612e4SBarry Smith   // TODO: send iterations up to outer KSP
437*9f0612e4SBarry Smith 
438*9f0612e4SBarry Smith   if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));
439f1f2ae84SBarry Smith 
440f1f2ae84SBarry Smith   /* gather solution */
441*9f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
442*9f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
443f1f2ae84SBarry Smith     PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
444f1f2ae84SBarry Smith     if (pc) PetscCall(VecGetArray(X, &sx));
445f1f2ae84SBarry Smith     PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
446f1f2ae84SBarry Smith     if (pc) PetscCall(VecRestoreArray(X, &sx));
447f1f2ae84SBarry Smith     PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
448*9f0612e4SBarry Smith   } else {
449*9f0612e4SBarry Smith     PetscCall(VecResetArray(ksp->vec_rhs));
450*9f0612e4SBarry Smith     PetscCall(VecResetArray(ksp->vec_sol));
451*9f0612e4SBarry Smith   }
452*9f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
45345682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
454*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
4553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
456f1f2ae84SBarry Smith }
457f1f2ae84SBarry Smith 
458d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIDestroy(PC pc)
459d71ae5a4SJacob Faibussowitsch {
460f1f2ae84SBarry Smith   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
461f1f2ae84SBarry Smith   KSP      ksp;
462f1f2ae84SBarry Smith   MPI_Comm comm = PC_MPI_COMM_WORLD;
463f1f2ae84SBarry Smith 
464f1f2ae84SBarry Smith   PetscFunctionBegin;
465f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
4663ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
467c7d372c4SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
468*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
469f1f2ae84SBarry Smith   PetscCall(KSPDestroy(&ksp));
470c7d372c4SBarry Smith   PetscCall(PetscLogStagePop());
471*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
4723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
473f1f2ae84SBarry Smith }
474f1f2ae84SBarry Smith 
475*9f0612e4SBarry Smith static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
476*9f0612e4SBarry Smith {
477*9f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
478*9f0612e4SBarry Smith   PetscMPIInt dummy1 = 1, dummy2;
479*9f0612e4SBarry Smith #endif
480*9f0612e4SBarry Smith 
481*9f0612e4SBarry Smith   PetscFunctionBegin;
482*9f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
483*9f0612e4SBarry Smith   if (PCMPIServerUseShmget) {
484*9f0612e4SBarry Smith     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
485*9f0612e4SBarry Smith   }
486*9f0612e4SBarry Smith #endif
487*9f0612e4SBarry Smith   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
488*9f0612e4SBarry Smith   /* next line ensures the sender has already taken the lock */
489*9f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
490*9f0612e4SBarry Smith   if (PCMPIServerUseShmget) {
491*9f0612e4SBarry Smith     PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
492*9f0612e4SBarry Smith     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
493*9f0612e4SBarry Smith   }
494*9f0612e4SBarry Smith #endif
495*9f0612e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
496*9f0612e4SBarry Smith }
4973821be0aSBarry Smith 
498f1f2ae84SBarry Smith /*@C
4997a99bfcaSBarry Smith   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
500f1580f4eSBarry Smith   parallel `KSP` solves and management of parallel `KSP` objects.
501f1f2ae84SBarry Smith 
5023821be0aSBarry Smith   Logically Collective on all MPI processes except rank 0
503f1f2ae84SBarry Smith 
504f1580f4eSBarry Smith   Options Database Keys:
505f1f2ae84SBarry Smith + -mpi_linear_solver_server                   - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
506*9f0612e4SBarry Smith . -mpi_linear_solver_server_view              - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
507*9f0612e4SBarry Smith - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)
508f1f2ae84SBarry Smith 
50920f4b53cSBarry Smith   Level: developer
51020f4b53cSBarry Smith 
511f1580f4eSBarry Smith   Note:
512f1f2ae84SBarry Smith   This is normally started automatically in `PetscInitialize()` when the option is provided
513f1f2ae84SBarry Smith 
5143821be0aSBarry Smith   See `PCMPI` for information on using the solver with a `KSP` object
5153821be0aSBarry Smith 
516f1f2ae84SBarry Smith   Developer Notes:
5173821be0aSBarry Smith   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
518f1580f4eSBarry Smith   written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
519f1580f4eSBarry Smith   (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.
520f1f2ae84SBarry Smith 
521f1580f4eSBarry Smith   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
522f1f2ae84SBarry Smith 
5233821be0aSBarry Smith   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
5243821be0aSBarry Smith 
5253821be0aSBarry Smith   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
5263821be0aSBarry Smith 
527baca6076SPierre Jolivet   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
5283821be0aSBarry Smith   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
5293821be0aSBarry Smith 
5307a99bfcaSBarry Smith   The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on
5313821be0aSBarry Smith   all MPI processes but the user callback only runs on one MPI process per node.
5323821be0aSBarry Smith 
5333821be0aSBarry Smith   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove
5343821be0aSBarry Smith   the `MPI_Comm` argument from PETSc calls.
5353821be0aSBarry Smith 
5363821be0aSBarry Smith .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
537f1f2ae84SBarry Smith @*/
538d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerBegin(void)
539d71ae5a4SJacob Faibussowitsch {
540f1f2ae84SBarry Smith   PetscMPIInt rank;
541f1f2ae84SBarry Smith 
542f1f2ae84SBarry Smith   PetscFunctionBegin;
5439d3446b2SPierre Jolivet   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
5445e1a0e3cSBarry Smith   if (PetscDefined(USE_SINGLE_LIBRARY)) {
5455e1a0e3cSBarry Smith     PetscCall(VecInitializePackage());
5465e1a0e3cSBarry Smith     PetscCall(MatInitializePackage());
5475e1a0e3cSBarry Smith     PetscCall(DMInitializePackage());
5485e1a0e3cSBarry Smith     PetscCall(PCInitializePackage());
5495e1a0e3cSBarry Smith     PetscCall(KSPInitializePackage());
5505e1a0e3cSBarry Smith     PetscCall(SNESInitializePackage());
5515e1a0e3cSBarry Smith     PetscCall(TSInitializePackage());
5525e1a0e3cSBarry Smith     PetscCall(TaoInitializePackage());
5535e1a0e3cSBarry Smith   }
554956255efSBarry Smith   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
55545682376SBarry Smith   PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
556*9f0612e4SBarry Smith   PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));
557*9f0612e4SBarry Smith 
558*9f0612e4SBarry Smith   if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
559*9f0612e4SBarry Smith   PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));
5605e1a0e3cSBarry Smith 
561f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
562*9f0612e4SBarry Smith   if (PCMPIServerUseShmget) {
563*9f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
564*9f0612e4SBarry Smith     PetscMPIInt size;
565*9f0612e4SBarry Smith 
566*9f0612e4SBarry Smith     PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
567*9f0612e4SBarry Smith     if (size > 1) {
568*9f0612e4SBarry Smith       pthread_mutex_t *locks;
569*9f0612e4SBarry Smith 
570*9f0612e4SBarry Smith       if (rank == 0) {
571*9f0612e4SBarry Smith         PCMPIServerActive = PETSC_TRUE;
572*9f0612e4SBarry Smith         PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
573*9f0612e4SBarry Smith       }
574*9f0612e4SBarry Smith       PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
575*9f0612e4SBarry Smith       if (rank == 0) {
576*9f0612e4SBarry Smith         pthread_mutexattr_t attr;
577*9f0612e4SBarry Smith 
578*9f0612e4SBarry Smith         pthread_mutexattr_init(&attr);
579*9f0612e4SBarry Smith         pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
580*9f0612e4SBarry Smith 
581*9f0612e4SBarry Smith         for (int i = 1; i < size; i++) {
582*9f0612e4SBarry Smith           pthread_mutex_init(&PCMPIServerLocks[i], &attr);
583*9f0612e4SBarry Smith           pthread_mutex_lock(&PCMPIServerLocks[i]);
584*9f0612e4SBarry Smith         }
585*9f0612e4SBarry Smith       }
586*9f0612e4SBarry Smith       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
587*9f0612e4SBarry Smith     }
588*9f0612e4SBarry Smith #endif
589*9f0612e4SBarry Smith   }
590f1f2ae84SBarry Smith   if (rank == 0) {
591f1f2ae84SBarry Smith     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
5923821be0aSBarry Smith     PCMPIServerActive = PETSC_TRUE;
5933ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
594f1f2ae84SBarry Smith   }
595f1f2ae84SBarry Smith 
596f1f2ae84SBarry Smith   while (PETSC_TRUE) {
597f1f2ae84SBarry Smith     PCMPICommand request = PCMPI_CREATE;
598*9f0612e4SBarry Smith     PetscMPIInt  dummy1  = 1, dummy2;
599*9f0612e4SBarry Smith 
600*9f0612e4SBarry Smith     // TODO: can we broadcast the number of active ranks here so only the correct subset of proccesses waits on the later scatters?
601*9f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
602*9f0612e4SBarry Smith     if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
603*9f0612e4SBarry Smith #endif
604f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
605*9f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
606*9f0612e4SBarry Smith     if (PCMPIServerUseShmget) {
607*9f0612e4SBarry Smith       /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
608*9f0612e4SBarry Smith       PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
609*9f0612e4SBarry Smith       pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
610*9f0612e4SBarry Smith     }
611*9f0612e4SBarry Smith #endif
612f1f2ae84SBarry Smith     switch (request) {
613d71ae5a4SJacob Faibussowitsch     case PCMPI_CREATE:
614d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPICreate(NULL));
615d71ae5a4SJacob Faibussowitsch       break;
616d71ae5a4SJacob Faibussowitsch     case PCMPI_SET_MAT:
617d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPISetMat(NULL));
618d71ae5a4SJacob Faibussowitsch       break;
619d71ae5a4SJacob Faibussowitsch     case PCMPI_UPDATE_MAT_VALUES:
620d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPIUpdateMatValues(NULL));
621d71ae5a4SJacob Faibussowitsch       break;
622f1f2ae84SBarry Smith     case PCMPI_VIEW:
623f1f2ae84SBarry Smith       // PetscCall(PCMPIView(NULL));
624f1f2ae84SBarry Smith       break;
625d71ae5a4SJacob Faibussowitsch     case PCMPI_SOLVE:
626d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPISolve(NULL, NULL, NULL));
627d71ae5a4SJacob Faibussowitsch       break;
628d71ae5a4SJacob Faibussowitsch     case PCMPI_DESTROY:
629d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPIDestroy(NULL));
630d71ae5a4SJacob Faibussowitsch       break;
631f1f2ae84SBarry Smith     case PCMPI_EXIT:
632*9f0612e4SBarry Smith       if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
633f1f2ae84SBarry Smith       PetscCall(PetscFinalize());
634f1f2ae84SBarry Smith       exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
635f1f2ae84SBarry Smith       break;
636d71ae5a4SJacob Faibussowitsch     default:
637d71ae5a4SJacob Faibussowitsch       break;
638f1f2ae84SBarry Smith     }
639f1f2ae84SBarry Smith   }
6403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
641f1f2ae84SBarry Smith }
642f1f2ae84SBarry Smith 
643f1f2ae84SBarry Smith /*@C
644f1f2ae84SBarry Smith   PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
645f1580f4eSBarry Smith   parallel KSP solves and management of parallel `KSP` objects.
646f1f2ae84SBarry Smith 
64720f4b53cSBarry Smith   Logically Collective on all MPI ranks except 0
64820f4b53cSBarry Smith 
64920f4b53cSBarry Smith   Level: developer
650f1f2ae84SBarry Smith 
651f1580f4eSBarry Smith   Note:
652*9f0612e4SBarry Smith   This is normally called automatically in `PetscFinalize()`
653f1f2ae84SBarry Smith 
6543821be0aSBarry Smith .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
655f1f2ae84SBarry Smith @*/
656d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerEnd(void)
657d71ae5a4SJacob Faibussowitsch {
658f1f2ae84SBarry Smith   PetscFunctionBegin;
659f1f2ae84SBarry Smith   if (PetscGlobalRank == 0) {
660f1f2ae84SBarry Smith     PetscViewer       viewer = NULL;
661f1f2ae84SBarry Smith     PetscViewerFormat format;
662f1f2ae84SBarry Smith 
663*9f0612e4SBarry Smith     PetscCall(PetscShmgetAddressesFinalize());
664*9f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
665*9f0612e4SBarry Smith     if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
666f1f2ae84SBarry Smith     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
667f1f2ae84SBarry Smith     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
668f1f2ae84SBarry Smith     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
669f1f2ae84SBarry Smith     PetscOptionsEnd();
670f1f2ae84SBarry Smith     if (viewer) {
671f1f2ae84SBarry Smith       PetscBool isascii;
672f1f2ae84SBarry Smith 
673f1f2ae84SBarry Smith       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
674f1f2ae84SBarry Smith       if (isascii) {
675f1f2ae84SBarry Smith         PetscMPIInt size;
6765316cbedSBarry Smith         PetscMPIInt i;
677f1f2ae84SBarry Smith 
678f1f2ae84SBarry Smith         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
6795316cbedSBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
6805316cbedSBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
6815316cbedSBarry Smith         if (PCMPIKSPCountsSeq) {
6825316cbedSBarry Smith           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
683f1f2ae84SBarry Smith         }
6845316cbedSBarry Smith         for (i = 0; i < size; i++) {
6855316cbedSBarry Smith           if (PCMPIKSPCounts[i]) {
6865316cbedSBarry Smith             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
6875316cbedSBarry Smith           }
6885316cbedSBarry Smith         }
689*9f0612e4SBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
690f1f2ae84SBarry Smith       }
691648c30bcSBarry Smith       PetscCall(PetscViewerDestroy(&viewer));
692f1f2ae84SBarry Smith     }
693f1f2ae84SBarry Smith   }
694f1f2ae84SBarry Smith   PetscCall(PCMPICommsDestroy());
6953821be0aSBarry Smith   PCMPIServerActive = PETSC_FALSE;
6963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
697f1f2ae84SBarry Smith }
698f1f2ae84SBarry Smith 
699f1f2ae84SBarry Smith /*
700f1f2ae84SBarry Smith     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
701f1f2ae84SBarry Smith     because, for example, the problem is small. This version is more efficient because it does not require copying any data
702f1f2ae84SBarry Smith */
703d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_Seq(PC pc)
704d71ae5a4SJacob Faibussowitsch {
705f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
706f1f2ae84SBarry Smith   Mat         sA;
707f1f2ae84SBarry Smith   const char *prefix;
708f9818f3cSJose E. Roman   char       *found = NULL, *cprefix;
709f1f2ae84SBarry Smith 
710f1f2ae84SBarry Smith   PetscFunctionBegin;
711*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
712f1f2ae84SBarry Smith   PetscCall(PCGetOperators(pc, NULL, &sA));
713f1f2ae84SBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
714f1f2ae84SBarry Smith   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
7153821be0aSBarry Smith   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
7163821be0aSBarry Smith   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
7173821be0aSBarry Smith 
7183821be0aSBarry Smith   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
7193821be0aSBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
7203821be0aSBarry Smith   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
7213821be0aSBarry Smith   PetscCall(PetscStrallocpy(prefix, &cprefix));
7223821be0aSBarry Smith   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
7233821be0aSBarry Smith   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
7243821be0aSBarry Smith   *found = 0;
7253821be0aSBarry Smith   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
7263821be0aSBarry Smith   PetscCall(PetscFree(cprefix));
7273821be0aSBarry Smith 
728f1f2ae84SBarry Smith   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
729f1f2ae84SBarry Smith   PetscCall(KSPSetFromOptions(km->ksps[0]));
730f1f2ae84SBarry Smith   PetscCall(KSPSetUp(km->ksps[0]));
7313ba16761SJacob Faibussowitsch   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
732f1f2ae84SBarry Smith   PCMPIKSPCountsSeq++;
733*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
7343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
735f1f2ae84SBarry Smith }
736f1f2ae84SBarry Smith 
737d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
738d71ae5a4SJacob Faibussowitsch {
739f1f2ae84SBarry Smith   PC_MPI  *km = (PC_MPI *)pc->data;
7405316cbedSBarry Smith   PetscInt its, n;
7415316cbedSBarry Smith   Mat      A;
742f1f2ae84SBarry Smith 
743f1f2ae84SBarry Smith   PetscFunctionBegin;
744*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
745f1f2ae84SBarry Smith   PetscCall(KSPSolve(km->ksps[0], b, x));
7465316cbedSBarry Smith   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
747f1f2ae84SBarry Smith   PCMPISolveCountsSeq++;
7485316cbedSBarry Smith   PCMPIIterationsSeq += its;
7495316cbedSBarry Smith   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
7505316cbedSBarry Smith   PetscCall(MatGetSize(A, &n, NULL));
7515316cbedSBarry Smith   PCMPISizesSeq += n;
752*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
753*9f0612e4SBarry Smith   /*
754*9f0612e4SBarry Smith     do not keep reference to previous rhs and solution since destroying them in the next KSPSolve()
755*9f0612e4SBarry Smith     my use PetscFree() instead of PCMPIArrayDeallocate()
756*9f0612e4SBarry Smith   */
757*9f0612e4SBarry Smith   PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
758*9f0612e4SBarry Smith   PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
7593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
760f1f2ae84SBarry Smith }
761f1f2ae84SBarry Smith 
762d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
763d71ae5a4SJacob Faibussowitsch {
764f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
765f1f2ae84SBarry Smith 
766f1f2ae84SBarry Smith   PetscFunctionBegin;
767f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
768f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
7695316cbedSBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
7703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
771f1f2ae84SBarry Smith }
772f1f2ae84SBarry Smith 
773d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCDestroy_Seq(PC pc)
774d71ae5a4SJacob Faibussowitsch {
775f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
776*9f0612e4SBarry Smith   Mat     A, B;
777*9f0612e4SBarry Smith   Vec     x, b;
778f1f2ae84SBarry Smith 
779f1f2ae84SBarry Smith   PetscFunctionBegin;
780*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
781*9f0612e4SBarry Smith   /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
782*9f0612e4SBarry Smith   PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
783*9f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)A));
784*9f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)B));
785*9f0612e4SBarry Smith   PetscCall(KSPGetSolution(km->ksps[0], &x));
786*9f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)x));
787*9f0612e4SBarry Smith   PetscCall(KSPGetRhs(km->ksps[0], &b));
788*9f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)b));
789f1f2ae84SBarry Smith   PetscCall(KSPDestroy(&km->ksps[0]));
790f1f2ae84SBarry Smith   PetscCall(PetscFree(pc->data));
791*9f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
792*9f0612e4SBarry Smith   PetscCall(MatDestroy(&A));
793*9f0612e4SBarry Smith   PetscCall(MatDestroy(&B));
794*9f0612e4SBarry Smith   PetscCall(VecDestroy(&x));
795*9f0612e4SBarry Smith   PetscCall(VecDestroy(&b));
7963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
797f1f2ae84SBarry Smith }
798f1f2ae84SBarry Smith 
799f1f2ae84SBarry Smith /*
800f1f2ae84SBarry Smith      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
801dd8e379bSPierre Jolivet      right-hand side to the parallel PC
802f1f2ae84SBarry Smith */
803d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_MPI(PC pc)
804d71ae5a4SJacob Faibussowitsch {
805f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
806f1f2ae84SBarry Smith   PetscMPIInt rank, size;
807f1f2ae84SBarry Smith   PetscBool   newmatrix = PETSC_FALSE;
808f1f2ae84SBarry Smith 
809f1f2ae84SBarry Smith   PetscFunctionBegin;
810f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
811f1f2ae84SBarry Smith   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
812f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
813f1f2ae84SBarry Smith 
814f1f2ae84SBarry Smith   if (!pc->setupcalled) {
815f1f2ae84SBarry Smith     if (!km->alwaysuseserver) {
816f1f2ae84SBarry Smith       PetscInt n;
817f1f2ae84SBarry Smith       Mat      sA;
818f1f2ae84SBarry Smith       /* short circuit for small systems */
819f1f2ae84SBarry Smith       PetscCall(PCGetOperators(pc, &sA, &sA));
820f1f2ae84SBarry Smith       PetscCall(MatGetSize(sA, &n, NULL));
821f1f2ae84SBarry Smith       if (n < 2 * km->mincntperrank - 1 || size == 1) {
822f1f2ae84SBarry Smith         pc->ops->setup   = NULL;
823f1f2ae84SBarry Smith         pc->ops->apply   = PCApply_Seq;
824f1f2ae84SBarry Smith         pc->ops->destroy = PCDestroy_Seq;
825f1f2ae84SBarry Smith         pc->ops->view    = PCView_Seq;
826f1f2ae84SBarry Smith         PetscCall(PCSetUp_Seq(pc));
8273ba16761SJacob Faibussowitsch         PetscFunctionReturn(PETSC_SUCCESS);
828f1f2ae84SBarry Smith       }
829f1f2ae84SBarry Smith     }
830f1f2ae84SBarry Smith 
831*9f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
832f1f2ae84SBarry Smith     PetscCall(PCMPICreate(pc));
833f1f2ae84SBarry Smith     newmatrix = PETSC_TRUE;
8349371c9d4SSatish Balay   }
8359371c9d4SSatish Balay   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
836f1f2ae84SBarry Smith 
837f1f2ae84SBarry Smith   if (newmatrix) {
8383ba16761SJacob Faibussowitsch     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
839*9f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
840f1f2ae84SBarry Smith     PetscCall(PCMPISetMat(pc));
841f1f2ae84SBarry Smith   } else {
842bbea24aaSStefano Zampini     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
843*9f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
844f1f2ae84SBarry Smith     PetscCall(PCMPIUpdateMatValues(pc));
845f1f2ae84SBarry Smith   }
8463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
847f1f2ae84SBarry Smith }
848f1f2ae84SBarry Smith 
849d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
850d71ae5a4SJacob Faibussowitsch {
851f1f2ae84SBarry Smith   PetscFunctionBegin;
852*9f0612e4SBarry Smith   PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
853f1f2ae84SBarry Smith   PetscCall(PCMPISolve(pc, b, x));
8543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
855f1f2ae84SBarry Smith }
856f1f2ae84SBarry Smith 
85766976f2fSJacob Faibussowitsch static PetscErrorCode PCDestroy_MPI(PC pc)
858d71ae5a4SJacob Faibussowitsch {
859f1f2ae84SBarry Smith   PetscFunctionBegin;
860*9f0612e4SBarry Smith   PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
861f1f2ae84SBarry Smith   PetscCall(PCMPIDestroy(pc));
862f1f2ae84SBarry Smith   PetscCall(PetscFree(pc->data));
8633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
864f1f2ae84SBarry Smith }
865f1f2ae84SBarry Smith 
866f1f2ae84SBarry Smith /*
867*9f0612e4SBarry Smith      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database
868f1f2ae84SBarry Smith */
86966976f2fSJacob Faibussowitsch static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
870d71ae5a4SJacob Faibussowitsch {
871f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
872f1f2ae84SBarry Smith   MPI_Comm    comm;
873f1f2ae84SBarry Smith   PetscMPIInt size;
874f1f2ae84SBarry Smith 
875f1f2ae84SBarry Smith   PetscFunctionBegin;
876f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
877f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
878f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
879*9f0612e4SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %d\n", (int)km->mincntperrank));
880*9f0612e4SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
8813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
882f1f2ae84SBarry Smith }
883f1f2ae84SBarry Smith 
88466976f2fSJacob Faibussowitsch static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
885d71ae5a4SJacob Faibussowitsch {
886f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
887f1f2ae84SBarry Smith 
888f1f2ae84SBarry Smith   PetscFunctionBegin;
889f1f2ae84SBarry Smith   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
8903821be0aSBarry Smith   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
8913821be0aSBarry Smith   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
892f1f2ae84SBarry Smith   PetscOptionsHeadEnd();
8933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
894f1f2ae84SBarry Smith }
895f1f2ae84SBarry Smith 
896f1f2ae84SBarry Smith /*MC
897f1580f4eSBarry Smith      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
898f1f2ae84SBarry Smith 
8993821be0aSBarry Smith    Options Database Keys for the Server:
900f1f2ae84SBarry Smith +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
901*9f0612e4SBarry Smith .  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
902*9f0612e4SBarry Smith -  -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true
903f1f2ae84SBarry Smith 
9043821be0aSBarry Smith    Options Database Keys for a specific `KSP` object
9053821be0aSBarry Smith +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
9063821be0aSBarry Smith -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)
9073821be0aSBarry Smith 
9083821be0aSBarry Smith    Level: developer
90920f4b53cSBarry Smith 
910f1f2ae84SBarry Smith    Notes:
911*9f0612e4SBarry Smith    This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
912*9f0612e4SBarry Smith    `MatCreateSeqAIJWithArrays()`
913*9f0612e4SBarry Smith 
91446bbbc36SPierre Jolivet    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used.
915f1f2ae84SBarry Smith 
916f1f2ae84SBarry Smith    It can be particularly useful for user OpenMP code or potentially user GPU code.
917f1f2ae84SBarry Smith 
918dd8e379bSPierre Jolivet    When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
9193821be0aSBarry Smith    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
920f1f2ae84SBarry Smith 
9213821be0aSBarry Smith    The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
9220316ec64SBarry Smith    because they are not the actual solver objects.
9230316ec64SBarry Smith 
9240316ec64SBarry Smith    When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other
92568a21331SBarry Smith    stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
9260316ec64SBarry Smith 
9273821be0aSBarry Smith    Developer Note:
9283821be0aSBarry Smith    This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of
9293821be0aSBarry Smith    a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
9303821be0aSBarry Smith 
9313821be0aSBarry Smith .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
932f1f2ae84SBarry Smith M*/
933d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
934d71ae5a4SJacob Faibussowitsch {
935f1f2ae84SBarry Smith   PC_MPI *km;
936f9818f3cSJose E. Roman   char   *found = NULL;
937f1f2ae84SBarry Smith 
938f1f2ae84SBarry Smith   PetscFunctionBegin;
9393821be0aSBarry Smith   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
9403821be0aSBarry Smith   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
9413821be0aSBarry Smith 
9423821be0aSBarry Smith   /* material from PCSetType() */
9433821be0aSBarry Smith   PetscTryTypeMethod(pc, destroy);
9443821be0aSBarry Smith   pc->ops->destroy = NULL;
9453821be0aSBarry Smith   pc->data         = NULL;
9463821be0aSBarry Smith 
9473821be0aSBarry Smith   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
9483821be0aSBarry Smith   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
9493821be0aSBarry Smith   pc->modifysubmatrices  = NULL;
9503821be0aSBarry Smith   pc->modifysubmatricesP = NULL;
9513821be0aSBarry Smith   pc->setupcalled        = 0;
9523821be0aSBarry Smith 
9534dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&km));
954f1f2ae84SBarry Smith   pc->data = (void *)km;
955f1f2ae84SBarry Smith 
956f1f2ae84SBarry Smith   km->mincntperrank = 10000;
957f1f2ae84SBarry Smith 
958f1f2ae84SBarry Smith   pc->ops->setup          = PCSetUp_MPI;
959f1f2ae84SBarry Smith   pc->ops->apply          = PCApply_MPI;
960f1f2ae84SBarry Smith   pc->ops->destroy        = PCDestroy_MPI;
961f1f2ae84SBarry Smith   pc->ops->view           = PCView_MPI;
962f1f2ae84SBarry Smith   pc->ops->setfromoptions = PCSetFromOptions_MPI;
9633821be0aSBarry Smith   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
9643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
965f1f2ae84SBarry Smith }
966*9f0612e4SBarry Smith 
967*9f0612e4SBarry Smith /*@
968*9f0612e4SBarry Smith   PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`
969*9f0612e4SBarry Smith 
970*9f0612e4SBarry Smith   Not Collective
971*9f0612e4SBarry Smith 
972*9f0612e4SBarry Smith   Input Parameter:
973*9f0612e4SBarry Smith . pc - the preconditioner context
974*9f0612e4SBarry Smith 
975*9f0612e4SBarry Smith   Output Parameter:
976*9f0612e4SBarry Smith . innerksp - the inner `KSP`
977*9f0612e4SBarry Smith 
978*9f0612e4SBarry Smith   Level: advanced
979*9f0612e4SBarry Smith 
980*9f0612e4SBarry Smith .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
981*9f0612e4SBarry Smith @*/
982*9f0612e4SBarry Smith PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
983*9f0612e4SBarry Smith {
984*9f0612e4SBarry Smith   PC_MPI *red = (PC_MPI *)pc->data;
985*9f0612e4SBarry Smith 
986*9f0612e4SBarry Smith   PetscFunctionBegin;
987*9f0612e4SBarry Smith   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
988*9f0612e4SBarry Smith   PetscAssertPointer(innerksp, 2);
989*9f0612e4SBarry Smith   *innerksp = red->ksps[0];
990*9f0612e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
991*9f0612e4SBarry Smith }
992