xref: /petsc/src/ksp/pc/impls/mpi/pcmpi.c (revision 3fa2bd1c3b863886141a0fb18511a02c2209ae46)
1f1f2ae84SBarry Smith /*
2f1f2ae84SBarry Smith     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3f1f2ae84SBarry Smith     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
4f1f2ae84SBarry Smith 
5dd8e379bSPierre Jolivet     That program may use OpenMP to compute the right-hand side and matrix for the linear system
6f1f2ae84SBarry Smith 
7f1f2ae84SBarry Smith     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD
8f1f2ae84SBarry Smith 
9f1f2ae84SBarry Smith     The resulting KSP and PC can only be controlled via the options database, though some common commands
10f1f2ae84SBarry Smith     could be passed through the server.
11f1f2ae84SBarry Smith 
12f1f2ae84SBarry Smith */
139f0612e4SBarry Smith #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
14f1f2ae84SBarry Smith #include <petsc/private/kspimpl.h>
15789afff4SPierre Jolivet #include <petscts.h>
16789afff4SPierre Jolivet #include <petsctao.h>
179f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
189f0612e4SBarry Smith   #include <pthread.h>
199f0612e4SBarry Smith #endif
20f1f2ae84SBarry Smith 
21f1f2ae84SBarry Smith #define PC_MPI_MAX_RANKS  256
22f1f2ae84SBarry Smith #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
23f1f2ae84SBarry Smith 
24f1f2ae84SBarry Smith typedef struct {
259f0612e4SBarry Smith   KSP       ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
26*3fa2bd1cSBarry Smith   PetscInt  sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
27*3fa2bd1cSBarry Smith   PetscInt  NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially */
289f0612e4SBarry Smith   PetscInt  mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
299f0612e4SBarry Smith   PetscBool alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI process is used for the solve */
30f1f2ae84SBarry Smith } PC_MPI;
31f1f2ae84SBarry Smith 
329371c9d4SSatish Balay typedef enum {
339371c9d4SSatish Balay   PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
34f1f2ae84SBarry Smith   PCMPI_CREATE,
35f1f2ae84SBarry Smith   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
36f1f2ae84SBarry Smith   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
37f1f2ae84SBarry Smith   PCMPI_SOLVE,
38f1f2ae84SBarry Smith   PCMPI_VIEW,
399f0612e4SBarry Smith   PCMPI_DESTROY /* destroy a PC that is no longer needed */
40f1f2ae84SBarry Smith } PCMPICommand;
41f1f2ae84SBarry Smith 
42f1f2ae84SBarry Smith static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
43f1f2ae84SBarry Smith static PetscBool     PCMPICommSet = PETSC_FALSE;
44f1f2ae84SBarry Smith static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
455316cbedSBarry Smith static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
469f0612e4SBarry Smith static PetscLogEvent EventServerDist, EventServerDistMPI;
479f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
489f0612e4SBarry Smith static pthread_mutex_t *PCMPIServerLocks;
499f0612e4SBarry Smith #else
509f0612e4SBarry Smith static void *PCMPIServerLocks;
519f0612e4SBarry Smith #endif
52f1f2ae84SBarry Smith 
53d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICommsCreate(void)
54d71ae5a4SJacob Faibussowitsch {
55f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
56f1f2ae84SBarry Smith   PetscMPIInt size, rank, i;
57f1f2ae84SBarry Smith 
58f1f2ae84SBarry Smith   PetscFunctionBegin;
59f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
60f1f2ae84SBarry Smith   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
61f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
62f1f2ae84SBarry Smith   /* comm for size 1 is useful only for debugging */
63f1f2ae84SBarry Smith   for (i = 0; i < size; i++) {
64f1f2ae84SBarry Smith     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
65f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
66f1f2ae84SBarry Smith     PCMPISolveCounts[i] = 0;
67f1f2ae84SBarry Smith     PCMPIKSPCounts[i]   = 0;
685316cbedSBarry Smith     PCMPIIterations[i]  = 0;
695316cbedSBarry Smith     PCMPISizes[i]       = 0;
70f1f2ae84SBarry Smith   }
71f1f2ae84SBarry Smith   PCMPICommSet = PETSC_TRUE;
723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
73f1f2ae84SBarry Smith }
74f1f2ae84SBarry Smith 
759f0612e4SBarry Smith static PetscErrorCode PCMPICommsDestroy(void)
76d71ae5a4SJacob Faibussowitsch {
77f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
78f1f2ae84SBarry Smith   PetscMPIInt size, rank, i;
79f1f2ae84SBarry Smith 
80f1f2ae84SBarry Smith   PetscFunctionBegin;
813ba16761SJacob Faibussowitsch   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
82f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
83f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
84f1f2ae84SBarry Smith   for (i = 0; i < size; i++) {
85f1f2ae84SBarry Smith     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
86f1f2ae84SBarry Smith   }
87f1f2ae84SBarry Smith   PCMPICommSet = PETSC_FALSE;
883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
89f1f2ae84SBarry Smith }
90f1f2ae84SBarry Smith 
91d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPICreate(PC pc)
92d71ae5a4SJacob Faibussowitsch {
93f1f2ae84SBarry Smith   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
94f1f2ae84SBarry Smith   MPI_Comm    comm = PC_MPI_COMM_WORLD;
95f1f2ae84SBarry Smith   KSP         ksp;
96f1f2ae84SBarry Smith   PetscInt    N[2], mincntperrank = 0;
97f1f2ae84SBarry Smith   PetscMPIInt size;
98f1f2ae84SBarry Smith   Mat         sA;
993821be0aSBarry Smith   char       *cprefix = NULL;
100f1f2ae84SBarry Smith   PetscMPIInt len     = 0;
101f1f2ae84SBarry Smith 
102f1f2ae84SBarry Smith   PetscFunctionBegin;
1039f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
104f1f2ae84SBarry Smith   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
105f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
106f1f2ae84SBarry Smith   if (pc) {
107f1f2ae84SBarry Smith     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n"));
108f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
109f1f2ae84SBarry Smith     PetscCall(MatGetSize(sA, &N[0], &N[1]));
110f1f2ae84SBarry Smith   }
111f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
112f1f2ae84SBarry Smith 
113f1f2ae84SBarry Smith   /* choose a suitable sized MPI_Comm for the problem to be solved on */
114f1f2ae84SBarry Smith   if (km) mincntperrank = km->mincntperrank;
115f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
116f1f2ae84SBarry Smith   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
117f1f2ae84SBarry Smith   if (comm == MPI_COMM_NULL) {
118f1f2ae84SBarry Smith     ksp                = NULL;
1199f0612e4SBarry Smith     PCMPIServerInSolve = PETSC_FALSE;
1203ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
121f1f2ae84SBarry Smith   }
1220316ec64SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
123f1f2ae84SBarry Smith   PetscCall(KSPCreate(comm, &ksp));
1243821be0aSBarry Smith   PetscCall(KSPSetNestLevel(ksp, 1));
1253821be0aSBarry Smith   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
1260316ec64SBarry Smith   PetscCall(PetscLogStagePop());
127f1f2ae84SBarry Smith   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
128f1f2ae84SBarry Smith   if (pc) {
129f1f2ae84SBarry Smith     size_t      slen;
1303821be0aSBarry Smith     const char *prefix = NULL;
131f9818f3cSJose E. Roman     char       *found  = NULL;
132f1f2ae84SBarry Smith 
133f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_size(comm, &size));
134dad3da8eSBarry Smith     PCMPIKSPCounts[size - 1]++;
1353821be0aSBarry Smith     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
1363821be0aSBarry Smith     PetscCall(PCGetOptionsPrefix(pc, &prefix));
1373821be0aSBarry Smith     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
1383821be0aSBarry Smith     PetscCall(PetscStrallocpy(prefix, &cprefix));
1393821be0aSBarry Smith     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
1403821be0aSBarry Smith     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
1413821be0aSBarry Smith     *found = 0;
1423821be0aSBarry Smith     PetscCall(PetscStrlen(cprefix, &slen));
143835f2295SStefano Zampini     PetscCall(PetscMPIIntCast(slen, &len));
144f1f2ae84SBarry Smith   }
145f1f2ae84SBarry Smith   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
146f1f2ae84SBarry Smith   if (len) {
1473821be0aSBarry Smith     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
1483821be0aSBarry Smith     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
1493821be0aSBarry Smith     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
150f1f2ae84SBarry Smith   }
1513821be0aSBarry Smith   PetscCall(PetscFree(cprefix));
1529f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
1533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
154f1f2ae84SBarry Smith }
155f1f2ae84SBarry Smith 
156d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISetMat(PC pc)
157d71ae5a4SJacob Faibussowitsch {
158f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
159f1f2ae84SBarry Smith   Mat                A;
1609f0612e4SBarry Smith   PetscInt           m, n, j, bs;
161f1f2ae84SBarry Smith   Mat                sA;
162f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
163f1f2ae84SBarry Smith   KSP                ksp;
164f1f2ae84SBarry Smith   PetscLayout        layout;
1659f0612e4SBarry Smith   const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
166f1f2ae84SBarry Smith   const PetscInt    *range;
167*3fa2bd1cSBarry Smith   PetscInt          *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz;
168*3fa2bd1cSBarry Smith   PetscMPIInt        size, i;
1699f0612e4SBarry Smith   const PetscScalar *a                = NULL, *sa;
1709f0612e4SBarry Smith   PetscInt           matproperties[8] = {0}, rstart, rend;
1713821be0aSBarry Smith   char              *cprefix;
172f1f2ae84SBarry Smith 
173f1f2ae84SBarry Smith   PetscFunctionBegin;
174f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
1753ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
1769f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
17745682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
178f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
179f1f2ae84SBarry Smith   if (pc) {
18068a21331SBarry Smith     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
1813821be0aSBarry Smith     const char *prefix;
1823821be0aSBarry Smith     size_t      clen;
18368a21331SBarry Smith 
184f1f2ae84SBarry Smith     PetscCallMPI(MPI_Comm_size(comm, &size));
185dad3da8eSBarry Smith     PCMPIMatCounts[size - 1]++;
186f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
18768a21331SBarry Smith     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
188dd0d27b1SBarry Smith     PetscCall(MatGetBlockSize(sA, &bs));
18968a21331SBarry Smith     matproperties[2] = bs;
19068a21331SBarry Smith     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
19168a21331SBarry Smith     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
19268a21331SBarry Smith     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
19368a21331SBarry Smith     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
19468a21331SBarry Smith     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
19568a21331SBarry Smith     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
19668a21331SBarry Smith     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
19768a21331SBarry Smith     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
1987a99bfcaSBarry Smith     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
1993821be0aSBarry Smith     PetscCall(MatGetOptionsPrefix(sA, &prefix));
2003821be0aSBarry Smith     PetscCall(PetscStrallocpy(prefix, &cprefix));
2013821be0aSBarry Smith     PetscCall(PetscStrlen(cprefix, &clen));
2023821be0aSBarry Smith     matproperties[7] = (PetscInt)clen;
203f1f2ae84SBarry Smith   }
2043821be0aSBarry Smith   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
205f1f2ae84SBarry Smith 
20668a21331SBarry Smith   /* determine ownership ranges of matrix columns */
207f1f2ae84SBarry Smith   PetscCall(PetscLayoutCreate(comm, &layout));
20868a21331SBarry Smith   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
20968a21331SBarry Smith   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
210f1f2ae84SBarry Smith   PetscCall(PetscLayoutSetUp(layout));
211f1f2ae84SBarry Smith   PetscCall(PetscLayoutGetLocalSize(layout, &n));
21268a21331SBarry Smith   PetscCall(PetscLayoutDestroy(&layout));
21368a21331SBarry Smith 
21468a21331SBarry Smith   /* determine ownership ranges of matrix rows */
21568a21331SBarry Smith   PetscCall(PetscLayoutCreate(comm, &layout));
21668a21331SBarry Smith   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
21768a21331SBarry Smith   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
21868a21331SBarry Smith   PetscCall(PetscLayoutSetUp(layout));
21968a21331SBarry Smith   PetscCall(PetscLayoutGetLocalSize(layout, &m));
2209f0612e4SBarry Smith   PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));
221f1f2ae84SBarry Smith 
2229f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
223f1f2ae84SBarry Smith   /* copy over the matrix nonzero structure and values */
224f1f2ae84SBarry Smith   if (pc) {
2259f0612e4SBarry Smith     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
2269f0612e4SBarry Smith     if (!PCMPIServerUseShmget) {
227f1f2ae84SBarry Smith       NZ      = km->NZ;
228f1f2ae84SBarry Smith       NZdispl = km->NZdispl;
229f1f2ae84SBarry Smith       PetscCall(PetscLayoutGetRanges(layout, &range));
230f1f2ae84SBarry Smith       for (i = 0; i < size; i++) {
231*3fa2bd1cSBarry Smith         sendcounti[i] = 1 + range[i + 1] - range[i];
232*3fa2bd1cSBarry Smith         NZ[i]         = IA[range[i + 1]] - IA[range[i]];
233f1f2ae84SBarry Smith       }
234f1f2ae84SBarry Smith       displi[0]  = 0;
235f1f2ae84SBarry Smith       NZdispl[0] = 0;
236f1f2ae84SBarry Smith       for (j = 1; j < size; j++) {
237f1f2ae84SBarry Smith         displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
238f1f2ae84SBarry Smith         NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
239f1f2ae84SBarry Smith       }
2409f0612e4SBarry Smith     }
241f1f2ae84SBarry Smith     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
242f1f2ae84SBarry Smith   }
243f1f2ae84SBarry Smith   PetscCall(PetscLayoutDestroy(&layout));
244f1f2ae84SBarry Smith 
2453821be0aSBarry Smith   PetscCall(MatCreate(comm, &A));
2463821be0aSBarry Smith   if (matproperties[7] > 0) {
247835f2295SStefano Zampini     PetscMPIInt ni;
248835f2295SStefano Zampini 
249835f2295SStefano Zampini     PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni));
2503821be0aSBarry Smith     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
251835f2295SStefano Zampini     PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm));
2523821be0aSBarry Smith     PetscCall(MatSetOptionsPrefix(A, cprefix));
2533821be0aSBarry Smith     PetscCall(PetscFree(cprefix));
2543821be0aSBarry Smith   }
2553821be0aSBarry Smith   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
2563821be0aSBarry Smith   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
2573821be0aSBarry Smith   PetscCall(MatSetType(A, MATMPIAIJ));
2589f0612e4SBarry Smith 
2599f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
260*3fa2bd1cSBarry Smith     PetscCallMPI(MPI_Scatter(NZ, 1, MPIU_INT, &nz, 1, MPIU_INT, 0, comm));
2619f0612e4SBarry Smith     PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
262*3fa2bd1cSBarry Smith     PetscCallMPI(MPIU_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm));
263*3fa2bd1cSBarry Smith     PetscCallMPI(MPIU_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
264*3fa2bd1cSBarry Smith     PetscCallMPI(MPIU_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
2659f0612e4SBarry Smith   } else {
2669f0612e4SBarry Smith     const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
2679f0612e4SBarry Smith     PCMPIServerAddresses *addresses;
2689f0612e4SBarry Smith 
2699f0612e4SBarry Smith     PetscCall(PetscNew(&addresses));
2709f0612e4SBarry Smith     addresses->n = 3;
2719f0612e4SBarry Smith     PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
2729f0612e4SBarry Smith     ia = rstart + (PetscInt *)addresses->addr[0];
2739f0612e4SBarry Smith     ja = ia[0] + (PetscInt *)addresses->addr[1];
2749f0612e4SBarry Smith     a  = ia[0] + (PetscScalar *)addresses->addr[2];
27549abdd8aSBarry Smith     PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy));
2769f0612e4SBarry Smith   }
2779f0612e4SBarry Smith 
2789f0612e4SBarry Smith   if (pc) {
2799f0612e4SBarry Smith     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
2809f0612e4SBarry Smith     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
2819f0612e4SBarry Smith   }
2829f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
2839f0612e4SBarry Smith 
2849f0612e4SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
2853821be0aSBarry Smith   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
28668a21331SBarry Smith   PetscCall(MatSetBlockSize(A, matproperties[2]));
2873821be0aSBarry Smith 
28868a21331SBarry Smith   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
28968a21331SBarry Smith   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
29068a21331SBarry Smith   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
29168a21331SBarry Smith   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
292f1f2ae84SBarry Smith 
2939f0612e4SBarry Smith   if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
294f1f2ae84SBarry Smith   PetscCall(KSPSetOperators(ksp, A, A));
295f1f2ae84SBarry Smith   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
2960316ec64SBarry Smith   PetscCall(PetscLogStagePop());
2979f0612e4SBarry Smith   if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
298f1f2ae84SBarry Smith     const PetscInt *range;
299f1f2ae84SBarry Smith 
300f1f2ae84SBarry Smith     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
301f1f2ae84SBarry Smith     for (i = 0; i < size; i++) {
302*3fa2bd1cSBarry Smith       km->sendcount[i] = range[i + 1] - range[i];
303*3fa2bd1cSBarry Smith       km->displ[i]     = range[i];
304f1f2ae84SBarry Smith     }
305f1f2ae84SBarry Smith   }
306f1f2ae84SBarry Smith   PetscCall(MatDestroy(&A));
30745682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
308f1f2ae84SBarry Smith   PetscCall(KSPSetFromOptions(ksp));
3099f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
3103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
311f1f2ae84SBarry Smith }
312f1f2ae84SBarry Smith 
313d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIUpdateMatValues(PC pc)
314d71ae5a4SJacob Faibussowitsch {
315f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
316f1f2ae84SBarry Smith   KSP                ksp;
317f1f2ae84SBarry Smith   Mat                sA, A;
318f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
3199f0612e4SBarry Smith   const PetscInt    *ia, *IA;
3209f0612e4SBarry Smith   const PetscScalar *a;
321f1f2ae84SBarry Smith   PetscCount         nz;
322f1f2ae84SBarry Smith   const PetscScalar *sa = NULL;
323dad3da8eSBarry Smith   PetscMPIInt        size;
3249f0612e4SBarry Smith   PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};
325f1f2ae84SBarry Smith 
326f1f2ae84SBarry Smith   PetscFunctionBegin;
327f1f2ae84SBarry Smith   if (pc) {
328f1f2ae84SBarry Smith     PetscCall(PCGetOperators(pc, &sA, &sA));
329f1f2ae84SBarry Smith     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
3309f0612e4SBarry Smith     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
331f1f2ae84SBarry Smith   }
332f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
3333ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
3349f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
33545682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
336f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
337dad3da8eSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
338dad3da8eSBarry Smith   PCMPIMatCounts[size - 1]++;
339f1f2ae84SBarry Smith   PetscCall(KSPGetOperators(ksp, NULL, &A));
3409f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
3419f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
342*3fa2bd1cSBarry Smith     PetscInt petsc_nz;
3436497c311SBarry Smith 
344f1f2ae84SBarry Smith     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
345*3fa2bd1cSBarry Smith     PetscCall(PetscIntCast(nz, &petsc_nz));
346f1f2ae84SBarry Smith     PetscCall(PetscMalloc1(nz, &a));
347*3fa2bd1cSBarry Smith     PetscCallMPI(MPIU_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, petsc_nz, MPIU_SCALAR, 0, comm));
3489f0612e4SBarry Smith   } else {
3499f0612e4SBarry Smith     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
3509f0612e4SBarry Smith     PCMPIServerAddresses *addresses;
3519f0612e4SBarry Smith     PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
3529f0612e4SBarry Smith     ia = rstart + (PetscInt *)addresses->addr[0];
3539f0612e4SBarry Smith     a  = ia[0] + (PetscScalar *)addresses->addr[2];
3549f0612e4SBarry Smith   }
3559f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
35668a21331SBarry Smith   if (pc) {
35768a21331SBarry Smith     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
35868a21331SBarry Smith 
35968a21331SBarry Smith     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
3609f0612e4SBarry Smith     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
36168a21331SBarry Smith 
36268a21331SBarry Smith     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
36368a21331SBarry Smith     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
36468a21331SBarry Smith     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
36568a21331SBarry Smith     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
36668a21331SBarry Smith     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
36768a21331SBarry Smith     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
36868a21331SBarry Smith     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
36968a21331SBarry Smith     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
37068a21331SBarry Smith   }
371f1f2ae84SBarry Smith   PetscCall(MatUpdateMPIAIJWithArray(A, a));
3729f0612e4SBarry Smith   if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
37368a21331SBarry Smith   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
37468a21331SBarry Smith   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
37568a21331SBarry Smith   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
37668a21331SBarry Smith   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
37768a21331SBarry Smith   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
37868a21331SBarry Smith   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
37945682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
3809f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
3813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
382f1f2ae84SBarry Smith }
383f1f2ae84SBarry Smith 
384d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
385d71ae5a4SJacob Faibussowitsch {
386f1f2ae84SBarry Smith   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
387f1f2ae84SBarry Smith   KSP                ksp;
388f1f2ae84SBarry Smith   MPI_Comm           comm = PC_MPI_COMM_WORLD;
389f1f2ae84SBarry Smith   const PetscScalar *sb   = NULL, *x;
390f1f2ae84SBarry Smith   PetscScalar       *b, *sx = NULL;
3915316cbedSBarry Smith   PetscInt           its, n;
3925316cbedSBarry Smith   PetscMPIInt        size;
3939f0612e4SBarry Smith   void              *addr[2];
394f1f2ae84SBarry Smith 
395f1f2ae84SBarry Smith   PetscFunctionBegin;
396f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
3973ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
3989f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
39945682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
400f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
401f1f2ae84SBarry Smith 
402f1f2ae84SBarry Smith   /* scatterv rhs */
403dad3da8eSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
4045316cbedSBarry Smith   if (pc) {
4055316cbedSBarry Smith     PetscInt N;
4065316cbedSBarry Smith 
407dad3da8eSBarry Smith     PCMPISolveCounts[size - 1]++;
40868a21331SBarry Smith     PetscCall(MatGetSize(pc->pmat, &N, NULL));
4095316cbedSBarry Smith     PCMPISizes[size - 1] += N;
410f1f2ae84SBarry Smith   }
411f1f2ae84SBarry Smith   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
4129f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
4139f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
414f1f2ae84SBarry Smith     PetscCall(VecGetArray(ksp->vec_rhs, &b));
4159f0612e4SBarry Smith     if (pc) PetscCall(VecGetArrayRead(B, &sb));
416*3fa2bd1cSBarry Smith     PetscCallMPI(MPIU_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
417f1f2ae84SBarry Smith     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
4189f0612e4SBarry Smith     PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
4199f0612e4SBarry Smith     // TODO: scatter initial guess if needed
4209f0612e4SBarry Smith   } else {
4219f0612e4SBarry Smith     PetscInt rstart;
4229f0612e4SBarry Smith 
4239f0612e4SBarry Smith     if (pc) PetscCall(VecGetArrayRead(B, &sb));
4249f0612e4SBarry Smith     if (pc) PetscCall(VecGetArray(X, &sx));
4259f0612e4SBarry Smith     const void *inaddr[2] = {(const void **)sb, (const void **)sx};
4269f0612e4SBarry Smith     if (pc) PetscCall(VecRestoreArray(X, &sx));
4279f0612e4SBarry Smith     if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
4289f0612e4SBarry Smith 
4299f0612e4SBarry Smith     PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
4309f0612e4SBarry Smith     PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
4319f0612e4SBarry Smith     PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
4329f0612e4SBarry Smith     PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
4339f0612e4SBarry Smith   }
4349f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
435f1f2ae84SBarry Smith 
43645682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
4370316ec64SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
438f1f2ae84SBarry Smith   PetscCall(KSPSolve(ksp, NULL, NULL));
4390316ec64SBarry Smith   PetscCall(PetscLogStagePop());
44045682376SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
4415316cbedSBarry Smith   PetscCall(KSPGetIterationNumber(ksp, &its));
4425316cbedSBarry Smith   PCMPIIterations[size - 1] += its;
4439f0612e4SBarry Smith   // TODO: send iterations up to outer KSP
4449f0612e4SBarry Smith 
4459f0612e4SBarry Smith   if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));
446f1f2ae84SBarry Smith 
447f1f2ae84SBarry Smith   /* gather solution */
4489f0612e4SBarry Smith   PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
4499f0612e4SBarry Smith   if (!PCMPIServerUseShmget) {
450f1f2ae84SBarry Smith     PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
451f1f2ae84SBarry Smith     if (pc) PetscCall(VecGetArray(X, &sx));
452*3fa2bd1cSBarry Smith     PetscCallMPI(MPIU_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
453f1f2ae84SBarry Smith     if (pc) PetscCall(VecRestoreArray(X, &sx));
454f1f2ae84SBarry Smith     PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
4559f0612e4SBarry Smith   } else {
4569f0612e4SBarry Smith     PetscCall(VecResetArray(ksp->vec_rhs));
4579f0612e4SBarry Smith     PetscCall(VecResetArray(ksp->vec_sol));
4589f0612e4SBarry Smith   }
4599f0612e4SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
46045682376SBarry Smith   PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
4619f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
4623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
463f1f2ae84SBarry Smith }
464f1f2ae84SBarry Smith 
465d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCMPIDestroy(PC pc)
466d71ae5a4SJacob Faibussowitsch {
467f1f2ae84SBarry Smith   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
468f1f2ae84SBarry Smith   KSP      ksp;
469f1f2ae84SBarry Smith   MPI_Comm comm = PC_MPI_COMM_WORLD;
470f1f2ae84SBarry Smith 
471f1f2ae84SBarry Smith   PetscFunctionBegin;
472f1f2ae84SBarry Smith   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
4733ba16761SJacob Faibussowitsch   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
474c7d372c4SBarry Smith   PetscCall(PetscLogStagePush(PCMPIStage));
4759f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
476f1f2ae84SBarry Smith   PetscCall(KSPDestroy(&ksp));
477c7d372c4SBarry Smith   PetscCall(PetscLogStagePop());
4789f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
4793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
480f1f2ae84SBarry Smith }
481f1f2ae84SBarry Smith 
4829f0612e4SBarry Smith static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
4839f0612e4SBarry Smith {
4849f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
4859f0612e4SBarry Smith   PetscMPIInt dummy1 = 1, dummy2;
4869f0612e4SBarry Smith #endif
4879f0612e4SBarry Smith 
4889f0612e4SBarry Smith   PetscFunctionBegin;
4899f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
4909f0612e4SBarry Smith   if (PCMPIServerUseShmget) {
4919f0612e4SBarry Smith     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
4929f0612e4SBarry Smith   }
4939f0612e4SBarry Smith #endif
4949f0612e4SBarry Smith   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
4959f0612e4SBarry Smith   /* next line ensures the sender has already taken the lock */
4969f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
4979f0612e4SBarry Smith   if (PCMPIServerUseShmget) {
4989f0612e4SBarry Smith     PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
4999f0612e4SBarry Smith     for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
5009f0612e4SBarry Smith   }
5019f0612e4SBarry Smith #endif
5029f0612e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
5039f0612e4SBarry Smith }
5043821be0aSBarry Smith 
505f1f2ae84SBarry Smith /*@C
5067a99bfcaSBarry Smith   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
507f1580f4eSBarry Smith   parallel `KSP` solves and management of parallel `KSP` objects.
508f1f2ae84SBarry Smith 
5093821be0aSBarry Smith   Logically Collective on all MPI processes except rank 0
510f1f2ae84SBarry Smith 
511f1580f4eSBarry Smith   Options Database Keys:
512f1f2ae84SBarry Smith + -mpi_linear_solver_server                   - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
5139f0612e4SBarry Smith . -mpi_linear_solver_server_view              - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
5149f0612e4SBarry Smith - -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)
515f1f2ae84SBarry Smith 
51620f4b53cSBarry Smith   Level: developer
51720f4b53cSBarry Smith 
518f1580f4eSBarry Smith   Note:
519f1f2ae84SBarry Smith   This is normally started automatically in `PetscInitialize()` when the option is provided
520f1f2ae84SBarry Smith 
5213821be0aSBarry Smith   See `PCMPI` for information on using the solver with a `KSP` object
5223821be0aSBarry Smith 
523f1f2ae84SBarry Smith   Developer Notes:
5243821be0aSBarry Smith   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
525f1580f4eSBarry Smith   written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
526f1580f4eSBarry Smith   (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server.
527f1f2ae84SBarry Smith 
528f1580f4eSBarry Smith   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
529f1f2ae84SBarry Smith 
5303821be0aSBarry Smith   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
5313821be0aSBarry Smith 
5323821be0aSBarry Smith   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
5333821be0aSBarry Smith 
534baca6076SPierre Jolivet   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
5353821be0aSBarry Smith   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
5363821be0aSBarry Smith 
5377a99bfcaSBarry Smith   The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on
5383821be0aSBarry Smith   all MPI processes but the user callback only runs on one MPI process per node.
5393821be0aSBarry Smith 
5403821be0aSBarry Smith   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove
5413821be0aSBarry Smith   the `MPI_Comm` argument from PETSc calls.
5423821be0aSBarry Smith 
5433821be0aSBarry Smith .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
544f1f2ae84SBarry Smith @*/
545d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerBegin(void)
546d71ae5a4SJacob Faibussowitsch {
547f1f2ae84SBarry Smith   PetscMPIInt rank;
548f1f2ae84SBarry Smith 
549f1f2ae84SBarry Smith   PetscFunctionBegin;
5509d3446b2SPierre Jolivet   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
5515e1a0e3cSBarry Smith   if (PetscDefined(USE_SINGLE_LIBRARY)) {
5525e1a0e3cSBarry Smith     PetscCall(VecInitializePackage());
5535e1a0e3cSBarry Smith     PetscCall(MatInitializePackage());
5545e1a0e3cSBarry Smith     PetscCall(DMInitializePackage());
5555e1a0e3cSBarry Smith     PetscCall(PCInitializePackage());
5565e1a0e3cSBarry Smith     PetscCall(KSPInitializePackage());
5575e1a0e3cSBarry Smith     PetscCall(SNESInitializePackage());
5585e1a0e3cSBarry Smith     PetscCall(TSInitializePackage());
5595e1a0e3cSBarry Smith     PetscCall(TaoInitializePackage());
5605e1a0e3cSBarry Smith   }
561956255efSBarry Smith   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
56245682376SBarry Smith   PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
5639f0612e4SBarry Smith   PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));
5649f0612e4SBarry Smith 
5659f0612e4SBarry Smith   if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
5669f0612e4SBarry Smith   PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));
5675e1a0e3cSBarry Smith 
568f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
5699f0612e4SBarry Smith   if (PCMPIServerUseShmget) {
5709f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
5719f0612e4SBarry Smith     PetscMPIInt size;
5729f0612e4SBarry Smith 
5739f0612e4SBarry Smith     PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
5749f0612e4SBarry Smith     if (size > 1) {
5759f0612e4SBarry Smith       pthread_mutex_t *locks;
5769f0612e4SBarry Smith 
5779f0612e4SBarry Smith       if (rank == 0) {
5789f0612e4SBarry Smith         PCMPIServerActive = PETSC_TRUE;
5799f0612e4SBarry Smith         PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
5809f0612e4SBarry Smith       }
5819f0612e4SBarry Smith       PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
5829f0612e4SBarry Smith       if (rank == 0) {
5839f0612e4SBarry Smith         pthread_mutexattr_t attr;
5849f0612e4SBarry Smith 
5859f0612e4SBarry Smith         pthread_mutexattr_init(&attr);
5869f0612e4SBarry Smith         pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
5879f0612e4SBarry Smith 
5889f0612e4SBarry Smith         for (int i = 1; i < size; i++) {
5899f0612e4SBarry Smith           pthread_mutex_init(&PCMPIServerLocks[i], &attr);
5909f0612e4SBarry Smith           pthread_mutex_lock(&PCMPIServerLocks[i]);
5919f0612e4SBarry Smith         }
5929f0612e4SBarry Smith       }
5939f0612e4SBarry Smith       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
5949f0612e4SBarry Smith     }
5959f0612e4SBarry Smith #endif
5969f0612e4SBarry Smith   }
597f1f2ae84SBarry Smith   if (rank == 0) {
598f1f2ae84SBarry Smith     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
5993821be0aSBarry Smith     PCMPIServerActive = PETSC_TRUE;
6003ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
601f1f2ae84SBarry Smith   }
602f1f2ae84SBarry Smith 
603f1f2ae84SBarry Smith   while (PETSC_TRUE) {
604f1f2ae84SBarry Smith     PCMPICommand request = PCMPI_CREATE;
60566a7e86cSPierre Jolivet #if defined(PETSC_HAVE_PTHREAD_MUTEX)
6069f0612e4SBarry Smith     PetscMPIInt dummy1 = 1, dummy2;
60766a7e86cSPierre Jolivet #endif
6089f0612e4SBarry Smith 
609d7c1f440SPierre Jolivet     // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
6109f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
6119f0612e4SBarry Smith     if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
6129f0612e4SBarry Smith #endif
613f1f2ae84SBarry Smith     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
6149f0612e4SBarry Smith #if defined(PETSC_HAVE_PTHREAD_MUTEX)
6159f0612e4SBarry Smith     if (PCMPIServerUseShmget) {
6169f0612e4SBarry Smith       /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
6179f0612e4SBarry Smith       PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
6189f0612e4SBarry Smith       pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
6199f0612e4SBarry Smith     }
6209f0612e4SBarry Smith #endif
621f1f2ae84SBarry Smith     switch (request) {
622d71ae5a4SJacob Faibussowitsch     case PCMPI_CREATE:
623d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPICreate(NULL));
624d71ae5a4SJacob Faibussowitsch       break;
625d71ae5a4SJacob Faibussowitsch     case PCMPI_SET_MAT:
626d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPISetMat(NULL));
627d71ae5a4SJacob Faibussowitsch       break;
628d71ae5a4SJacob Faibussowitsch     case PCMPI_UPDATE_MAT_VALUES:
629d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPIUpdateMatValues(NULL));
630d71ae5a4SJacob Faibussowitsch       break;
631f1f2ae84SBarry Smith     case PCMPI_VIEW:
632f1f2ae84SBarry Smith       // PetscCall(PCMPIView(NULL));
633f1f2ae84SBarry Smith       break;
634d71ae5a4SJacob Faibussowitsch     case PCMPI_SOLVE:
635d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPISolve(NULL, NULL, NULL));
636d71ae5a4SJacob Faibussowitsch       break;
637d71ae5a4SJacob Faibussowitsch     case PCMPI_DESTROY:
638d71ae5a4SJacob Faibussowitsch       PetscCall(PCMPIDestroy(NULL));
639d71ae5a4SJacob Faibussowitsch       break;
640f1f2ae84SBarry Smith     case PCMPI_EXIT:
6419f0612e4SBarry Smith       if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
642f1f2ae84SBarry Smith       PetscCall(PetscFinalize());
643f1f2ae84SBarry Smith       exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
644f1f2ae84SBarry Smith       break;
645d71ae5a4SJacob Faibussowitsch     default:
646d71ae5a4SJacob Faibussowitsch       break;
647f1f2ae84SBarry Smith     }
648f1f2ae84SBarry Smith   }
6493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
650f1f2ae84SBarry Smith }
651f1f2ae84SBarry Smith 
652f1f2ae84SBarry Smith /*@C
653f1f2ae84SBarry Smith   PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
654f1580f4eSBarry Smith   parallel KSP solves and management of parallel `KSP` objects.
655f1f2ae84SBarry Smith 
65620f4b53cSBarry Smith   Logically Collective on all MPI ranks except 0
65720f4b53cSBarry Smith 
65820f4b53cSBarry Smith   Level: developer
659f1f2ae84SBarry Smith 
660f1580f4eSBarry Smith   Note:
6619f0612e4SBarry Smith   This is normally called automatically in `PetscFinalize()`
662f1f2ae84SBarry Smith 
6633821be0aSBarry Smith .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
664f1f2ae84SBarry Smith @*/
665d71ae5a4SJacob Faibussowitsch PetscErrorCode PCMPIServerEnd(void)
666d71ae5a4SJacob Faibussowitsch {
667f1f2ae84SBarry Smith   PetscFunctionBegin;
668f1f2ae84SBarry Smith   if (PetscGlobalRank == 0) {
669f1f2ae84SBarry Smith     PetscViewer       viewer = NULL;
670f1f2ae84SBarry Smith     PetscViewerFormat format;
671f1f2ae84SBarry Smith 
6729f0612e4SBarry Smith     PetscCall(PetscShmgetAddressesFinalize());
6739f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
6749f0612e4SBarry Smith     if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
675f1f2ae84SBarry Smith     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
676f1f2ae84SBarry Smith     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
677f1f2ae84SBarry Smith     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
678f1f2ae84SBarry Smith     PetscOptionsEnd();
679f1f2ae84SBarry Smith     if (viewer) {
680f1f2ae84SBarry Smith       PetscBool isascii;
681f1f2ae84SBarry Smith 
682f1f2ae84SBarry Smith       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
683f1f2ae84SBarry Smith       if (isascii) {
684f1f2ae84SBarry Smith         PetscMPIInt size;
6855316cbedSBarry Smith         PetscMPIInt i;
686f1f2ae84SBarry Smith 
687f1f2ae84SBarry Smith         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
6885316cbedSBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
6895316cbedSBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
6905316cbedSBarry Smith         if (PCMPIKSPCountsSeq) {
6915316cbedSBarry Smith           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
692f1f2ae84SBarry Smith         }
6935316cbedSBarry Smith         for (i = 0; i < size; i++) {
6945316cbedSBarry Smith           if (PCMPIKSPCounts[i]) {
6955316cbedSBarry Smith             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
6965316cbedSBarry Smith           }
6975316cbedSBarry Smith         }
6989f0612e4SBarry Smith         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
699f1f2ae84SBarry Smith       }
700648c30bcSBarry Smith       PetscCall(PetscViewerDestroy(&viewer));
701f1f2ae84SBarry Smith     }
702f1f2ae84SBarry Smith   }
703f1f2ae84SBarry Smith   PetscCall(PCMPICommsDestroy());
7043821be0aSBarry Smith   PCMPIServerActive = PETSC_FALSE;
7053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
706f1f2ae84SBarry Smith }
707f1f2ae84SBarry Smith 
708f1f2ae84SBarry Smith /*
709f1f2ae84SBarry Smith     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
710f1f2ae84SBarry Smith     because, for example, the problem is small. This version is more efficient because it does not require copying any data
711f1f2ae84SBarry Smith */
712d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_Seq(PC pc)
713d71ae5a4SJacob Faibussowitsch {
714f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
715f1f2ae84SBarry Smith   Mat         sA;
716f1f2ae84SBarry Smith   const char *prefix;
717f9818f3cSJose E. Roman   char       *found = NULL, *cprefix;
718f1f2ae84SBarry Smith 
719f1f2ae84SBarry Smith   PetscFunctionBegin;
7209f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
721f1f2ae84SBarry Smith   PetscCall(PCGetOperators(pc, NULL, &sA));
722f1f2ae84SBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
723f1f2ae84SBarry Smith   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
7243821be0aSBarry Smith   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
7253821be0aSBarry Smith   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
7263821be0aSBarry Smith 
7273821be0aSBarry Smith   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
7283821be0aSBarry Smith   PetscCall(PCGetOptionsPrefix(pc, &prefix));
7293821be0aSBarry Smith   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
7303821be0aSBarry Smith   PetscCall(PetscStrallocpy(prefix, &cprefix));
7313821be0aSBarry Smith   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
7323821be0aSBarry Smith   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
7333821be0aSBarry Smith   *found = 0;
7343821be0aSBarry Smith   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
7353821be0aSBarry Smith   PetscCall(PetscFree(cprefix));
7363821be0aSBarry Smith 
737f1f2ae84SBarry Smith   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
738f1f2ae84SBarry Smith   PetscCall(KSPSetFromOptions(km->ksps[0]));
739f1f2ae84SBarry Smith   PetscCall(KSPSetUp(km->ksps[0]));
740835f2295SStefano Zampini   PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
741f1f2ae84SBarry Smith   PCMPIKSPCountsSeq++;
7429f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
7433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
744f1f2ae84SBarry Smith }
745f1f2ae84SBarry Smith 
746d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
747d71ae5a4SJacob Faibussowitsch {
748f1f2ae84SBarry Smith   PC_MPI  *km = (PC_MPI *)pc->data;
7495316cbedSBarry Smith   PetscInt its, n;
7505316cbedSBarry Smith   Mat      A;
751f1f2ae84SBarry Smith 
752f1f2ae84SBarry Smith   PetscFunctionBegin;
7539f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
754f1f2ae84SBarry Smith   PetscCall(KSPSolve(km->ksps[0], b, x));
7555316cbedSBarry Smith   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
756f1f2ae84SBarry Smith   PCMPISolveCountsSeq++;
7575316cbedSBarry Smith   PCMPIIterationsSeq += its;
7585316cbedSBarry Smith   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
7595316cbedSBarry Smith   PetscCall(MatGetSize(A, &n, NULL));
7605316cbedSBarry Smith   PCMPISizesSeq += n;
7619f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
7629f0612e4SBarry Smith   /*
7639f0612e4SBarry Smith     do not keep reference to previous rhs and solution since destroying them in the next KSPSolve()
7649f0612e4SBarry Smith     my use PetscFree() instead of PCMPIArrayDeallocate()
7659f0612e4SBarry Smith   */
7669f0612e4SBarry Smith   PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
7679f0612e4SBarry Smith   PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
7683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
769f1f2ae84SBarry Smith }
770f1f2ae84SBarry Smith 
771d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
772d71ae5a4SJacob Faibussowitsch {
773f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
774f1f2ae84SBarry Smith 
775f1f2ae84SBarry Smith   PetscFunctionBegin;
776f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
777835f2295SStefano Zampini   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
7785316cbedSBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
7793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
780f1f2ae84SBarry Smith }
781f1f2ae84SBarry Smith 
782d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCDestroy_Seq(PC pc)
783d71ae5a4SJacob Faibussowitsch {
784f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
7859f0612e4SBarry Smith   Mat     A, B;
7869f0612e4SBarry Smith   Vec     x, b;
787f1f2ae84SBarry Smith 
788f1f2ae84SBarry Smith   PetscFunctionBegin;
7899f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_TRUE;
7909f0612e4SBarry Smith   /* since matrices and vectors are shared with outer KSP we need to ensure they are not destroyed with PetscFree() */
7919f0612e4SBarry Smith   PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
7929f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)A));
7939f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)B));
7949f0612e4SBarry Smith   PetscCall(KSPGetSolution(km->ksps[0], &x));
7959f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)x));
7969f0612e4SBarry Smith   PetscCall(KSPGetRhs(km->ksps[0], &b));
7979f0612e4SBarry Smith   PetscCall(PetscObjectReference((PetscObject)b));
798f1f2ae84SBarry Smith   PetscCall(KSPDestroy(&km->ksps[0]));
799f1f2ae84SBarry Smith   PetscCall(PetscFree(pc->data));
8009f0612e4SBarry Smith   PCMPIServerInSolve = PETSC_FALSE;
8019f0612e4SBarry Smith   PetscCall(MatDestroy(&A));
8029f0612e4SBarry Smith   PetscCall(MatDestroy(&B));
8039f0612e4SBarry Smith   PetscCall(VecDestroy(&x));
8049f0612e4SBarry Smith   PetscCall(VecDestroy(&b));
8053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
806f1f2ae84SBarry Smith }
807f1f2ae84SBarry Smith 
808f1f2ae84SBarry Smith /*
809f1f2ae84SBarry Smith      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
810dd8e379bSPierre Jolivet      right-hand side to the parallel PC
811f1f2ae84SBarry Smith */
812d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_MPI(PC pc)
813d71ae5a4SJacob Faibussowitsch {
814f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
815f1f2ae84SBarry Smith   PetscMPIInt rank, size;
816f1f2ae84SBarry Smith   PetscBool   newmatrix = PETSC_FALSE;
817f1f2ae84SBarry Smith 
818f1f2ae84SBarry Smith   PetscFunctionBegin;
819f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
820f1f2ae84SBarry Smith   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
821f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
822f1f2ae84SBarry Smith 
823f1f2ae84SBarry Smith   if (!pc->setupcalled) {
824f1f2ae84SBarry Smith     if (!km->alwaysuseserver) {
825f1f2ae84SBarry Smith       PetscInt n;
826f1f2ae84SBarry Smith       Mat      sA;
827f1f2ae84SBarry Smith       /* short circuit for small systems */
828f1f2ae84SBarry Smith       PetscCall(PCGetOperators(pc, &sA, &sA));
829f1f2ae84SBarry Smith       PetscCall(MatGetSize(sA, &n, NULL));
830f1f2ae84SBarry Smith       if (n < 2 * km->mincntperrank - 1 || size == 1) {
831f1f2ae84SBarry Smith         pc->ops->setup   = NULL;
832f1f2ae84SBarry Smith         pc->ops->apply   = PCApply_Seq;
833f1f2ae84SBarry Smith         pc->ops->destroy = PCDestroy_Seq;
834f1f2ae84SBarry Smith         pc->ops->view    = PCView_Seq;
835f1f2ae84SBarry Smith         PetscCall(PCSetUp_Seq(pc));
8363ba16761SJacob Faibussowitsch         PetscFunctionReturn(PETSC_SUCCESS);
837f1f2ae84SBarry Smith       }
838f1f2ae84SBarry Smith     }
839f1f2ae84SBarry Smith 
8409f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
841f1f2ae84SBarry Smith     PetscCall(PCMPICreate(pc));
842f1f2ae84SBarry Smith     newmatrix = PETSC_TRUE;
8439371c9d4SSatish Balay   }
8449371c9d4SSatish Balay   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
845f1f2ae84SBarry Smith 
846f1f2ae84SBarry Smith   if (newmatrix) {
847835f2295SStefano Zampini     PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n"));
8489f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
849f1f2ae84SBarry Smith     PetscCall(PCMPISetMat(pc));
850f1f2ae84SBarry Smith   } else {
851835f2295SStefano Zampini     PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n"));
8529f0612e4SBarry Smith     PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
853f1f2ae84SBarry Smith     PetscCall(PCMPIUpdateMatValues(pc));
854f1f2ae84SBarry Smith   }
8553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
856f1f2ae84SBarry Smith }
857f1f2ae84SBarry Smith 
858d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
859d71ae5a4SJacob Faibussowitsch {
860f1f2ae84SBarry Smith   PetscFunctionBegin;
8619f0612e4SBarry Smith   PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
862f1f2ae84SBarry Smith   PetscCall(PCMPISolve(pc, b, x));
8633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
864f1f2ae84SBarry Smith }
865f1f2ae84SBarry Smith 
86666976f2fSJacob Faibussowitsch static PetscErrorCode PCDestroy_MPI(PC pc)
867d71ae5a4SJacob Faibussowitsch {
868f1f2ae84SBarry Smith   PetscFunctionBegin;
8699f0612e4SBarry Smith   PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
870f1f2ae84SBarry Smith   PetscCall(PCMPIDestroy(pc));
871f1f2ae84SBarry Smith   PetscCall(PetscFree(pc->data));
8723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
873f1f2ae84SBarry Smith }
874f1f2ae84SBarry Smith 
875f1f2ae84SBarry Smith /*
8769f0612e4SBarry Smith      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer, use options database
877f1f2ae84SBarry Smith */
87866976f2fSJacob Faibussowitsch static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
879d71ae5a4SJacob Faibussowitsch {
880f1f2ae84SBarry Smith   PC_MPI     *km = (PC_MPI *)pc->data;
881f1f2ae84SBarry Smith   MPI_Comm    comm;
882f1f2ae84SBarry Smith   PetscMPIInt size;
883f1f2ae84SBarry Smith 
884f1f2ae84SBarry Smith   PetscFunctionBegin;
885f1f2ae84SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
886f1f2ae84SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
887f1f2ae84SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
888835f2295SStefano Zampini   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
8899f0612e4SBarry Smith   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
8903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
891f1f2ae84SBarry Smith }
892f1f2ae84SBarry Smith 
89366976f2fSJacob Faibussowitsch static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
894d71ae5a4SJacob Faibussowitsch {
895f1f2ae84SBarry Smith   PC_MPI *km = (PC_MPI *)pc->data;
896f1f2ae84SBarry Smith 
897f1f2ae84SBarry Smith   PetscFunctionBegin;
898f1f2ae84SBarry Smith   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
8993821be0aSBarry Smith   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
9003821be0aSBarry Smith   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
901f1f2ae84SBarry Smith   PetscOptionsHeadEnd();
9023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
903f1f2ae84SBarry Smith }
904f1f2ae84SBarry Smith 
905f1f2ae84SBarry Smith /*MC
906f1580f4eSBarry Smith      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
907f1f2ae84SBarry Smith 
9083821be0aSBarry Smith    Options Database Keys for the Server:
909f1f2ae84SBarry Smith +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
9109f0612e4SBarry Smith .  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
9119f0612e4SBarry Smith -  -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute matrix and right hand side, defaults to true
912f1f2ae84SBarry Smith 
9133821be0aSBarry Smith    Options Database Keys for a specific `KSP` object
9143821be0aSBarry Smith +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
9153821be0aSBarry Smith -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)
9163821be0aSBarry Smith 
9173821be0aSBarry Smith    Level: developer
91820f4b53cSBarry Smith 
919f1f2ae84SBarry Smith    Notes:
9209f0612e4SBarry Smith    This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
9219f0612e4SBarry Smith    `MatCreateSeqAIJWithArrays()`
9229f0612e4SBarry Smith 
92346bbbc36SPierre Jolivet    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used.
924f1f2ae84SBarry Smith 
925f1f2ae84SBarry Smith    It can be particularly useful for user OpenMP code or potentially user GPU code.
926f1f2ae84SBarry Smith 
927dd8e379bSPierre Jolivet    When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
9283821be0aSBarry Smith    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
929f1f2ae84SBarry Smith 
9303821be0aSBarry Smith    The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
9310316ec64SBarry Smith    because they are not the actual solver objects.
9320316ec64SBarry Smith 
9330316ec64SBarry Smith    When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other
93468a21331SBarry Smith    stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.
9350316ec64SBarry Smith 
9363821be0aSBarry Smith    Developer Note:
9373821be0aSBarry Smith    This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of
9383821be0aSBarry Smith    a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
9393821be0aSBarry Smith 
9403821be0aSBarry Smith .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
941f1f2ae84SBarry Smith M*/
942d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
943d71ae5a4SJacob Faibussowitsch {
944f1f2ae84SBarry Smith   PC_MPI *km;
945f9818f3cSJose E. Roman   char   *found = NULL;
946f1f2ae84SBarry Smith 
947f1f2ae84SBarry Smith   PetscFunctionBegin;
9483821be0aSBarry Smith   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
9493821be0aSBarry Smith   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
9503821be0aSBarry Smith 
9513821be0aSBarry Smith   /* material from PCSetType() */
9523821be0aSBarry Smith   PetscTryTypeMethod(pc, destroy);
9533821be0aSBarry Smith   pc->ops->destroy = NULL;
9543821be0aSBarry Smith   pc->data         = NULL;
9553821be0aSBarry Smith 
9563821be0aSBarry Smith   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
9573821be0aSBarry Smith   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
9583821be0aSBarry Smith   pc->modifysubmatrices  = NULL;
9593821be0aSBarry Smith   pc->modifysubmatricesP = NULL;
9603821be0aSBarry Smith   pc->setupcalled        = 0;
9613821be0aSBarry Smith 
9624dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&km));
963f1f2ae84SBarry Smith   pc->data = (void *)km;
964f1f2ae84SBarry Smith 
965f1f2ae84SBarry Smith   km->mincntperrank = 10000;
966f1f2ae84SBarry Smith 
967f1f2ae84SBarry Smith   pc->ops->setup          = PCSetUp_MPI;
968f1f2ae84SBarry Smith   pc->ops->apply          = PCApply_MPI;
969f1f2ae84SBarry Smith   pc->ops->destroy        = PCDestroy_MPI;
970f1f2ae84SBarry Smith   pc->ops->view           = PCView_MPI;
971f1f2ae84SBarry Smith   pc->ops->setfromoptions = PCSetFromOptions_MPI;
9723821be0aSBarry Smith   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
9733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
974f1f2ae84SBarry Smith }
9759f0612e4SBarry Smith 
9769f0612e4SBarry Smith /*@
9779f0612e4SBarry Smith   PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`
9789f0612e4SBarry Smith 
9799f0612e4SBarry Smith   Not Collective
9809f0612e4SBarry Smith 
9819f0612e4SBarry Smith   Input Parameter:
9829f0612e4SBarry Smith . pc - the preconditioner context
9839f0612e4SBarry Smith 
9849f0612e4SBarry Smith   Output Parameter:
9859f0612e4SBarry Smith . innerksp - the inner `KSP`
9869f0612e4SBarry Smith 
9879f0612e4SBarry Smith   Level: advanced
9889f0612e4SBarry Smith 
9899f0612e4SBarry Smith .seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
9909f0612e4SBarry Smith @*/
9919f0612e4SBarry Smith PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
9929f0612e4SBarry Smith {
9939f0612e4SBarry Smith   PC_MPI *red = (PC_MPI *)pc->data;
9949f0612e4SBarry Smith 
9959f0612e4SBarry Smith   PetscFunctionBegin;
9969f0612e4SBarry Smith   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
9979f0612e4SBarry Smith   PetscAssertPointer(innerksp, 2);
9989f0612e4SBarry Smith   *innerksp = red->ksps[0];
9999f0612e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
10009f0612e4SBarry Smith }
1001