/*
    This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
    It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

    That program may use OpenMP to compute the right-hand side and matrix for the linear system.

    The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD

    The resulting KSP and PC can only be controlled via the options database, though some common commands
    could be passed through the server.
*/
#include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
#include <petsc/private/kspimpl.h>
#include <petscts.h>
#include <petsctao.h>
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  #include <pthread.h>
#endif

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and the nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging, use the server infrastructure even if only one MPI process is used for the solve */
} PC_MPI;

typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with a different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update the current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a PC that is no longer needed */
} PCMPICommand;

static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool     PCMPICommSet = PETSC_FALSE;
static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
static PetscLogEvent EventServerDist, EventServerDistMPI;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
static pthread_mutex_t *PCMPIServerLocks;
#else
static void *PCMPIServerLocks;
#endif
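
/*
   An overview of the request protocol (a simplified sketch for orientation only; see the individual
   routines below for the authoritative details):

     rank 0 (user code)                               ranks > 0 (inside PCMPIServerBegin())
     ---------------------------------------------    -------------------------------------
     PCMPIServerBroadcastRequest(PCMPI_SET_MAT);      request received via MPI_Bcast()
     PCMPISetMat(pc);                                 PCMPISetMat(NULL);
     PCMPIServerBroadcastRequest(PCMPI_SOLVE);        request received via MPI_Bcast()
     PCMPISolve(pc, b, x);                            PCMPISolve(NULL, NULL, NULL);

   Each PCMPIXXX() routine is thus called on every rank of PC_MPI_COMM_WORLD; the pc argument is
   non-NULL only on rank 0.
*/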

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *cprefix = NULL;
  PetscMPIInt len     = 0;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitably sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp                = NULL;
    PCMPIServerInSolve = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPCreate(comm, &ksp));
  PetscCall(KSPSetNestLevel(ksp, 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
  PetscCall(PetscLogStagePop());
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t      slen;
    const char *prefix = NULL;
    char       *found  = NULL;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    /* Created KSP gets the prefix of the PC minus the mpi_linear_solver_server_ portion */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
    PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
    *found = 0;
    PetscCall(PetscStrlen(cprefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
  }
  PetscCall(PetscFree(cprefix));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
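
/*
   A worked example of the communicator selection in PCMPICreate() above (illustrative numbers only;
   the default mincntperrank of 10000 is set in PCCreate_MPI()):

     N[0] = 100000 rows, mincntperrank = 10000, 16 ranks in PC_MPI_COMM_WORLD
       PetscMax(1, 100000 / 10000) = 10,  PetscMin(16, 10) = 10  ->  PCMPIComms[9]

   PCMPIComms[9] contains ranks 0 through 9, so ten ranks participate in the solve while the remaining
   six see comm == MPI_COMM_NULL (and hence ksp == NULL) and skip the solve entirely.
*/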

static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  const PetscScalar *a = NULL, *sa;
  PetscInt           matproperties[8] = {0}, rstart, rend; /* [0] rows, [1] columns, [2] block size, [3..6] symmetric/Hermitian/SPD/structurally symmetric (0 = unknown, 1 = true, 2 = false), [7] options prefix length */
  char              *cprefix;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets the prefix of the input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));
  PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));

  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  /* copy over the matrix nonzero structure and values */
  if (pc) {
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    if (!PCMPIServerUseShmget) {
      NZ      = km->NZ;
      NZdispl = km->NZdispl;
      PetscCall(PetscLayoutGetRanges(layout, &range));
      for (i = 0; i < size; i++) {
        sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
        NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
      }
      displi[0]  = 0;
      NZdispl[0] = 0;
      for (j = 1; j < size; j++) {
        displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
        NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
      }
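
      /*
         A small worked example of the counts computed above (illustrative only): a 4x4 matrix with
         row pointers IA = {0, 2, 5, 7, 9} split over two ranks owning rows {0,1} and {2,3}
         (range = {0, 2, 4}) gives

           sendcounti = {3, 3}   displi  = {0, 2}   (each rank receives its rows' IA entries plus one)
           NZ         = {5, 4}   NZdispl = {0, 5}   (number of nonzeros owned by each rank)
      */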
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));

  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));

  if (!PCMPIServerUseShmget) {
    PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
    PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
    PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, n + 1, MPIU_INT, 0, comm));
    PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
    PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
  } else {
    const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
    PCMPIServerAddresses *addresses;

    PetscCall(PetscNew(&addresses));
    addresses->n = 3;
    PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
    /* rank 0's full IA/JA/a arrays are mapped into this process; this rank's row pointers start at
       offset rstart, and since ia[0] is the global index of its first nonzero, its column indices and
       values start at offset ia[0] */
    ia = rstart + (PetscInt *)addresses->addr[0];
    ja = ia[0] + (PetscInt *)addresses->addr[1];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
    PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, (PetscErrorCode(*)(void *))PCMPIServerAddressesDestroy));
  }

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPSetFromOptions(ksp));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscInt    *ia, *IA;
  const PetscScalar *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
    PetscCall(PetscMalloc1(nz, &a));
    PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
  } else {
    PCMPIServerAddresses *addresses;

    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
    ia = rstart + (PetscInt *)addresses->addr[0];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));

    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties were previously set and are now not set, A will keep the stale setting since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;
  void              *addr[2];

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(VecGetArray(ksp->vec_rhs, &b));
    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
    PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
    // TODO: scatter initial guess if needed
  } else {
    PetscInt rstart;

    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    if (pc) PetscCall(VecGetArray(X, &sx));
    const void *inaddr[2] = {(const void **)sb, (const void **)sx};
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

    PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
    PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
    PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
    PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;
  // TODO: send iterations up to outer KSP

  if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));

  /* gather solution */
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
    if (pc) PetscCall(VecGetArray(X, &sx));
    PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  } else {
    PetscCall(VecResetArray(ksp->vec_rhs));
    PetscCall(VecResetArray(ksp->vec_sol));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogStagePush(PCMPIStage));
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscLogStagePop());
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
{
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  PetscMPIInt dummy1 = 1, dummy2;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
  }
#endif
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  /* next line ensures the sender has already taken the lock */
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI processes except rank 0

  Options Database Keys:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
. -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
- -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)

  Level: developer

  Note:
  This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided

  See `PCMPI` for information on using the solver with a `KSP` object

  Developer Notes:
  When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the remaining ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

  This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

  The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
  nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

  The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where `SNESSolve()`, for example, runs on
  all MPI processes but the user callback only runs on one MPI process per node.

  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
  the `MPI_Comm` argument from PETSc calls.
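
  Example Usage:
  The server is not called directly from user code; a typical launch of an otherwise sequential PETSc program in
  server mode looks like the following (an illustrative command line, `./myprogram` standing in for any PETSc main program)
.vb
  mpiexec -n 8 ./myprogram -mpi_linear_solver_server -mpi_linear_solver_server_view
.ve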

.seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
  PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
  PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));

  if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (PCMPIServerUseShmget) {
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
    if (size > 1) {
      pthread_mutex_t *locks;

      if (rank == 0) {
        PCMPIServerActive = PETSC_TRUE;
        PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
      }
      PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
      if (rank == 0) {
        pthread_mutexattr_t attr;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);

        for (int i = 1; i < size; i++) {
          pthread_mutex_init(&PCMPIServerLocks[i], &attr);
          pthread_mutex_lock(&PCMPIServerLocks[i]);
        }
      }
      PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
    }
#endif
  }
  if (rank == 0) {
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscMPIInt  dummy1 = 1, dummy2;

    // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
#endif
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    if (PCMPIServerUseShmget) {
      /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
      PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
      pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
    }
#endif
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI ranks except 0

  Level: developer

  Note:
  This is normally called automatically in `PetscFinalize()`

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCall(PetscShmgetAddressesFinalize());
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
    if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats     KSPs       Avg. Size      Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                      %" PetscInt_FMT "       %" PetscInt_FMT "          %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "       %" PetscInt_FMT "         %" PetscInt_FMT "          %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PCMPIServerActive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
   because, for example, the problem is small. This version is more efficient because it does not require copying any data.
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;
  char       *found = NULL, *cprefix;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetNestLevel(km->ksps[0], 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

  /* Created KSP gets the prefix of the PC minus the mpi_linear_solver_server_ portion */
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
  PetscCall(PetscStrallocpy(prefix, &cprefix));
  PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
  *found = 0;
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
  PetscCall(PetscFree(cprefix));

  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PCMPIServerInSolve = PETSC_FALSE;
  /*
     do not keep references to the previous rhs and solution since destroying them in the next KSPSolve()
     may use PetscFree() instead of PCMPIArrayDeallocate()
  */
  PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
  PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;
  Mat     A, B;
  Vec     x, b;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  /* since matrices and vectors are shared with the outer KSP we need to ensure they are not destroyed with PetscFree() */
  PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
  PetscCall(PetscObjectReference((PetscObject)A));
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscCall(KSPGetSolution(km->ksps[0], &x));
  PetscCall(PetscObjectReference((PetscObject)x));
  PetscCall(KSPGetRhs(km->ksps[0], &b));
  PetscCall(PetscObjectReference((PetscObject)b));
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
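
/*
   An illustrative note on the small-system cutoff below (assuming the default mincntperrank of 10000 set in
   PCCreate_MPI() and that the always_use_server debugging option is not set): PCSetUp_MPI() falls back to the
   purely sequential PCSetUp_Seq()/PCApply_Seq() path whenever n < 2 * mincntperrank - 1, that is, for systems
   with fewer than 19999 rows, and also whenever only a single MPI process is available.
*/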

/*
   PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
   right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  PetscMPIInt rank, size;
  PetscBool   newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from the 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PetscFunctionBegin;
  PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_MPI(PC pc)
{
  PetscFunctionBegin;
  PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCView_MPI - Cannot call view on the MPI parallel KSP because the other ranks do not have access to the viewer; use the options database instead
*/
static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of matrix rows per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

  Options Database Keys for the Server:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
. -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
- -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute the matrix and right-hand side, defaults to true

  Options Database Keys for a specific `KSP` object:
+ -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
- -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is solved on only one process (for debugging and testing purposes)

  Level: developer

  Notes:
  This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateSeqWithArray()` or
  `MatCreateSeqAIJWithArrays()`

  The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.

  It can be particularly useful for user OpenMP code or potentially user GPU code.

  When the program is running with a single MPI process it directly uses the provided matrix and right-hand side
  and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

  The options for the actual solver `KSP` and `PC` must be controlled via the options database; calls that set options directly on the user-level `KSP` and `PC` have no effect
  because those are not the actual solver objects.

  When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
  stages will be confusing since the event times are only recorded on rank 0, so the percentages of time reported for the events will be misleading.

  Developer Note:
  This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
  a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
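
  Example Usage:
  An illustrative sketch (not taken from a particular PETSc example); the user code is an ordinary sequential
  PETSc `KSP` program and the server is enabled entirely from the command line
.vb
  KSPCreate(PETSC_COMM_WORLD, &ksp); // on rank 0 PETSC_COMM_WORLD has been remapped to PETSC_COMM_SELF by PCMPIServerBegin()
  KSPSetOperators(ksp, A, A);        // A is a sequential AIJ matrix assembled on rank 0
  KSPSetFromOptions(ksp);
  KSPSolve(ksp, b, x);
.ve
  run, for example, with
.vb
  mpiexec -n 8 ./myprogram -mpi_linear_solver_server -mpi_linear_solver_server_view -ksp_type cg -pc_type bjacobi
.ve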

.seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;
  char   *found = NULL;

  PetscFunctionBegin;
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType() */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = 0;

  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. innerksp - the inner `KSP`

  Level: advanced

.seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
{
  PC_MPI *red = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(innerksp, 2);
  *innerksp = red->ksps[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}