1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> 14 #include <petsc/private/kspimpl.h> 15 #include <petsc.h> 16 17 #define PC_MPI_MAX_RANKS 256 18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 19 20 typedef struct { 21 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */ 22 PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 23 PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 24 PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */ 25 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */ 26 } PC_MPI; 27 28 typedef enum { 29 PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 30 PCMPI_CREATE, 31 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 32 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 33 PCMPI_SOLVE, 34 PCMPI_VIEW, 35 PCMPI_DESTROY /* destroy a KSP that is no longer needed */ 36 } PCMPICommand; 37 38 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 39 static PetscBool PCMPICommSet = PETSC_FALSE; 40 static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 41 static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0; 42 43 static PetscErrorCode PCMPICommsCreate(void) 44 { 45 MPI_Comm comm = PC_MPI_COMM_WORLD; 46 PetscMPIInt size, rank, i; 47 48 PetscFunctionBegin; 49 PetscCallMPI(MPI_Comm_size(comm, &size)); 50 PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 51 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 52 /* comm for size 1 is useful only for debugging */ 53 for (i = 0; i < size; i++) { 54 PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 55 PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 56 PCMPISolveCounts[i] = 0; 57 PCMPIKSPCounts[i] = 0; 58 PCMPIIterations[i] = 0; 59 PCMPISizes[i] = 0; 60 } 61 PCMPICommSet = PETSC_TRUE; 62 PetscFunctionReturn(PETSC_SUCCESS); 63 } 64 65 PetscErrorCode PCMPICommsDestroy(void) 66 { 67 MPI_Comm comm = PC_MPI_COMM_WORLD; 68 PetscMPIInt size, rank, i; 69 70 PetscFunctionBegin; 71 if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS); 72 PetscCallMPI(MPI_Comm_size(comm, &size)); 73 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 74 for (i = 0; i < size; i++) { 75 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 76 } 77 PCMPICommSet = PETSC_FALSE; 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 static PetscErrorCode PCMPICreate(PC pc) 82 { 83 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 84 MPI_Comm comm = PC_MPI_COMM_WORLD; 85 KSP ksp; 86 PetscInt N[2], mincntperrank = 0; 87 PetscMPIInt size; 88 Mat sA; 89 char *cprefix = NULL; 90 PetscMPIInt len = 0; 91 92 PetscFunctionBegin; 93 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 94 PetscCallMPI(MPI_Comm_size(comm, &size)); 95 if (pc) { 96 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 97 PetscCall(PCGetOperators(pc, &sA, &sA)); 98 PetscCall(MatGetSize(sA, &N[0], &N[1])); 99 } 100 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 101 102 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 103 if (km) mincntperrank = km->mincntperrank; 104 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 105 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 106 if (comm == MPI_COMM_NULL) { 107 ksp = NULL; 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 PetscCall(PetscLogStagePush(PCMPIStage)); 111 PetscCall(KSPCreate(comm, &ksp)); 112 PetscCall(KSPSetNestLevel(ksp, 1)); 113 PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1)); 114 PetscCall(PetscLogStagePop()); 115 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 116 if (pc) { 117 size_t slen; 118 const char *prefix = NULL; 119 char *found = NULL; 120 121 PetscCallMPI(MPI_Comm_size(comm, &size)); 122 PCMPIKSPCounts[size - 1]++; 123 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 124 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 125 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 126 PetscCall(PetscStrallocpy(prefix, &cprefix)); 127 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 128 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 129 *found = 0; 130 PetscCall(PetscStrlen(cprefix, &slen)); 131 len = (PetscMPIInt)slen; 132 } 133 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 134 if (len) { 135 if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix)); 136 PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm)); 137 PetscCall(KSPSetOptionsPrefix(ksp, cprefix)); 138 } 139 PetscCall(PetscFree(cprefix)); 140 PetscFunctionReturn(PETSC_SUCCESS); 141 } 142 143 static PetscErrorCode PCMPISetMat(PC pc) 144 { 145 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 146 Mat A; 147 PetscInt m, n, *ia, *ja, j, bs; 148 Mat sA; 149 MPI_Comm comm = PC_MPI_COMM_WORLD; 150 KSP ksp; 151 PetscLayout layout; 152 const PetscInt *IA = NULL, *JA = NULL; 153 const PetscInt *range; 154 PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i; 155 PetscScalar *a; 156 const PetscScalar *sa = NULL; 157 PetscInt matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0}; 158 char *cprefix; 159 160 PetscFunctionBegin; 161 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 162 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 163 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 164 if (pc) { 165 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 166 const char *prefix; 167 size_t clen; 168 169 PetscCallMPI(MPI_Comm_size(comm, &size)); 170 PCMPIMatCounts[size - 1]++; 171 PetscCall(PCGetOperators(pc, &sA, &sA)); 172 PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1])); 173 PetscCall(MatGetBlockSize(sA, &bs)); 174 matproperties[2] = bs; 175 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 176 matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2); 177 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 178 matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2); 179 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 180 matproperties[5] = !isset ? 0 : (isspd ? 1 : 2); 181 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 182 matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 183 /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */ 184 PetscCall(MatGetOptionsPrefix(sA, &prefix)); 185 PetscCall(PetscStrallocpy(prefix, &cprefix)); 186 PetscCall(PetscStrlen(cprefix, &clen)); 187 matproperties[7] = (PetscInt)clen; 188 } 189 PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm)); 190 191 /* determine ownership ranges of matrix columns */ 192 PetscCall(PetscLayoutCreate(comm, &layout)); 193 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 194 PetscCall(PetscLayoutSetSize(layout, matproperties[1])); 195 PetscCall(PetscLayoutSetUp(layout)); 196 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 197 PetscCall(PetscLayoutDestroy(&layout)); 198 199 /* determine ownership ranges of matrix rows */ 200 PetscCall(PetscLayoutCreate(comm, &layout)); 201 PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2])); 202 PetscCall(PetscLayoutSetSize(layout, matproperties[0])); 203 PetscCall(PetscLayoutSetUp(layout)); 204 PetscCall(PetscLayoutGetLocalSize(layout, &m)); 205 206 /* copy over the matrix nonzero structure and values */ 207 if (pc) { 208 NZ = km->NZ; 209 NZdispl = km->NZdispl; 210 PetscCall(PetscLayoutGetRanges(layout, &range)); 211 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 212 for (i = 0; i < size; i++) { 213 sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); 214 NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]); 215 } 216 displi[0] = 0; 217 NZdispl[0] = 0; 218 for (j = 1; j < size; j++) { 219 displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 220 NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 221 } 222 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 223 } 224 PetscCall(PetscLayoutDestroy(&layout)); 225 PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm)); 226 227 PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 228 PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm)); 229 PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm)); 230 PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 231 232 if (pc) { 233 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 234 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 235 } 236 237 for (j = 1; j < n + 1; j++) ia[j] -= ia[0]; 238 ia[0] = 0; 239 PetscCall(PetscLogStagePush(PCMPIStage)); 240 PetscCall(MatCreate(comm, &A)); 241 if (matproperties[7] > 0) { 242 if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix)); 243 PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm)); 244 PetscCall(MatSetOptionsPrefix(A, cprefix)); 245 PetscCall(PetscFree(cprefix)); 246 } 247 PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_")); 248 PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1])); 249 PetscCall(MatSetType(A, MATMPIAIJ)); 250 PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a)); 251 PetscCall(MatSetBlockSize(A, matproperties[2])); 252 253 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 254 if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE)); 255 if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE)); 256 if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE)); 257 258 PetscCall(PetscFree3(ia, ja, a)); 259 PetscCall(KSPSetOperators(ksp, A, A)); 260 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 261 PetscCall(PetscLogStagePop()); 262 if (pc) { /* needed for scatterv/gatherv of rhs and solution */ 263 const PetscInt *range; 264 265 PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 266 for (i = 0; i < size; i++) { 267 km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]); 268 km->displ[i] = (PetscMPIInt)range[i]; 269 } 270 } 271 PetscCall(MatDestroy(&A)); 272 PetscCall(KSPSetFromOptions(ksp)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 static PetscErrorCode PCMPIUpdateMatValues(PC pc) 277 { 278 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 279 KSP ksp; 280 Mat sA, A; 281 MPI_Comm comm = PC_MPI_COMM_WORLD; 282 PetscScalar *a; 283 PetscCount nz; 284 const PetscScalar *sa = NULL; 285 PetscMPIInt size; 286 PetscInt matproperties[4] = {0, 0, 0, 0}; 287 288 PetscFunctionBegin; 289 if (pc) { 290 PetscCall(PCGetOperators(pc, &sA, &sA)); 291 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 292 } 293 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 294 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 295 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 296 PetscCallMPI(MPI_Comm_size(comm, &size)); 297 PCMPIMatCounts[size - 1]++; 298 PetscCall(KSPGetOperators(ksp, NULL, &A)); 299 PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 300 PetscCall(PetscMalloc1(nz, &a)); 301 PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 302 if (pc) { 303 PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric; 304 305 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 306 307 PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric)); 308 matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2); 309 PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian)); 310 matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2); 311 PetscCall(MatIsSPDKnown(sA, &isset, &isspd)); 312 matproperties[2] = !isset ? 0 : (isspd ? 1 : 2); 313 PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric)); 314 matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2); 315 } 316 PetscCall(MatUpdateMPIAIJWithArray(A, a)); 317 PetscCall(PetscFree(a)); 318 PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm)); 319 /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */ 320 if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE)); 321 if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE)); 322 if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE)); 323 if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE)); 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) 328 { 329 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 330 KSP ksp; 331 MPI_Comm comm = PC_MPI_COMM_WORLD; 332 const PetscScalar *sb = NULL, *x; 333 PetscScalar *b, *sx = NULL; 334 PetscInt its, n; 335 PetscMPIInt size; 336 337 PetscFunctionBegin; 338 PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 339 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 340 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 341 342 /* TODO: optimize code to not require building counts/displ every time */ 343 344 /* scatterv rhs */ 345 PetscCallMPI(MPI_Comm_size(comm, &size)); 346 if (pc) { 347 PetscInt N; 348 349 PCMPISolveCounts[size - 1]++; 350 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 351 PCMPISizes[size - 1] += N; 352 PetscCall(VecGetArrayRead(B, &sb)); 353 } 354 PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 355 PetscCall(VecGetArray(ksp->vec_rhs, &b)); 356 PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm)); 357 PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 358 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 359 360 PetscCall(PetscLogStagePush(PCMPIStage)); 361 PetscCall(KSPSolve(ksp, NULL, NULL)); 362 PetscCall(PetscLogStagePop()); 363 PetscCall(KSPGetIterationNumber(ksp, &its)); 364 PCMPIIterations[size - 1] += its; 365 366 /* gather solution */ 367 PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 368 if (pc) PetscCall(VecGetArray(X, &sx)); 369 PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 370 if (pc) PetscCall(VecRestoreArray(X, &sx)); 371 PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 372 PetscFunctionReturn(PETSC_SUCCESS); 373 } 374 375 static PetscErrorCode PCMPIDestroy(PC pc) 376 { 377 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 378 KSP ksp; 379 MPI_Comm comm = PC_MPI_COMM_WORLD; 380 381 PetscFunctionBegin; 382 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 383 if (!ksp) PetscFunctionReturn(PETSC_SUCCESS); 384 PetscCall(KSPDestroy(&ksp)); 385 PetscFunctionReturn(PETSC_SUCCESS); 386 } 387 388 PetscBool PCMPIServerActive = PETSC_FALSE; 389 390 /*@C 391 PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for 392 parallel `KSP` solves and management of parallel `KSP` objects. 393 394 Logically Collective on all MPI processes except rank 0 395 396 Options Database Keys: 397 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 398 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program 399 400 Level: developer 401 402 Note: 403 This is normally started automatically in `PetscInitialize()` when the option is provided 404 405 See `PCMPI` for information on using the solver with a `KSP` object 406 407 Developer Notes: 408 When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 409 written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 410 (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 411 412 Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 413 414 Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage 415 416 This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object 417 418 The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory 419 nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver. 420 421 The concept could also be extended for users's callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()` for example, runs on 422 all MPI processes but the user callback only runs on one MPI process per node. 423 424 PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially remove 425 the `MPI_Comm` argument from PETSc calls. 426 427 .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()` 428 @*/ 429 PetscErrorCode PCMPIServerBegin(void) 430 { 431 PetscMPIInt rank; 432 433 PetscFunctionBegin; 434 PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n")); 435 if (PetscDefined(USE_SINGLE_LIBRARY)) { 436 PetscCall(VecInitializePackage()); 437 PetscCall(MatInitializePackage()); 438 PetscCall(DMInitializePackage()); 439 PetscCall(PCInitializePackage()); 440 PetscCall(KSPInitializePackage()); 441 PetscCall(SNESInitializePackage()); 442 PetscCall(TSInitializePackage()); 443 PetscCall(TaoInitializePackage()); 444 } 445 PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage)); 446 447 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 448 if (rank == 0) { 449 PETSC_COMM_WORLD = PETSC_COMM_SELF; 450 PCMPIServerActive = PETSC_TRUE; 451 PetscFunctionReturn(PETSC_SUCCESS); 452 } 453 454 while (PETSC_TRUE) { 455 PCMPICommand request = PCMPI_CREATE; 456 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 457 switch (request) { 458 case PCMPI_CREATE: 459 PetscCall(PCMPICreate(NULL)); 460 break; 461 case PCMPI_SET_MAT: 462 PetscCall(PCMPISetMat(NULL)); 463 break; 464 case PCMPI_UPDATE_MAT_VALUES: 465 PetscCall(PCMPIUpdateMatValues(NULL)); 466 break; 467 case PCMPI_VIEW: 468 // PetscCall(PCMPIView(NULL)); 469 break; 470 case PCMPI_SOLVE: 471 PetscCall(PCMPISolve(NULL, NULL, NULL)); 472 break; 473 case PCMPI_DESTROY: 474 PetscCall(PCMPIDestroy(NULL)); 475 break; 476 case PCMPI_EXIT: 477 PetscCall(PetscFinalize()); 478 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 479 break; 480 default: 481 break; 482 } 483 } 484 PetscFunctionReturn(PETSC_SUCCESS); 485 } 486 487 /*@C 488 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 489 parallel KSP solves and management of parallel `KSP` objects. 490 491 Logically Collective on all MPI ranks except 0 492 493 Level: developer 494 495 Note: 496 This is normally ended automatically in `PetscFinalize()` when the option is provided 497 498 .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()` 499 @*/ 500 PetscErrorCode PCMPIServerEnd(void) 501 { 502 PCMPICommand request = PCMPI_EXIT; 503 504 PetscFunctionBegin; 505 if (PetscGlobalRank == 0) { 506 PetscViewer viewer = NULL; 507 PetscViewerFormat format; 508 509 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 510 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 511 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 512 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 513 PetscOptionsEnd(); 514 if (viewer) { 515 PetscBool isascii; 516 517 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 518 if (isascii) { 519 PetscMPIInt size; 520 PetscMPIInt i; 521 522 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 523 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 524 PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n")); 525 if (PCMPIKSPCountsSeq) { 526 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq)); 527 } 528 for (i = 0; i < size; i++) { 529 if (PCMPIKSPCounts[i]) { 530 PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i])); 531 } 532 } 533 } 534 PetscCall(PetscViewerDestroy(&viewer)); 535 } 536 } 537 PetscCall(PCMPICommsDestroy()); 538 PCMPIServerActive = PETSC_FALSE; 539 PetscFunctionReturn(PETSC_SUCCESS); 540 } 541 542 /* 543 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 544 because, for example, the problem is small. This version is more efficient because it does not require copying any data 545 */ 546 static PetscErrorCode PCSetUp_Seq(PC pc) 547 { 548 PC_MPI *km = (PC_MPI *)pc->data; 549 Mat sA; 550 const char *prefix; 551 char *found = NULL, *cprefix; 552 553 PetscFunctionBegin; 554 PetscCall(PCGetOperators(pc, NULL, &sA)); 555 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 556 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 557 PetscCall(KSPSetNestLevel(km->ksps[0], 1)); 558 PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1)); 559 560 /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */ 561 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 562 PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix"); 563 PetscCall(PetscStrallocpy(prefix, &cprefix)); 564 PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found)); 565 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix"); 566 *found = 0; 567 PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix)); 568 PetscCall(PetscFree(cprefix)); 569 570 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 571 PetscCall(KSPSetFromOptions(km->ksps[0])); 572 PetscCall(KSPSetUp(km->ksps[0])); 573 PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n")); 574 PCMPIKSPCountsSeq++; 575 PetscFunctionReturn(PETSC_SUCCESS); 576 } 577 578 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 579 { 580 PC_MPI *km = (PC_MPI *)pc->data; 581 PetscInt its, n; 582 Mat A; 583 584 PetscFunctionBegin; 585 PetscCall(KSPSolve(km->ksps[0], b, x)); 586 PetscCall(KSPGetIterationNumber(km->ksps[0], &its)); 587 PCMPISolveCountsSeq++; 588 PCMPIIterationsSeq += its; 589 PetscCall(KSPGetOperators(km->ksps[0], NULL, &A)); 590 PetscCall(MatGetSize(A, &n, NULL)); 591 PCMPISizesSeq += n; 592 PetscFunctionReturn(PETSC_SUCCESS); 593 } 594 595 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 596 { 597 PC_MPI *km = (PC_MPI *)pc->data; 598 599 PetscFunctionBegin; 600 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 601 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 602 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 603 PetscFunctionReturn(PETSC_SUCCESS); 604 } 605 606 static PetscErrorCode PCDestroy_Seq(PC pc) 607 { 608 PC_MPI *km = (PC_MPI *)pc->data; 609 610 PetscFunctionBegin; 611 PetscCall(KSPDestroy(&km->ksps[0])); 612 PetscCall(PetscFree(pc->data)); 613 PetscFunctionReturn(PETSC_SUCCESS); 614 } 615 616 /* 617 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 618 right hand side to the parallel PC 619 */ 620 static PetscErrorCode PCSetUp_MPI(PC pc) 621 { 622 PC_MPI *km = (PC_MPI *)pc->data; 623 PetscMPIInt rank, size; 624 PCMPICommand request; 625 PetscBool newmatrix = PETSC_FALSE; 626 627 PetscFunctionBegin; 628 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 629 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 630 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 631 632 if (!pc->setupcalled) { 633 if (!km->alwaysuseserver) { 634 PetscInt n; 635 Mat sA; 636 /* short circuit for small systems */ 637 PetscCall(PCGetOperators(pc, &sA, &sA)); 638 PetscCall(MatGetSize(sA, &n, NULL)); 639 if (n < 2 * km->mincntperrank - 1 || size == 1) { 640 pc->ops->setup = NULL; 641 pc->ops->apply = PCApply_Seq; 642 pc->ops->destroy = PCDestroy_Seq; 643 pc->ops->view = PCView_Seq; 644 PetscCall(PCSetUp_Seq(pc)); 645 PetscFunctionReturn(PETSC_SUCCESS); 646 } 647 } 648 649 request = PCMPI_CREATE; 650 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 651 PetscCall(PCMPICreate(pc)); 652 newmatrix = PETSC_TRUE; 653 } 654 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 655 656 if (newmatrix) { 657 PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n")); 658 request = PCMPI_SET_MAT; 659 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 660 PetscCall(PCMPISetMat(pc)); 661 } else { 662 PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n")); 663 request = PCMPI_UPDATE_MAT_VALUES; 664 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 665 PetscCall(PCMPIUpdateMatValues(pc)); 666 } 667 PetscFunctionReturn(PETSC_SUCCESS); 668 } 669 670 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 671 { 672 PCMPICommand request = PCMPI_SOLVE; 673 674 PetscFunctionBegin; 675 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 676 PetscCall(PCMPISolve(pc, b, x)); 677 PetscFunctionReturn(PETSC_SUCCESS); 678 } 679 680 static PetscErrorCode PCDestroy_MPI(PC pc) 681 { 682 PCMPICommand request = PCMPI_DESTROY; 683 684 PetscFunctionBegin; 685 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 686 PetscCall(PCMPIDestroy(pc)); 687 PetscCall(PetscFree(pc->data)); 688 PetscFunctionReturn(PETSC_SUCCESS); 689 } 690 691 /* 692 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer 693 */ 694 static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 695 { 696 PC_MPI *km = (PC_MPI *)pc->data; 697 MPI_Comm comm; 698 PetscMPIInt size; 699 700 PetscFunctionBegin; 701 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 702 PetscCallMPI(MPI_Comm_size(comm, &size)); 703 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 704 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 705 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 706 PetscFunctionReturn(PETSC_SUCCESS); 707 } 708 709 static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) 710 { 711 PC_MPI *km = (PC_MPI *)pc->data; 712 713 PetscFunctionBegin; 714 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 715 PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 716 PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 717 PetscOptionsHeadEnd(); 718 PetscFunctionReturn(PETSC_SUCCESS); 719 } 720 721 /*MC 722 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 723 724 Options Database Keys for the Server: 725 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 726 - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server 727 728 Options Database Keys for a specific `KSP` object 729 + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 730 - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes) 731 732 Level: developer 733 734 Notes: 735 The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`, mostly commonly no prefix is used. 736 737 It can be particularly useful for user OpenMP code or potentially user GPU code. 738 739 When the program is running with a single MPI process then it directly uses the provided matrix and right hand side 740 and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly. 741 742 The solver options for actual solving `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect 743 because they are not the actual solver objects. 744 745 When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other 746 stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading. 747 748 Developer Note: 749 This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outer most nesting of 750 a `KSP`. The outer most `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user. 751 752 .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()` 753 M*/ 754 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 755 { 756 PC_MPI *km; 757 char *found = NULL; 758 759 PetscFunctionBegin; 760 PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found)); 761 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_"); 762 763 /* material from PCSetType() */ 764 PetscTryTypeMethod(pc, destroy); 765 pc->ops->destroy = NULL; 766 pc->data = NULL; 767 768 PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist)); 769 PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps))); 770 pc->modifysubmatrices = NULL; 771 pc->modifysubmatricesP = NULL; 772 pc->setupcalled = 0; 773 774 PetscCall(PetscNew(&km)); 775 pc->data = (void *)km; 776 777 km->mincntperrank = 10000; 778 779 pc->ops->setup = PCSetUp_MPI; 780 pc->ops->apply = PCApply_MPI; 781 pc->ops->destroy = PCDestroy_MPI; 782 pc->ops->view = PCView_MPI; 783 pc->ops->setfromoptions = PCSetFromOptions_MPI; 784 PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI)); 785 PetscFunctionReturn(PETSC_SUCCESS); 786 } 787