/*
     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

     The user program may use OpenMP to compute the right-hand side and matrix for the linear system.

     The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.

     The resulting KSP and PC can only be controlled via the options database, though some common commands
     could be passed through the server.
*/
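/*
     A minimal sketch (hedged: assemble_system() is a hypothetical user routine; the rest is the
     standard PETSc API) of the sequential user code this server is designed to support:

        // run as: mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type preonly -pc_type mpi
        int main(int argc, char **argv)
        {
          Mat A;
          Vec b, x;
          KSP ksp;

          PetscCall(PetscInitialize(&argc, &argv, NULL, NULL)); // ranks > 0 enter the server loop here
          assemble_system(&A, &b, &x); // hypothetical (possibly OpenMP) assembly, runs on rank 0 only
          PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
          PetscCall(KSPSetOperators(ksp, A, A));
          PetscCall(KSPSetFromOptions(ksp));
          PetscCall(KSPSolve(ksp, b, x));
          PetscCall(KSPDestroy(&ksp));
          PetscCall(PetscFinalize()); // rank 0 broadcasts PCMPI_EXIT so the server ranks shut down
          return 0;
        }
*/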
#include <petsc/private/pcimpl.h>
#include <petsc/private/kspimpl.h>
#include <petsc.h>

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and of nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;

typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;

static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE;
static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(0);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *prefix;
  PetscMPIInt len = 0;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitably sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPIU_INT, 0, comm));
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(KSPCreate(comm, &ksp));
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t slen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
    PetscCall(PetscStrlen(prefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
    PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, prefix));
    if (!pc) PetscCall(PetscFree(prefix)); /* the prefix was copied into the KSP, so the temporary buffer can be freed */
  }
  PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
  PetscFunctionReturn(0);
}
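/*
     Worked example of the communicator selection in PCMPICreate() (numbers illustrative):
     with size = 16 ranks available, N[0] = 100000 rows, and mincntperrank = 10000,
     PetscMin(size, PetscMax(1, N[0] / mincntperrank)) = 10, so PCMPIComms[9] (a 10-rank
     subcommunicator) is used; ranks 10-15 receive MPI_COMM_NULL and skip this system.
*/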
static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           N[2], n, *ia, *ja, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    /* need to broadcast symmetry flags etc. if set */
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Bcast(&bs, 1, MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, bs));
  PetscCall(PetscLayoutSetSize(layout, N[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  /* rebase the scattered row pointers so each rank's local CSR starts at zero */
  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
  PetscCall(MatSetBlockSize(A, bs));
  PetscCall(MatSetOptionsPrefix(A, "mpi_"));

  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(0);
}
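/*
     Tiny worked example of the CSR scatter in PCMPISetMat() (illustrative: 2 ranks, a 4x4
     matrix with row pointers IA = {0, 2, 5, 8, 10}, rows 0-1 on rank 0, rows 2-3 on rank 1):

        sendcounti = {3, 3}, displi  = {0, 2}  (each rank gets its range size + 1 row pointers)
        NZ         = {5, 5}, NZdispl = {0, 5}  (IA[2]-IA[0] and IA[4]-IA[2] nonzeros)

     Rank 1 receives ia = {5, 8, 10}, and the rebasing loop (ia[j] -= ia[0]; ia[0] = 0)
     turns it into the local row pointers {0, 3, 5} expected by MatCreateMPIAIJWithArrays().
*/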
static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, (PetscMPIInt)nz, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           n;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ every time */

  /* scatterv rhs */
  if (pc) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPISolveCounts[size - 1]++;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  /* passing NULL solves with the stashed ksp->vec_rhs and ksp->vec_sol filled/read above */
  PetscCall(KSPSolve(ksp, NULL, NULL));

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(KSPDestroy(&ksp));
  PetscFunctionReturn(0);
}
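/*
     The client/server pairing, sketched: each root-side command broadcast in PCSetUp_MPI(),
     PCApply_MPI(), and PCDestroy_MPI() below is matched by the dispatch loop in
     PCMPIServerBegin(), which calls the same routine with a NULL pc:

        rank 0 (user code)                    ranks > 0 (server loop)
        ------------------                    -----------------------
        MPI_Bcast(PCMPI_CREATE)      <-->     PCMPICreate(NULL)
        PCMPICreate(pc)
        MPI_Bcast(PCMPI_SET_MAT)     <-->     PCMPISetMat(NULL)
        PCMPISetMat(pc)
        MPI_Bcast(PCMPI_SOLVE)       <-->     PCMPISolve(NULL, NULL, NULL)
        PCMPISolve(pc, b, x)
        ...
*/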
/*@C
  PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically collective on all MPI ranks except 0

  Options Database Keys:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
- -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server

  Note:
  This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided

  Developer Notes:
  When called on rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` so that a main program
  written with `PETSC_COMM_WORLD` runs correctly on the single rank while all the other ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Level: developer

.seealso: `PCMPIServerEnd()`, `PCMPI`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    PETSC_COMM_WORLD = PETSC_COMM_SELF;
    PetscFunctionReturn(0);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but we cannot return because that would run the user's main program on this rank */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(0);
}
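/*
     Example launch (hedged; rank count illustrative): the same executable is started on all
     ranks, but only rank 0 executes the user's code past PetscInitialize(); the others wait
     in the PCMPIServerBegin() loop above:

        mpiexec -n 8 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view
*/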
/*@C
  PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically collective on all MPI ranks except 0

  Note:
  This is normally ended automatically in `PetscFinalize()` when the option `-mpi_linear_solver_server` is provided

  Level: developer

.seealso: `PCMPIServerBegin()`, `PCMPI`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscInt    i, ksp = 0, mat = 0, solve = 0;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        for (i = 0; i < size; i++) {
          ksp += PCMPIKSPCounts[i];
          mat += PCMPIMatCounts[i];
          solve += PCMPISolveCounts[i];
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel KSPs %" PetscInt_FMT " Mats %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", ksp, mat, solve));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential KSPs %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", PCMPIKSPCountsSeq, PCMPISolveCountsSeq));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PetscFunctionReturn(0);
}
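/*
     Illustrative -mpi_linear_solver_server_view output (the counts are made up; the layout
     follows the PetscViewerASCIIPrintf() calls above):

        MPI linear solver server:
          Parallel KSPs 2 Mats 2 Solves 10
          Sequential KSPs 1 Solves 5
*/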
/*
    This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
    because, for example, the problem is small. It is more efficient because it does not require copying any data.
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
  PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PCMPISolveCountsSeq++;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to the small size of the linear system\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}
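/*
     PCSetUp_MPI() below falls back to the _Seq variants above for small systems: with the
     default mincntperrank of 10000, any matrix with fewer than 2*10000 - 1 = 19999 rows
     (or any run on a single MPI rank) is solved directly on rank 0 with no data movement.
*/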
/*
    PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
    right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI      *km = (PC_MPI *)pc->data;
  PetscMPIInt  rank, size;
  PCMPICommand request;
  PetscBool    newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from rank 0 of MPI_COMM_WORLD. Perhaps the option -mpi_linear_solver_server is missing?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short-circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(0);
      }
    }

    request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    request = PCMPI_SET_MAT;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    request = PCMPI_UPDATE_MAT_VALUES;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PCMPICommand request = PCMPI_SOLVE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(0);
}

PetscErrorCode PCDestroy_MPI(PC pc)
{
  PCMPICommand request = PCMPI_DESTROY;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

/*
    PCView_MPI - Cannot call KSPView() on the MPI parallel KSP because the other ranks do not have access to the viewer
*/
PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
  PetscFunctionReturn(0);
}

PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}
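/*
     Example (hedged; values and solver choices illustrative) of controlling the server PC and,
     through the "mpi_" prefix appended in PCMPICreate(), the inner parallel KSP from the
     options database (assuming the outer PC has no options prefix of its own):

        -pc_mpi_minimum_count_per_rank 5000 -mpi_ksp_type bcgs -mpi_ksp_rtol 1e-8 -mpi_pc_type asm
*/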
/*MC
  PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

  Level: beginner

  Options Database Keys:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
. -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
. -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
- -pc_mpi_always_use_server - use the server solver code even if the particular system is solved on only one process; this option is for debugging and testing purposes only

  Notes:
  The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_

  The MPI linear solver server will not help scale user code to extremely large numbers of MPI ranks, but it should give a reasonable speedup on,
  say, 4 to 8 MPI ranks, depending on the linear system being solved, the solver algorithm, and the hardware.

  It can be particularly useful for user OpenMP code or potentially user GPU code.

  When the program is running with a single MPI rank then this directly uses the provided matrix and right-hand side (still in a `KSP` with the options prefix -mpi_)
  and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.

.seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;

  PetscFunctionBegin;
  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscFunctionReturn(0);
}
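/*
     Hedged sketch of selecting this preconditioner programmatically rather than with
     -pc_type mpi (assumes PCMPI is the registered type name macro for "mpi"):

        KSP ksp;
        PC  pc;

        PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
        PetscCall(KSPSetType(ksp, KSPPREONLY));
        PetscCall(KSPGetPC(ksp, &pc));
        PetscCall(PCSetType(pc, PCMPI));
*/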