/*
    This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
    It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

    The sequential program may use OpenMP to compute the right-hand side and matrix for the linear system.

    The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.

    The resulting KSP and PC can only be controlled via the options database, though some common commands
    could be passed through the server.
*/
#include <petsc/private/pcimpl.h>
#include <petsc/private/kspimpl.h>
#include <petsc.h>

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and of nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;

typedef enum {
  PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with a different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;

static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool PCMPICommSet = PETSC_FALSE;
static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(0);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(0);
}
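
/*
   PCMPICreate - collectively creates the MPI parallel KSP on the server ranks.

   Rank 0 (the only rank that passes a non-NULL pc) broadcasts the matrix size and the desired
   minimum count per rank, a subcommunicator of suitable size is selected from PCMPIComms[] (ranks
   outside it return immediately), the remaining ranks create the KSP, rank 0 gathers the address of
   each rank's KSP into km->ksps[], and the options prefix of the outer PC, with "mpi_" appended, is
   applied to the new KSP.
*/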
static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *prefix;
  PetscMPIInt len = 0;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitably sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPIU_INT, 0, comm));
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(KSPCreate(comm, &ksp));
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t slen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
    PetscCall(PetscStrlen(prefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
    PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, prefix));
  }
  PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
  PetscFunctionReturn(0);
}
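
/*
   PCMPISetMat - distributes the sequential AIJ matrix from rank 0 to the MPI parallel KSP.

   Rank 0 scatters the KSP addresses stored in km->ksps[] (so each server rank can find its KSP),
   broadcasts the global size and block size, and uses a PetscLayout to compute the row ownership.
   The row pointers, column indices, and numerical values of the sequential AIJ matrix are then
   scattered with MPI_Scatterv() (the counts and displacements for the values are kept in km->NZ[]
   and km->NZdispl[] for reuse by PCMPIUpdateMatValues()), each rank shifts its row pointers to be
   zero based, and the parallel matrix is assembled with MatCreateMPIAIJWithArrays(). Rank 0 also
   records the counts and displacements needed to scatter/gather the right-hand side and solution.
*/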
static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           N[2], n, *ia, *ja, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    /* need to broadcast symmetry flags etc if set */
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Bcast(&bs, 1, MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, bs));
  PetscCall(PetscLayoutSetSize(layout, N[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A));
  PetscCall(MatSetBlockSize(A, bs));
  PetscCall(MatSetOptionsPrefix(A, "mpi_"));

  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(0);
}
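
/*
   PCMPIUpdateMatValues - scatters new numerical values (same nonzero structure) from the sequential
   matrix on rank 0 into the existing MPI parallel matrix, reusing the counts and displacements that
   PCMPISetMat() stored in km->NZ[] and km->NZdispl[].
*/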
static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscFunctionReturn(0);
}
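
/*
   PCMPISolve - scatters the right-hand side from rank 0 to the parallel KSP, runs KSPSolve() on the
   server communicator, and gathers the solution back to rank 0, using the counts and displacements
   stored in km->sendcount[] and km->displ[] by PCMPISetMat().
*/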
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ every time */

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(0);
  PetscCall(KSPDestroy(&ksp));
  PetscFunctionReturn(0);
}
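
/*
   The client (rank 0, in PCSetUp_MPI(), PCApply_MPI(), and PCDestroy_MPI()) and the server ranks
   (in the loop inside PCMPIServerBegin()) communicate with a simple protocol: rank 0 broadcasts a
   PCMPICommand on MPI_COMM_WORLD and then every rank calls the matching PCMPIXxx() routine above,
   the server ranks passing a NULL PC.
*/
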
/*@C
  PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically collective on all MPI ranks except 0

  Options Database Keys:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
- -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server

  Note:
  This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided

  Developer Notes:
  When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the other ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Level: developer

.seealso: `PCMPIServerEnd()`, `PCMPI`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    PETSC_COMM_WORLD = PETSC_COMM_SELF;
    PetscFunctionReturn(0);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(0);
}

/*@C
  PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically collective on all MPI ranks except 0

  Note:
  This is normally ended automatically in `PetscFinalize()` when the option `-mpi_linear_solver_server` is provided

  Level: developer

.seealso: `PCMPIServerBegin()`, `PCMPI`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PetscFunctionReturn(0);
}
/*
    This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
    because, for example, the problem is small. This version is more efficient because it does not require copying any data.
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
  PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PetscFunctionReturn(0);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}
/*
    PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
    right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI      *km = (PC_MPI *)pc->data;
  PetscMPIInt  rank, size;
  PCMPICommand request;
  PetscBool    newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from the 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(0);
      }
    }

    request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    request = PCMPI_SET_MAT;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    request = PCMPI_UPDATE_MAT_VALUES;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PCMPICommand request = PCMPI_SOLVE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(0);
}

PetscErrorCode PCDestroy_MPI(PC pc)
{
  PCMPICommand request = PCMPI_DESTROY;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

/*
    PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
*/
PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
  PetscFunctionReturn(0);
}

PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}
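
/*
   An illustrative sketch (not from the PETSc documentation, and with error checking omitted) of how
   a sequential application might use the MPI linear solver server; the matrix creation shown and the
   option names for the inner solver are examples of the -mpi_ prefixed options described below.

       // assemble the linear system sequentially on rank 0 (possibly using OpenMP)
       MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 0, nnz, &A);
       // ... fill A and the right-hand side b, create the solution vector x ...
       KSPCreate(PETSC_COMM_SELF, &ksp);
       KSPSetOperators(ksp, A, A);
       KSPSetFromOptions(ksp);
       KSPSolve(ksp, b, x);

   launched, for example, with

       mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type preonly -pc_type mpi -mpi_ksp_type cg -mpi_pc_type gamg
*/
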
/*MC
   PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

   Level: beginner

   Options Database Keys:
+  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
.  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
.  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on one process; this option is only for debugging and testing purposes

   Notes:
   The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_

   The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for
   potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI rank then this directly uses the provided matrix and right-hand side (still in a `KSP` with the options prefix -mpi_)
   and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.

.seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;

  PetscFunctionBegin;
  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscFunctionReturn(0);
}