1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> 14 #include <petsc/private/kspimpl.h> 15 #include <petsc.h> 16 17 #define PC_MPI_MAX_RANKS 256 18 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 19 20 typedef struct { 21 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */ 22 PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 23 PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 24 PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */ 25 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */ 26 } PC_MPI; 27 28 typedef enum { 29 PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 30 PCMPI_CREATE, 31 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 32 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 33 PCMPI_SOLVE, 34 PCMPI_VIEW, 35 PCMPI_DESTROY /* destroy a KSP that is no longer needed */ 36 } PCMPICommand; 37 38 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 39 static PetscBool PCMPICommSet = PETSC_FALSE; 40 static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 41 static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS],PCMPISizes[PC_MPI_MAX_RANKS],PCMPIIterationsSeq = 0, PCMPISizesSeq = 0; 42 43 static PetscErrorCode PCMPICommsCreate(void) 44 { 45 MPI_Comm comm = PC_MPI_COMM_WORLD; 46 PetscMPIInt size, rank, i; 47 48 PetscFunctionBegin; 49 PetscCallMPI(MPI_Comm_size(comm, &size)); 50 PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 51 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 52 /* comm for size 1 is useful only for debugging */ 53 for (i = 0; i < size; i++) { 54 PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED; 55 PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i])); 56 PCMPISolveCounts[i] = 0; 57 PCMPIKSPCounts[i] = 0; 58 PCMPIIterations[i] = 0; 59 PCMPISizes[i] = 0; 60 } 61 PCMPICommSet = PETSC_TRUE; 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode PCMPICommsDestroy(void) 66 { 67 MPI_Comm comm = PC_MPI_COMM_WORLD; 68 PetscMPIInt size, rank, i; 69 70 PetscFunctionBegin; 71 if (!PCMPICommSet) PetscFunctionReturn(0); 72 PetscCallMPI(MPI_Comm_size(comm, &size)); 73 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 74 for (i = 0; i < size; i++) { 75 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 76 } 77 PCMPICommSet = PETSC_FALSE; 78 PetscFunctionReturn(0); 79 } 80 81 static PetscErrorCode PCMPICreate(PC pc) 82 { 83 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 84 MPI_Comm comm = PC_MPI_COMM_WORLD; 85 KSP ksp; 86 PetscInt N[2], mincntperrank = 0; 87 PetscMPIInt size; 88 Mat sA; 89 char *prefix; 90 PetscMPIInt len = 0; 91 92 PetscFunctionBegin; 93 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 94 PetscCallMPI(MPI_Comm_size(comm, &size)); 95 if (pc) { 96 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 97 PetscCall(PCGetOperators(pc, &sA, &sA)); 98 PetscCall(MatGetSize(sA, &N[0], &N[1])); 99 } 100 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 101 102 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 103 if (km) mincntperrank = km->mincntperrank; 104 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 105 comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1]; 106 if (comm == MPI_COMM_NULL) { 107 ksp = NULL; 108 PetscFunctionReturn(0); 109 } 110 PetscCall(PetscLogStagePush(PCMPIStage)); 111 PetscCall(KSPCreate(comm, &ksp)); 112 PetscCall(PetscLogStagePop()); 113 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 114 if (pc) { 115 size_t slen; 116 117 PetscCallMPI(MPI_Comm_size(comm, &size)); 118 PCMPIKSPCounts[size - 1]++; 119 PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix)); 120 PetscCall(PetscStrlen(prefix, &slen)); 121 len = (PetscMPIInt)slen; 122 } 123 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 124 if (len) { 125 if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix)); 126 PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm)); 127 PetscCall(KSPSetOptionsPrefix(ksp, prefix)); 128 } 129 PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_")); 130 PetscFunctionReturn(0); 131 } 132 133 static PetscErrorCode PCMPISetMat(PC pc) 134 { 135 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 136 Mat A; 137 PetscInt N[2], n, *ia, *ja, j, bs; 138 Mat sA; 139 MPI_Comm comm = PC_MPI_COMM_WORLD; 140 KSP ksp; 141 PetscLayout layout; 142 const PetscInt *IA = NULL, *JA = NULL; 143 const PetscInt *range; 144 PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i; 145 PetscScalar *a; 146 const PetscScalar *sa = NULL; 147 148 PetscFunctionBegin; 149 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 150 if (!ksp) PetscFunctionReturn(0); 151 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 152 if (pc) { 153 PetscCallMPI(MPI_Comm_size(comm, &size)); 154 PCMPIMatCounts[size - 1]++; 155 PetscCall(PCGetOperators(pc, &sA, &sA)); 156 PetscCall(MatGetSize(sA, &N[0], &N[1])); 157 PetscCall(MatGetBlockSize(sA, &bs)); 158 /* need to broadcast symmetry flags etc if set */ 159 } 160 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 161 PetscCallMPI(MPI_Bcast(&bs, 1, MPIU_INT, 0, comm)); 162 163 /* determine ownership ranges of matrix */ 164 PetscCall(PetscLayoutCreate(comm, &layout)); 165 PetscCall(PetscLayoutSetBlockSize(layout, bs)); 166 PetscCall(PetscLayoutSetSize(layout, N[0])); 167 PetscCall(PetscLayoutSetUp(layout)); 168 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 169 170 /* copy over the matrix nonzero structure and values */ 171 if (pc) { 172 NZ = km->NZ; 173 NZdispl = km->NZdispl; 174 PetscCall(PetscLayoutGetRanges(layout, &range)); 175 PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 176 for (i = 0; i < size; i++) { 177 sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]); 178 NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]); 179 } 180 displi[0] = 0; 181 NZdispl[0] = 0; 182 for (j = 1; j < size; j++) { 183 displi[j] = displi[j - 1] + sendcounti[j - 1] - 1; 184 NZdispl[j] = NZdispl[j - 1] + NZ[j - 1]; 185 } 186 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 187 } 188 PetscCall(PetscLayoutDestroy(&layout)); 189 PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm)); 190 191 PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a)); 192 PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm)); 193 PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm)); 194 PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 195 196 if (pc) { 197 PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 198 PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL)); 199 } 200 201 for (j = 1; j < n + 1; j++) ia[j] -= ia[0]; 202 ia[0] = 0; 203 PetscCall(PetscLogStagePush(PCMPIStage)); 204 PetscCall(MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A)); 205 PetscCall(MatSetBlockSize(A, bs)); 206 PetscCall(MatSetOptionsPrefix(A, "mpi_")); 207 208 PetscCall(PetscFree3(ia, ja, a)); 209 PetscCall(KSPSetOperators(ksp, A, A)); 210 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs)); 211 PetscCall(PetscLogStagePop()); 212 if (pc) { /* needed for scatterv/gatherv of rhs and solution */ 213 const PetscInt *range; 214 215 PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range)); 216 for (i = 0; i < size; i++) { 217 km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]); 218 km->displ[i] = (PetscMPIInt)range[i]; 219 } 220 } 221 PetscCall(MatDestroy(&A)); 222 PetscCall(KSPSetFromOptions(ksp)); 223 PetscFunctionReturn(0); 224 } 225 226 static PetscErrorCode PCMPIUpdateMatValues(PC pc) 227 { 228 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 229 KSP ksp; 230 Mat sA, A; 231 MPI_Comm comm = PC_MPI_COMM_WORLD; 232 PetscScalar *a; 233 PetscCount nz; 234 const PetscScalar *sa = NULL; 235 PetscMPIInt size; 236 237 PetscFunctionBegin; 238 if (pc) { 239 PetscCall(PCGetOperators(pc, &sA, &sA)); 240 PetscCall(MatSeqAIJGetArrayRead(sA, &sa)); 241 } 242 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 243 if (!ksp) PetscFunctionReturn(0); 244 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 245 PetscCallMPI(MPI_Comm_size(comm, &size)); 246 PCMPIMatCounts[size - 1]++; 247 PetscCall(KSPGetOperators(ksp, NULL, &A)); 248 PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); 249 PetscCall(PetscMalloc1(nz, &a)); 250 PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm)); 251 if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa)); 252 PetscCall(MatUpdateMPIAIJWithArray(A, a)); 253 PetscCall(PetscFree(a)); 254 PetscFunctionReturn(0); 255 } 256 257 static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X) 258 { 259 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 260 KSP ksp; 261 MPI_Comm comm = PC_MPI_COMM_WORLD; 262 const PetscScalar *sb = NULL, *x; 263 PetscScalar *b, *sx = NULL; 264 PetscInt its,n; 265 PetscMPIInt size; 266 267 PetscFunctionBegin; 268 PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 269 if (!ksp) PetscFunctionReturn(0); 270 PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm)); 271 272 /* TODO: optimize code to not require building counts/displ everytime */ 273 274 /* scatterv rhs */ 275 PetscCallMPI(MPI_Comm_size(comm, &size)); 276 if (pc) { 277 PetscInt N; 278 279 PCMPISolveCounts[size - 1]++; 280 PetscCall(MatGetSize(pc->pmat,&N,NULL));; 281 PCMPISizes[size - 1] += N; 282 PetscCall(VecGetArrayRead(B, &sb)); 283 } 284 PetscCall(VecGetLocalSize(ksp->vec_rhs, &n)); 285 PetscCall(VecGetArray(ksp->vec_rhs, &b)); 286 PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm)); 287 PetscCall(VecRestoreArray(ksp->vec_rhs, &b)); 288 if (pc) PetscCall(VecRestoreArrayRead(B, &sb)); 289 290 PetscCall(PetscLogStagePush(PCMPIStage)); 291 PetscCall(KSPSolve(ksp, NULL, NULL)); 292 PetscCall(PetscLogStagePop()); 293 PetscCall(KSPGetIterationNumber(ksp,&its)); 294 PCMPIIterations[size - 1] += its; 295 296 /* gather solution */ 297 PetscCall(VecGetArrayRead(ksp->vec_sol, &x)); 298 if (pc) PetscCall(VecGetArray(X, &sx)); 299 PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm)); 300 if (pc) PetscCall(VecRestoreArray(X, &sx)); 301 PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x)); 302 PetscFunctionReturn(0); 303 } 304 305 static PetscErrorCode PCMPIDestroy(PC pc) 306 { 307 PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL; 308 KSP ksp; 309 MPI_Comm comm = PC_MPI_COMM_WORLD; 310 311 PetscFunctionBegin; 312 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 313 if (!ksp) PetscFunctionReturn(0); 314 PetscCall(KSPDestroy(&ksp)); 315 PetscFunctionReturn(0); 316 } 317 318 /*@C 319 PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for 320 parallel `KSP` solves and management of parallel `KSP` objects. 321 322 Logically collective on all MPI ranks except 0 323 324 Options Database Keys: 325 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 326 - -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server 327 328 Note: 329 This is normally started automatically in `PetscInitialize()` when the option is provided 330 331 Developer Notes: 332 When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program 333 written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks 334 (that would normally be sharing `PETSC_COMM_WORLD`) to run the solver server. 335 336 Can this be integrated into the `PetscDevice` abstraction that is currently being developed? 337 338 Level: developer 339 340 .seealso: `PCMPIServerEnd()`, `PCMPI` 341 @*/ 342 PetscErrorCode PCMPIServerBegin(void) 343 { 344 PetscMPIInt rank; 345 346 PetscFunctionBegin; 347 PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server")); 348 if (PetscDefined(USE_SINGLE_LIBRARY)) { 349 PetscCall(VecInitializePackage()); 350 PetscCall(MatInitializePackage()); 351 PetscCall(DMInitializePackage()); 352 PetscCall(PCInitializePackage()); 353 PetscCall(KSPInitializePackage()); 354 PetscCall(SNESInitializePackage()); 355 PetscCall(TSInitializePackage()); 356 PetscCall(TaoInitializePackage()); 357 } 358 359 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank)); 360 if (rank == 0) { 361 PETSC_COMM_WORLD = PETSC_COMM_SELF; 362 PetscFunctionReturn(0); 363 } 364 365 while (PETSC_TRUE) { 366 PCMPICommand request = PCMPI_CREATE; 367 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 368 switch (request) { 369 case PCMPI_CREATE: 370 PetscCall(PCMPICreate(NULL)); 371 break; 372 case PCMPI_SET_MAT: 373 PetscCall(PCMPISetMat(NULL)); 374 break; 375 case PCMPI_UPDATE_MAT_VALUES: 376 PetscCall(PCMPIUpdateMatValues(NULL)); 377 break; 378 case PCMPI_VIEW: 379 // PetscCall(PCMPIView(NULL)); 380 break; 381 case PCMPI_SOLVE: 382 PetscCall(PCMPISolve(NULL, NULL, NULL)); 383 break; 384 case PCMPI_DESTROY: 385 PetscCall(PCMPIDestroy(NULL)); 386 break; 387 case PCMPI_EXIT: 388 PetscCall(PetscFinalize()); 389 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 390 break; 391 default: 392 break; 393 } 394 } 395 PetscFunctionReturn(0); 396 } 397 398 /*@C 399 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 400 parallel KSP solves and management of parallel `KSP` objects. 401 402 Logically collective on all MPI ranks except 0 403 404 Note: 405 This is normally ended automatically in `PetscFinalize()` when the option is provided 406 407 Level: developer 408 409 .seealso: `PCMPIServerBegin()`, `PCMPI` 410 @*/ 411 PetscErrorCode PCMPIServerEnd(void) 412 { 413 PCMPICommand request = PCMPI_EXIT; 414 415 PetscFunctionBegin; 416 if (PetscGlobalRank == 0) { 417 PetscViewer viewer = NULL; 418 PetscViewerFormat format; 419 420 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 421 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 422 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL); 423 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL)); 424 PetscOptionsEnd(); 425 if (viewer) { 426 PetscBool isascii; 427 428 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 429 if (isascii) { 430 PetscMPIInt size; 431 PetscMPIInt i; 432 433 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 434 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n")); 435 PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n")); 436 if (PCMPIKSPCountsSeq) { 437 PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq,PCMPISizesSeq/PCMPISolveCountsSeq,PCMPIIterationsSeq/PCMPISolveCountsSeq)); 438 } 439 for (i = 0; i < size; i++) { 440 if (PCMPIKSPCounts[i]) { 441 PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i+1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i]/PCMPISolveCounts[i],PCMPIIterations[i]/PCMPISolveCounts[i])); 442 } 443 } 444 } 445 PetscCall(PetscViewerDestroy(&viewer)); 446 } 447 } 448 PetscCall(PCMPICommsDestroy()); 449 PetscFunctionReturn(0); 450 } 451 452 /* 453 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 454 because, for example, the problem is small. This version is more efficient because it does not require copying any data 455 */ 456 static PetscErrorCode PCSetUp_Seq(PC pc) 457 { 458 PC_MPI *km = (PC_MPI *)pc->data; 459 Mat sA; 460 const char *prefix; 461 462 PetscFunctionBegin; 463 PetscCall(PCGetOperators(pc, NULL, &sA)); 464 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 465 PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0])); 466 PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix)); 467 PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_")); 468 PetscCall(KSPSetOperators(km->ksps[0], sA, sA)); 469 PetscCall(KSPSetFromOptions(km->ksps[0])); 470 PetscCall(KSPSetUp(km->ksps[0])); 471 PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"); 472 PCMPIKSPCountsSeq++; 473 PetscFunctionReturn(0); 474 } 475 476 static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x) 477 { 478 PC_MPI *km = (PC_MPI *)pc->data; 479 PetscInt its,n; 480 Mat A; 481 482 PetscFunctionBegin; 483 PetscCall(KSPSolve(km->ksps[0], b, x)); 484 PetscCall(KSPGetIterationNumber(km->ksps[0],&its)); 485 PCMPISolveCountsSeq++; 486 PCMPIIterationsSeq += its; 487 PetscCall(KSPGetOperators(km->ksps[0],NULL,&A)); 488 PetscCall(MatGetSize(A,&n,NULL)); 489 PCMPISizesSeq += n; 490 PetscFunctionReturn(0); 491 } 492 493 static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer) 494 { 495 PC_MPI *km = (PC_MPI *)pc->data; 496 const char *prefix; 497 498 PetscFunctionBegin; 499 PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n")); 500 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 501 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 502 if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix)); 503 else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n")); 504 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); 505 PetscFunctionReturn(0); 506 } 507 508 static PetscErrorCode PCDestroy_Seq(PC pc) 509 { 510 PC_MPI *km = (PC_MPI *)pc->data; 511 512 PetscFunctionBegin; 513 PetscCall(KSPDestroy(&km->ksps[0])); 514 PetscCall(PetscFree(pc->data)); 515 PetscFunctionReturn(0); 516 } 517 518 /* 519 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 520 right hand side to the parallel PC 521 */ 522 static PetscErrorCode PCSetUp_MPI(PC pc) 523 { 524 PC_MPI *km = (PC_MPI *)pc->data; 525 PetscMPIInt rank, size; 526 PCMPICommand request; 527 PetscBool newmatrix = PETSC_FALSE; 528 529 PetscFunctionBegin; 530 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 531 PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 532 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 533 534 if (!pc->setupcalled) { 535 if (!km->alwaysuseserver) { 536 PetscInt n; 537 Mat sA; 538 /* short circuit for small systems */ 539 PetscCall(PCGetOperators(pc, &sA, &sA)); 540 PetscCall(MatGetSize(sA, &n, NULL)); 541 if (n < 2 * km->mincntperrank - 1 || size == 1) { 542 pc->ops->setup = NULL; 543 pc->ops->apply = PCApply_Seq; 544 pc->ops->destroy = PCDestroy_Seq; 545 pc->ops->view = PCView_Seq; 546 PetscCall(PCSetUp_Seq(pc)); 547 PetscFunctionReturn(0); 548 } 549 } 550 551 request = PCMPI_CREATE; 552 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 553 PetscCall(PCMPICreate(pc)); 554 newmatrix = PETSC_TRUE; 555 } 556 if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 557 558 if (newmatrix) { 559 PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"); 560 request = PCMPI_SET_MAT; 561 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 562 PetscCall(PCMPISetMat(pc)); 563 } else { 564 PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n"); 565 request = PCMPI_UPDATE_MAT_VALUES; 566 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 567 PetscCall(PCMPIUpdateMatValues(pc)); 568 } 569 PetscFunctionReturn(0); 570 } 571 572 static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x) 573 { 574 PCMPICommand request = PCMPI_SOLVE; 575 576 PetscFunctionBegin; 577 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 578 PetscCall(PCMPISolve(pc, b, x)); 579 PetscFunctionReturn(0); 580 } 581 582 PetscErrorCode PCDestroy_MPI(PC pc) 583 { 584 PCMPICommand request = PCMPI_DESTROY; 585 586 PetscFunctionBegin; 587 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 588 PetscCall(PCMPIDestroy(pc)); 589 PetscCall(PetscFree(pc->data)); 590 PetscFunctionReturn(0); 591 } 592 593 /* 594 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer 595 */ 596 PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer) 597 { 598 PC_MPI *km = (PC_MPI *)pc->data; 599 MPI_Comm comm; 600 PetscMPIInt size; 601 const char *prefix; 602 603 PetscFunctionBegin; 604 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm)); 605 PetscCallMPI(MPI_Comm_size(comm, &size)); 606 PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size)); 607 PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank)); 608 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 609 if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix)); 610 else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n")); 611 PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n")); PetscFunctionReturn(0); 612 } 613 614 PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject) 615 { 616 PC_MPI *km = (PC_MPI *)pc->data; 617 618 PetscFunctionBegin; 619 PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options"); 620 PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL)); 621 PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL)); 622 PetscOptionsHeadEnd(); 623 PetscFunctionReturn(0); 624 } 625 626 /*MC 627 PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process 628 629 Level: beginner 630 631 Options Database Keys: 632 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 633 . -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server 634 . -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 635 - -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes 636 637 Notes: 638 The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_ 639 640 The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for 641 potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware. 642 643 It can be particularly useful for user OpenMP code or potentially user GPU code. 644 645 When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix of -mpi) 646 and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly. 647 648 The solver options for `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect 649 because they are not the actual solver objects. 650 651 When `-log_view` is used with this solver the events within the parallel solve are logging in their own stage. Some of the logging in the other 652 stages will be confusing since the event times are are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading. 653 654 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()` 655 M*/ 656 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 657 { 658 PC_MPI *km; 659 660 PetscFunctionBegin; 661 PetscCall(PetscNew(&km)); 662 pc->data = (void *)km; 663 664 km->mincntperrank = 10000; 665 666 pc->ops->setup = PCSetUp_MPI; 667 pc->ops->apply = PCApply_MPI; 668 pc->ops->destroy = PCDestroy_MPI; 669 pc->ops->view = PCView_MPI; 670 pc->ops->setfromoptions = PCSetFromOptions_MPI; 671 PetscFunctionReturn(0); 672 } 673