/*
    This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
    It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

    That program may use OpenMP to compute the right-hand side and matrix for the linear system.

    The code uses MPI_COMM_WORLD below, but maybe it should be PETSC_COMM_WORLD.

    The resulting KSP and PC can only be controlled via the options database, though some common commands
    could be passed through the server.
*/
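/*
    Illustrative usage sketch (not part of this source; the executable name ./ex_seq is hypothetical):
    a purely sequential PETSc main program can use the MPI linear solver server simply by being
    launched on several ranks with the server option; only rank 0 runs the user code while the
    remaining ranks service the parallel solves:

        mpiexec -n 8 ./ex_seq -mpi_linear_solver_server -mpi_linear_solver_server_view

    Solver options (for example -ksp_type or -pc_type) are then picked up by the server-side KSP
    through the options database, as described in the PCMPI manual page below.
*/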
#include <petsc/private/pcimpl.h>
#include <petsc/private/kspimpl.h>
#include <petscts.h>
#include <petsctao.h>

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;

typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;
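/*
   Request protocol sketch (descriptive summary of the code in this file): rank 0, inside
   PCSetUp_MPI()/PCApply_MPI()/PCDestroy_MPI() and PCMPIServerEnd(), broadcasts one PCMPICommand
   on PC_MPI_COMM_WORLD and then calls the matching PCMPIXxx(pc). All other ranks sit in the loop
   in PCMPIServerBegin(), receive the same command, and call the matching PCMPIXxx(NULL).
*/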
static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool     PCMPICommSet = PETSC_FALSE;
static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
static PetscLogEvent EventServerDist;

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *cprefix = NULL;
  PetscMPIInt len     = 0;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitable sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
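  /*
     Illustrative arithmetic for the selection below (hypothetical numbers): with N[0] = 40000 global rows,
     the default mincntperrank = 10000, and size = 16 ranks in PC_MPI_COMM_WORLD, the expression
     PetscMin(size, PetscMax(1, N[0] / mincntperrank)) evaluates to min(16, max(1, 4)) = 4, so the solve is
     placed on the 4-rank subcommunicator PCMPIComms[3]; ranks outside it see MPI_COMM_NULL and return early just below.
  */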
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPCreate(comm, &ksp));
  PetscCall(KSPSetNestLevel(ksp, 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
  PetscCall(PetscLogStagePop());
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t      slen;
    const char *prefix = NULL;
    char       *found  = NULL;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
    PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
    *found = 0;
    PetscCall(PetscStrlen(cprefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
  }
  PetscCall(PetscFree(cprefix));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, *ia, *ja, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa               = NULL;
  PetscInt           matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  char              *cprefix;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
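    /*
       Illustrative example of the counts computed below (hypothetical data): for a 4x4 matrix with row
       pointers IA = {0,3,5,8,10} split over 2 ranks with row ranges {0,2,4}, the loops produce
       sendcounti = {3,3} (row pointers per rank, overlapping by one), NZ = {5,5} (nonzeros per rank),
       displi = {0,2}, and NZdispl = {0,5}; rank 1 then receives ia = {5,8,10}, which is shifted to
       {0,3,5} after the scatter.
    */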
    for (i = 0; i < size; i++) {
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));

    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscLogStagePop());
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscBool PCMPIServerActive = PETSC_FALSE;

/*@C
  PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI processes except rank 0

  Options Database Keys:
+ -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
- -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program

  Level: developer

  Note:
  This is normally started automatically in `PetscInitialize()` when the option is provided

  See `PCMPI` for information on using the solver with a `KSP` object

  Developer Notes:
  When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the other ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

  This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

  The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
  nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

  The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()`, for example, runs on
  all MPI processes but the user callback only runs on one MPI process per node.

  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
  the `MPI_Comm` argument from PETSc calls.

.seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
  PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
  PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI ranks except 0

  Level: developer

  Note:
  This is normally ended automatically in `PetscFinalize()` when the option is provided

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PCMPIServerActive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
    because, for example, the problem is small. This version is more efficient because it does not require copying any data
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;
  char       *found = NULL, *cprefix;

  PetscFunctionBegin;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetNestLevel(km->ksps[0], 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

  /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
  PetscCall(PetscStrallocpy(prefix, &cprefix));
  PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
  *found = 0;
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
  PetscCall(PetscFree(cprefix));

  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
   right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI      *km = (PC_MPI *)pc->data;
  PetscMPIInt  rank, size;
  PCMPICommand request;
  PetscBool    newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
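      /*
         Illustrative threshold (hypothetical numbers): with the default mincntperrank = 10000, the test
         below sends any system with fewer than 2*10000 - 1 = 19999 rows, or any single-rank run, to the
         purely sequential PCSetUp_Seq()/PCApply_Seq() path, avoiding the scatter/gather of the matrix and vectors.
      */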
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    request = PCMPI_SET_MAT;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    request = PCMPI_UPDATE_MAT_VALUES;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PCMPICommand request = PCMPI_SOLVE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_MPI(PC pc)
{
  PCMPICommand request = PCMPI_DESTROY;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
*/
static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

  Options Database Keys for the Server:
+ -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
- -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server

  Options Database Keys for a specific `KSP` object
+ -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
- -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server      - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

  Level: developer

  Notes:
  The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.

  It can be particularly useful for user OpenMP code or potentially user GPU code.

  When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
  and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

  The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
  because they are not the actual solver objects.

  When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
  stages will be confusing since the event times are only recorded on the 0th MPI rank; thus the percent of time in the events will be misleading.

  Developer Note:
  This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
  a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.

.seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;
  char   *found = NULL;

  PetscFunctionBegin;
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType() */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = 0;

  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}