/*
    This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
    It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

    The user's program may use OpenMP to compute the right-hand side and matrix for the linear system.

    The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD

    The resulting KSP and PC can only be controlled via the options database, though some common commands
    could be passed through the server.

*/
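
/*
    Illustrative sketch (not part of the implementation): the sequential user code assembles its matrix
    and right-hand side on MPI rank 0 with ordinary PETSc calls and solves through a KSP created on
    PETSC_COMM_SELF; with the server enabled the actual solve is distributed over all ranks. The names
    A, b, x, and ksp below are hypothetical.

       PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
       PetscCall(KSPSetOperators(ksp, A, A));   // A is a sequential MATSEQAIJ assembled by the (possibly OpenMP) user code
       PetscCall(KSPSetFromOptions(ksp));
       PetscCall(KSPSolve(ksp, b, x));          // with -mpi_linear_solver_server this triggers the parallel solve

    launched, for example, with

       mpiexec -n 16 ./usercode -mpi_linear_solver_server -mpi_linear_solver_server_view
*/
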
#include <petsc/private/pcimpl.h>
#include <petsc/private/kspimpl.h>
#include <petscts.h>
#include <petsctao.h>

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
} PC_MPI;

typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a KSP that is no longer needed */
} PCMPICommand;

static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool     PCMPICommSet = PETSC_FALSE;
static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
static PetscLogEvent EventServerDist;

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *cprefix = NULL;
  PetscMPIInt len     = 0;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitable sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPCreate(comm, &ksp));
  PetscCall(KSPSetNestLevel(ksp, 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
  PetscCall(PetscLogStagePop());
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t      slen;
    const char *prefix = NULL;
    char       *found  = NULL;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
    PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
    *found = 0;
    PetscCall(PetscStrlen(cprefix, &slen));
    len = (PetscMPIInt)slen;
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
  }
  PetscCall(PetscFree(cprefix));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, *ia, *ja, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  PetscScalar       *a;
  const PetscScalar *sa               = NULL;
  PetscInt           matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  char              *cprefix;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));

  /* copy over the matrix nonzero structure and values */
  if (pc) {
    NZ      = km->NZ;
    NZdispl = km->NZdispl;
    PetscCall(PetscLayoutGetRanges(layout, &range));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    for (i = 0; i < size; i++) {
      sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
      NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
    }
    displi[0]  = 0;
    NZdispl[0] = 0;
    for (j = 1; j < size; j++) {
      displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
      NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

  PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
  PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
  PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }

  for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
  ia[0] = 0;
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

  PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
      km->displ[i]     = (PetscMPIInt)range[i];
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPSetFromOptions(ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  PetscScalar       *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscMalloc1(nz, &a));
  PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));

    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb   = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* TODO: optimize code to not require building counts/displ every time */

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
    PetscCall(VecGetArrayRead(B, &sb));
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(VecGetArray(ksp->vec_rhs, &b));
  PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
  PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
  if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;

  /* gather solution */
  PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
  if (pc) PetscCall(VecGetArray(X, &sx));
  PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
  if (pc) PetscCall(VecRestoreArray(X, &sx));
  PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscLogStagePop());
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscBool PCMPIServerActive = PETSC_FALSE;

/*@C
  PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI processes except rank 0

  Options Database Keys:
+ -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
- -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program

  Level: developer

  Note:
  This is normally started automatically in `PetscInitialize()` when the option is provided

  See `PCMPI` for information on using the solver with a `KSP` object

  Developer Notes:
  When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

  This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

  The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
  nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

  The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()`, for example, runs on
  all MPI processes but the user callback only runs on one MPI process per node.

  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
  the `MPI_Comm` argument from PETSc calls.
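
  A typical launch of an application that uses the server might look like the following (the executable name
  and the solver options are only illustrative); after `PetscInitialize()` only the first MPI rank continues with
  the application's code, while the remaining ranks stay inside this routine processing requests
.vb
  mpiexec -n 16 ./myapp -mpi_linear_solver_server -mpi_linear_solver_server_view -ksp_type cg -pc_type bjacobi
.ve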

.seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
  PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (rank == 0) {
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it will run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI ranks except 0

  Level: developer

  Note:
  This is normally ended automatically in `PetscFinalize()` when the option is provided

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PCMPICommand request = PCMPI_EXIT;

  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about the systems solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  Ranks  KSPSolve()s  Mats  KSPs  Avg. Size  Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential  %" PetscInt_FMT "  %" PetscInt_FMT "  %" PetscInt_FMT "  %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "  %d  %" PetscInt_FMT "  %" PetscInt_FMT "  %" PetscInt_FMT "  %" PetscInt_FMT "  %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PCMPIServerActive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
    because, for example, the problem is small. This version is more efficient because it does not require copying any data
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;
  char       *found = NULL, *cprefix;

  PetscFunctionBegin;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetNestLevel(km->ksps[0], 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

  /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
  PetscCall(PetscStrallocpy(prefix, &cprefix));
  PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
  *found = 0;
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
  PetscCall(PetscFree(cprefix));

  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
   right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI      *km = (PC_MPI *)pc->data;
  PetscMPIInt  rank, size;
  PCMPICommand request;
  PetscBool    newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    request = PCMPI_CREATE;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
    request = PCMPI_SET_MAT;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
    request = PCMPI_UPDATE_MAT_VALUES;
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PCMPICommand request = PCMPI_SOLVE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_MPI(PC pc)
{
  PCMPICommand request = PCMPI_DESTROY;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
*/
static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

   Options Database Keys for the Server:
+  -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
-  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server

   Options Database Keys for a specific `KSP` object:
+  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
-  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server      - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

   Level: developer

   Notes:
   The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.

   It can be particularly useful for user OpenMP code or potentially user GPU code.

   When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
   and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

   The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user level `KSP` and `PC` have no effect
   because they are not the actual solver objects.

   When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
   stages will be confusing since the event times are only recorded on the 0th MPI rank, thus the percent of time in the events will be misleading.

   Developer Note:
   This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
   a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.

.seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;
  char   *found = NULL;

  PetscFunctionBegin;
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType() */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = 0;

  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}