/*
    This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
    It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

    The program may use OpenMP to compute the right-hand side and matrix for the linear system.

    The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.

    The resulting KSP and PC can only be controlled via the options database, though some common commands
    could be passed through the server.
*/
#include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
#include <petsc/private/kspimpl.h>
#include <petscts.h>
#include <petsctao.h>
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  #include <pthread.h>
#endif

#define PC_MPI_MAX_RANKS  256
#define PC_MPI_COMM_WORLD MPI_COMM_WORLD

typedef struct {
  KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each process, NULL when not on a process. */
  PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
  PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and of the nonzero column indices initially) */
  PetscInt    mincntperrank;                                        /* minimum number of desired matrix rows per active rank in MPI parallel KSP solve */
  PetscBool   alwaysuseserver;                                      /* for debugging, use the server infrastructure even if only one MPI process is used for the solve */
} PC_MPI;

typedef enum {
  PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
  PCMPI_CREATE,
  PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
  PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
  PCMPI_SOLVE,
  PCMPI_VIEW,
  PCMPI_DESTROY /* destroy a PC that is no longer needed */
} PCMPICommand;

static MPI_Comm      PCMPIComms[PC_MPI_MAX_RANKS];
static PetscBool     PCMPICommSet = PETSC_FALSE;
static PetscInt      PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
static PetscInt      PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
static PetscLogEvent EventServerDist, EventServerDistMPI;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
static pthread_mutex_t *PCMPIServerLocks;
#else
static void *PCMPIServerLocks;
#endif
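
/*
   Request flow (descriptive sketch): rank 0 runs the user's sequential code; inside PCSetUp_MPI(), PCApply_MPI(),
   and PCDestroy_MPI() it broadcasts one of the PCMPICommand values above to the other ranks, which wait in the
   request loop of PCMPIServerBegin(). A typical launch line for an otherwise sequential application (the
   executable name is illustrative) is

     mpiexec -n 8 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view
*/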

static PetscErrorCode PCMPICommsCreate(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* comm for size 1 is useful only for debugging */
  for (i = 0; i < size; i++) {
    PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
    PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
    PCMPISolveCounts[i] = 0;
    PCMPIKSPCounts[i]   = 0;
    PCMPIIterations[i]  = 0;
    PCMPISizes[i]       = 0;
  }
  PCMPICommSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPICommsDestroy(void)
{
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  PetscMPIInt size, rank, i;

  PetscFunctionBegin;
  if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  for (i = 0; i < size; i++) {
    if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
  }
  PCMPICommSet = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPICreate(PC pc)
{
  PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
  MPI_Comm    comm = PC_MPI_COMM_WORLD;
  KSP         ksp;
  PetscInt    N[2], mincntperrank = 0;
  PetscMPIInt size;
  Mat         sA;
  char       *cprefix = NULL;
  PetscMPIInt len     = 0;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &N[0], &N[1]));
  }
  PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

  /* choose a suitably sized MPI_Comm for the problem to be solved on */
  if (km) mincntperrank = km->mincntperrank;
  PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
  comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
  if (comm == MPI_COMM_NULL) {
    ksp                = NULL;
    PCMPIServerInSolve = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPCreate(comm, &ksp));
  PetscCall(KSPSetNestLevel(ksp, 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
  PetscCall(PetscLogStagePop());
  PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
  if (pc) {
    size_t      slen;
    const char *prefix = NULL;
    char       *found  = NULL;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIKSPCounts[size - 1]++;
    /* Created KSP gets the prefix of the PC minus the mpi_linear_solver_server_ portion */
    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
    PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
    *found = 0;
    PetscCall(PetscStrlen(cprefix, &slen));
    PetscCall(PetscMPIIntCast(slen, &len));
  }
  PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
  if (len) {
    if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
    PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
  }
  PetscCall(PetscFree(cprefix));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
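
/*
   Sizing example for PCMPICreate() above (hypothetical numbers): with 8 server ranks, a matrix with N[0] = 40000
   rows, and the default mincntperrank of 10000, the chosen communicator is
   PCMPIComms[PetscMin(8, PetscMax(1, 40000 / 10000)) - 1] = PCMPIComms[3], i.e. the subcommunicator containing the
   first 4 ranks; ranks outside it see MPI_COMM_NULL and return immediately.
*/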

static PetscErrorCode PCMPISetMat(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  Mat                A;
  PetscInt           m, n, j, bs;
  Mat                sA;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  KSP                ksp;
  PetscLayout        layout;
  const PetscInt    *IA = NULL, *JA = NULL, *ia, *ja;
  const PetscInt    *range;
  PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
  const PetscScalar *a = NULL, *sa;
  PetscInt           matproperties[8] = {0}, rstart, rend;
  char              *cprefix;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  if (pc) {
    PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
    const char *prefix;
    size_t      clen;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PCMPIMatCounts[size - 1]++;
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
    PetscCall(MatGetBlockSize(sA, &bs));
    matproperties[2] = bs;
    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
    /* Created Mat gets the prefix of the input Mat PLUS the mpi_linear_solver_server_ portion */
    PetscCall(MatGetOptionsPrefix(sA, &prefix));
    PetscCall(PetscStrallocpy(prefix, &cprefix));
    PetscCall(PetscStrlen(cprefix, &clen));
    matproperties[7] = (PetscInt)clen;
  }
  PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

  /* determine ownership ranges of matrix columns */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  PetscCall(PetscLayoutDestroy(&layout));

  /* determine ownership ranges of matrix rows */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
  PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &m));
  PetscCall(PetscLayoutGetRange(layout, &rstart, &rend));

  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  /* copy over the matrix nonzero structure and values */
  if (pc) {
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
    if (!PCMPIServerUseShmget) {
      NZ      = km->NZ;
      NZdispl = km->NZdispl;
      PetscCall(PetscLayoutGetRanges(layout, &range));
      for (i = 0; i < size; i++) {
        PetscCall(PetscMPIIntCast(1 + range[i + 1] - range[i], &sendcounti[i]));
        PetscCall(PetscMPIIntCast(IA[range[i + 1]] - IA[range[i]], &NZ[i]));
      }
      displi[0]  = 0;
      NZdispl[0] = 0;
      for (j = 1; j < size; j++) {
        displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
        NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
      }
    }
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
  }
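  /*
     Counting example (hypothetical 5-row matrix on 2 server ranks owning rows [0,3) and [3,5)): rank 0 receives the
     row pointers IA[0..3] and rank 1 receives IA[3..5], so adjacent row-pointer sends overlap by one entry, hence
     sendcounti[i] = 1 + range[i+1] - range[i] and displi[j] = displi[j-1] + sendcounti[j-1] - 1, while NZ[] and
     NZdispl[] partition the column indices JA and the numerical values exactly.
  */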
  PetscCall(PetscLayoutDestroy(&layout));

  PetscCall(MatCreate(comm, &A));
  if (matproperties[7] > 0) {
    PetscMPIInt ni;

    PetscCall(PetscMPIIntCast(matproperties[7] + 1, &ni));
    if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
    PetscCallMPI(MPI_Bcast(cprefix, ni, MPI_CHAR, 0, comm));
    PetscCall(MatSetOptionsPrefix(A, cprefix));
    PetscCall(PetscFree(cprefix));
  }
  PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
  PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
  PetscCall(MatSetType(A, MATMPIAIJ));

  if (!PCMPIServerUseShmget) {
    PetscMPIInt in;

    PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
    PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
    PetscCall(PetscMPIIntCast(n, &in));
    PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, (void *)ia, in + 1, MPIU_INT, 0, comm));
    PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, (void *)ja, nz, MPIU_INT, 0, comm));
    PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, (void *)a, nz, MPIU_SCALAR, 0, comm));
  } else {
    const void           *addr[3] = {(const void **)IA, (const void **)JA, (const void **)sa};
    PCMPIServerAddresses *addresses;

    PetscCall(PetscNew(&addresses));
    addresses->n = 3;
    PetscCall(PetscShmgetMapAddresses(comm, addresses->n, addr, addresses->addr));
    ia = rstart + (PetscInt *)addresses->addr[0];
    ja = ia[0] + (PetscInt *)addresses->addr[1];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
    PetscCall(PetscObjectContainerCompose((PetscObject)A, "PCMPIServerAddresses", (void *)addresses, PCMPIServerAddressesDestroy));
  }

  if (pc) {
    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
  PetscCall(MatSetBlockSize(A, matproperties[2]));

  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
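  /* The matproperties[3..6] values decoded above use the encoding 0 = property not known, 1 = known true,
     2 = known false, so a zero-initialized buffer simply means nothing is known about the matrix */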

  if (!PCMPIServerUseShmget) PetscCall(PetscFree3(ia, ja, a));
  PetscCall(KSPSetOperators(ksp, A, A));
  if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
  PetscCall(PetscLogStagePop());
  if (pc && !PCMPIServerUseShmget) { /* needed for scatterv/gatherv of rhs and solution */
    const PetscInt *range;

    PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
    for (i = 0; i < size; i++) {
      PetscCall(PetscMPIIntCast(range[i + 1] - range[i], &km->sendcount[i]));
      PetscCall(PetscMPIIntCast(range[i], &km->displ[i]));
    }
  }
  PetscCall(MatDestroy(&A));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPSetFromOptions(ksp));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIUpdateMatValues(PC pc)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  Mat                sA, A;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscInt    *ia, *IA;
  const PetscScalar *a;
  PetscCount         nz;
  const PetscScalar *sa = NULL;
  PetscMPIInt        size;
  PetscInt           rstart, matproperties[4] = {0, 0, 0, 0};

  PetscFunctionBegin;
  if (pc) {
    PetscCall(PCGetOperators(pc, &sA, &sA));
    PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
    PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));
  }
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PCMPIMatCounts[size - 1]++;
  PetscCall(KSPGetOperators(ksp, NULL, &A));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscMPIInt mpi_nz;

    PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
    PetscCall(PetscMPIIntCast(nz, &mpi_nz));
    PetscCall(PetscMalloc1(nz, &a));
    PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, (void *)a, mpi_nz, MPIU_SCALAR, 0, comm));
  } else {
    PCMPIServerAddresses *addresses;

    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    PetscCall(PetscObjectContainerQuery((PetscObject)A, "PCMPIServerAddresses", (void **)&addresses));
    ia = rstart + (PetscInt *)addresses->addr[0];
    a  = ia[0] + (PetscScalar *)addresses->addr[2];
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (pc) {
    PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

    PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
    PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, NULL, NULL));

    PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
    matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
    PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
    matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
    PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
    matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
    PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
    matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
  }
  PetscCall(MatUpdateMPIAIJWithArray(A, a));
  if (!PCMPIServerUseShmget) PetscCall(PetscFree(a));
  PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
  /* if any of these properties was previously set and is now not set, this will result in incorrect properties in A since there is no way to unset a property */
  if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
  if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
{
  PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP                ksp;
  MPI_Comm           comm = PC_MPI_COMM_WORLD;
  const PetscScalar *sb = NULL, *x;
  PetscScalar       *b, *sx = NULL;
  PetscInt           its, n;
  PetscMPIInt        size;
  void              *addr[2];

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

  /* scatterv rhs */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (pc) {
    PetscInt N;

    PCMPISolveCounts[size - 1]++;
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PCMPISizes[size - 1] += N;
  }
  PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscMPIInt in;

    PetscCall(VecGetArray(ksp->vec_rhs, &b));
    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    PetscCall(PetscMPIIntCast(n, &in));
    PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, in, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
    PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
    // TODO: scatter initial guess if needed
  } else {
    PetscInt rstart;

    if (pc) PetscCall(VecGetArrayRead(B, &sb));
    if (pc) PetscCall(VecGetArray(X, &sx));
    const void *inaddr[2] = {(const void **)sb, (const void **)sx};
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

    PetscCall(PetscShmgetMapAddresses(comm, 2, inaddr, addr));
    PetscCall(VecGetOwnershipRange(ksp->vec_rhs, &rstart, NULL));
    PetscCall(VecPlaceArray(ksp->vec_rhs, rstart + (PetscScalar *)addr[0]));
    PetscCall(VecPlaceArray(ksp->vec_sol, rstart + (PetscScalar *)addr[1]));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));

  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogStagePush(PCMPIStage));
  PetscCall(KSPSolve(ksp, NULL, NULL));
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogEventBegin(EventServerDist, NULL, NULL, NULL, NULL));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PCMPIIterations[size - 1] += its;
  // TODO: send iterations up to outer KSP

  if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(2, addr));

  /* gather solution */
  PetscCall(PetscLogEventBegin(EventServerDistMPI, NULL, NULL, NULL, NULL));
  if (!PCMPIServerUseShmget) {
    PetscMPIInt in;

    PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
    if (pc) PetscCall(VecGetArray(X, &sx));
    PetscCall(PetscMPIIntCast(n, &in));
    PetscCallMPI(MPI_Gatherv(x, in, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
    if (pc) PetscCall(VecRestoreArray(X, &sx));
    PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
  } else {
    PetscCall(VecResetArray(ksp->vec_rhs));
    PetscCall(VecResetArray(ksp->vec_sol));
  }
  PetscCall(PetscLogEventEnd(EventServerDistMPI, NULL, NULL, NULL, NULL));
  PetscCall(PetscLogEventEnd(EventServerDist, NULL, NULL, NULL, NULL));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIDestroy(PC pc)
{
  PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
  KSP      ksp;
  MPI_Comm comm = PC_MPI_COMM_WORLD;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
  if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogStagePush(PCMPIStage));
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscLogStagePop());
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMPIServerBroadcastRequest(PCMPICommand request)
{
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  PetscMPIInt dummy1 = 1, dummy2;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_unlock(&PCMPIServerLocks[i]);
  }
#endif
  PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
  /* next line ensures the sender has already taken the lock */
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
  if (PCMPIServerUseShmget) {
    PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
    for (PetscMPIInt i = 1; i < PetscGlobalSize; i++) pthread_mutex_lock(&PCMPIServerLocks[i]);
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI processes except rank 0

  Options Database Keys:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
. -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
- -mpi_linear_solver_server_use_shared_memory - use shared memory when communicating matrices and vectors to server processes (default where supported)

  Level: developer

  Note:
  This is normally started automatically in `PetscInitialize()` when the option is provided

  See `PCMPI` for information on using the solver with a `KSP` object

  Developer Notes:
  When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
  written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the ranks
  (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

  Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

  Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

  This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

  The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
  nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

  The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where the `SNESSolve()`, for example, runs on
  all MPI processes but the user callback only runs on one MPI process per node.

  PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
  the `MPI_Comm` argument from PETSc calls.
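
  Example Usage:
  A typical launch line for an otherwise sequential application (the executable name is illustrative) is
.vb
  mpiexec -n 8 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view
.ve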

.seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerBegin(void)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
  if (PetscDefined(USE_SINGLE_LIBRARY)) {
    PetscCall(VecInitializePackage());
    PetscCall(MatInitializePackage());
    PetscCall(DMInitializePackage());
    PetscCall(PCInitializePackage());
    PetscCall(KSPInitializePackage());
    PetscCall(SNESInitializePackage());
    PetscCall(TSInitializePackage());
    PetscCall(TaoInitializePackage());
  }
  PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
  PetscCall(PetscLogEventRegister("ServerDist", PC_CLASSID, &EventServerDist));
  PetscCall(PetscLogEventRegister("ServerDistMPI", PC_CLASSID, &EventServerDistMPI));

  if (!PetscDefined(HAVE_SHMGET)) PCMPIServerUseShmget = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mpi_linear_solver_server_use_shared_memory", &PCMPIServerUseShmget, NULL));

  PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
  if (PCMPIServerUseShmget) {
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
    if (size > 1) {
      pthread_mutex_t *locks;

      if (rank == 0) {
        PCMPIServerActive = PETSC_TRUE;
        PetscCall(PetscShmgetAllocateArray(size, sizeof(pthread_mutex_t), (void **)&locks));
      }
      PetscCall(PetscShmgetMapAddresses(PETSC_COMM_WORLD, 1, (const void **)&locks, (void **)&PCMPIServerLocks));
      if (rank == 0) {
        pthread_mutexattr_t attr;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);

        for (int i = 1; i < size; i++) {
          pthread_mutex_init(&PCMPIServerLocks[i], &attr);
          pthread_mutex_lock(&PCMPIServerLocks[i]);
        }
      }
      PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
    }
#endif
  }
  if (rank == 0) {
    PETSC_COMM_WORLD  = PETSC_COMM_SELF;
    PCMPIServerActive = PETSC_TRUE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  while (PETSC_TRUE) {
    PCMPICommand request = PCMPI_CREATE;
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    PetscMPIInt dummy1 = 1, dummy2;
#endif

    // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters?
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]);
#endif
    PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
#if defined(PETSC_HAVE_PTHREAD_MUTEX)
    if (PCMPIServerUseShmget) {
      /* next line ensures PetscGlobalRank has locked before rank 0 can take the lock back */
      PetscCallMPI(MPI_Reduce(&dummy1, &dummy2, 1, MPI_INT, MPI_SUM, 0, PC_MPI_COMM_WORLD));
      pthread_mutex_unlock(&PCMPIServerLocks[PetscGlobalRank]);
    }
#endif
    switch (request) {
    case PCMPI_CREATE:
      PetscCall(PCMPICreate(NULL));
      break;
    case PCMPI_SET_MAT:
      PetscCall(PCMPISetMat(NULL));
      break;
    case PCMPI_UPDATE_MAT_VALUES:
      PetscCall(PCMPIUpdateMatValues(NULL));
      break;
    case PCMPI_VIEW:
      // PetscCall(PCMPIView(NULL));
      break;
    case PCMPI_SOLVE:
      PetscCall(PCMPISolve(NULL, NULL, NULL));
      break;
    case PCMPI_DESTROY:
      PetscCall(PCMPIDestroy(NULL));
      break;
    case PCMPI_EXIT:
      if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
      PetscCall(PetscFinalize());
      exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
      break;
    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
  parallel `KSP` solves and management of parallel `KSP` objects.

  Logically Collective on all MPI ranks except 0

  Level: developer

  Note:
  This is normally called automatically in `PetscFinalize()`

.seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
@*/
PetscErrorCode PCMPIServerEnd(void)
{
  PetscFunctionBegin;
  if (PetscGlobalRank == 0) {
    PetscViewer       viewer = NULL;
    PetscViewerFormat format;

    PetscCall(PetscShmgetAddressesFinalize());
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_EXIT));
    if (PCMPIServerUseShmget) PetscCall(PetscShmgetUnmapAddresses(1, (void **)&PCMPIServerLocks));
    PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
    PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
    PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
    PetscOptionsEnd();
    if (viewer) {
      PetscBool isascii;

      PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
      if (isascii) {
        PetscMPIInt size;
        PetscMPIInt i;

        PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats     KSPs       Avg. Size      Avg. Its\n"));
        if (PCMPIKSPCountsSeq) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                    %" PetscInt_FMT "       %" PetscInt_FMT "            %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
        }
        for (i = 0; i < size; i++) {
          if (PCMPIKSPCounts[i]) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "        %" PetscInt_FMT "         %" PetscInt_FMT "           %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
          }
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server %susing shared memory\n", PCMPIServerUseShmget ? "" : "not "));
      }
      PetscCall(PetscViewerDestroy(&viewer));
    }
  }
  PetscCall(PCMPICommsDestroy());
  PCMPIServerActive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
  because, for example, the problem is small. This version is more efficient because it does not require copying any data
*/
static PetscErrorCode PCSetUp_Seq(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  Mat         sA;
  const char *prefix;
  char       *found = NULL, *cprefix;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(PCGetOperators(pc, NULL, &sA));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
  PetscCall(KSPSetNestLevel(km->ksps[0], 1));
  PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

  /* Created KSP gets the prefix of the PC minus the mpi_linear_solver_server_ portion */
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
  PetscCall(PetscStrallocpy(prefix, &cprefix));
  PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
  *found = 0;
  PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
  PetscCall(PetscFree(cprefix));

  PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
  PetscCall(KSPSetFromOptions(km->ksps[0]));
  PetscCall(KSPSetUp(km->ksps[0]));
  PetscCall(PetscInfo(pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
  PCMPIKSPCountsSeq++;
  PCMPIServerInSolve = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
{
  PC_MPI  *km = (PC_MPI *)pc->data;
  PetscInt its, n;
  Mat      A;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  PetscCall(KSPSolve(km->ksps[0], b, x));
  PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
  PCMPISolveCountsSeq++;
  PCMPIIterationsSeq += its;
  PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
  PetscCall(MatGetSize(A, &n, NULL));
  PCMPISizesSeq += n;
  PCMPIServerInSolve = PETSC_FALSE;
  /*
    do not keep a reference to the previous rhs and solution since destroying them in the next KSPSolve()
    may use PetscFree() instead of PCMPIArrayDeallocate()
  */
  PetscCall(VecDestroy(&km->ksps[0]->vec_rhs));
  PetscCall(VecDestroy(&km->ksps[0]->vec_sol));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_Seq(PC pc)
{
  PC_MPI *km = (PC_MPI *)pc->data;
  Mat     A, B;
  Vec     x, b;

  PetscFunctionBegin;
  PCMPIServerInSolve = PETSC_TRUE;
  /* since the matrices and vectors are shared with the outer KSP we need to ensure they are not destroyed with PetscFree() */
  PetscCall(KSPGetOperators(km->ksps[0], &A, &B));
  PetscCall(PetscObjectReference((PetscObject)A));
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscCall(KSPGetSolution(km->ksps[0], &x));
  PetscCall(PetscObjectReference((PetscObject)x));
  PetscCall(KSPGetRhs(km->ksps[0], &b));
  PetscCall(PetscObjectReference((PetscObject)b));
  PetscCall(KSPDestroy(&km->ksps[0]));
  PetscCall(PetscFree(pc->data));
  PCMPIServerInSolve = PETSC_FALSE;
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
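
/*
   Pairing convention used throughout this file (descriptive note): each PCMPIXxx() routine runs with a live PC on
   rank 0 and with pc == NULL on the server ranks, which reach it from the request loop in PCMPIServerBegin() after
   rank 0 broadcasts the matching PCMPICommand with PCMPIServerBroadcastRequest(); thus all ranks execute the same
   routine together.
*/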

/*
  PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
  right-hand side to the parallel PC
*/
static PetscErrorCode PCSetUp_MPI(PC pc)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  PetscMPIInt rank, size;
  PetscBool   newmatrix = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

  if (!pc->setupcalled) {
    if (!km->alwaysuseserver) {
      PetscInt n;
      Mat      sA;
      /* short circuit for small systems */
      PetscCall(PCGetOperators(pc, &sA, &sA));
      PetscCall(MatGetSize(sA, &n, NULL));
      if (n < 2 * km->mincntperrank - 1 || size == 1) {
        pc->ops->setup   = NULL;
        pc->ops->apply   = PCApply_Seq;
        pc->ops->destroy = PCDestroy_Seq;
        pc->ops->view    = PCView_Seq;
        PetscCall(PCSetUp_Seq(pc));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
    }

    PetscCall(PCMPIServerBroadcastRequest(PCMPI_CREATE));
    PetscCall(PCMPICreate(pc));
    newmatrix = PETSC_TRUE;
  }
  if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

  if (newmatrix) {
    PetscCall(PetscInfo(pc, "New matrix or matrix has changed nonzero structure\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_SET_MAT));
    PetscCall(PCMPISetMat(pc));
  } else {
    PetscCall(PetscInfo(pc, "Matrix has only changed nonzero values\n"));
    PetscCall(PCMPIServerBroadcastRequest(PCMPI_UPDATE_MAT_VALUES));
    PetscCall(PCMPIUpdateMatValues(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
{
  PetscFunctionBegin;
  PetscCall(PCMPIServerBroadcastRequest(PCMPI_SOLVE));
  PetscCall(PCMPISolve(pc, b, x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_MPI(PC pc)
{
  PetscFunctionBegin;
  PetscCall(PCMPIServerBroadcastRequest(PCMPI_DESTROY));
  PetscCall(PCMPIDestroy(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer; use the options database instead
*/
static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
{
  PC_MPI     *km = (PC_MPI *)pc->data;
  MPI_Comm    comm;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of matrix rows on each MPI process for MPI parallel solve %" PetscInt_FMT "\n", km->mincntperrank));
  PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_MPI *km = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
  PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
  PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

  Options Database Keys for the Server:
+ -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
. -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
- -mpi_linear_solver_server_use_shared_memory <true, false> - use shared memory to distribute the matrix and right-hand side; defaults to true

  Options Database Keys for a specific `KSP` object:
+ -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
- -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)

  Level: developer

  Notes:
  This cannot be used with vectors or matrices that are created using arrays provided by the user, such as `VecCreateWithArray()` or
  `MatCreateSeqAIJWithArrays()`

  The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.

  It can be particularly useful for user OpenMP code or potentially user GPU code.

  When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
  and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

  The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
  because they are not the actual solver objects.

  When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
  stages will be confusing since the event times are only recorded on the 0th MPI rank; thus the percentages of time in the events will be misleading.

  Developer Note:
  This `PCType` is never directly selected by the user, it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
  a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
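
  Example Usage:
  A minimal sketch of the rank 0 user code (the variable names and launch line are illustrative); the other ranks
  never reach this code since they remain in the server loop of `PCMPIServerBegin()`, and on rank 0 `PETSC_COMM_WORLD`
  has been set to `PETSC_COMM_SELF`
.vb
  // launched with: mpiexec -n 4 ./app -mpi_linear_solver_server
  KSP ksp;
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); // a sequential KSP as far as the user code can tell
  PetscCall(KSPSetOperators(ksp, A, A));        // A is a sequential AIJ matrix assembled by the user code
  PetscCall(KSPSolve(ksp, b, x));               // the solve is forwarded to the MPI server processes
  PetscCall(KSPDestroy(&ksp));
.ve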

.seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
{
  PC_MPI *km;
  char   *found = NULL;

  PetscFunctionBegin;
  PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
  PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

  /* material from PCSetType() */
  PetscTryTypeMethod(pc, destroy);
  pc->ops->destroy = NULL;
  pc->data         = NULL;

  PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
  PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
  pc->modifysubmatrices  = NULL;
  pc->modifysubmatricesP = NULL;
  pc->setupcalled        = 0;

  PetscCall(PetscNew(&km));
  pc->data = (void *)km;

  km->mincntperrank = 10000;

  pc->ops->setup          = PCSetUp_MPI;
  pc->ops->apply          = PCApply_MPI;
  pc->ops->destroy        = PCDestroy_MPI;
  pc->ops->view           = PCView_MPI;
  pc->ops->setfromoptions = PCSetFromOptions_MPI;
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCMPIGetKSP - Gets the `KSP` created by the `PCMPI`

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. innerksp - the inner `KSP`

  Level: advanced

.seealso: [](ch_ksp), `KSP`, `PCMPI`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCMPIGetKSP(PC pc, KSP *innerksp)
{
  PC_MPI *red = (PC_MPI *)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(innerksp, 2);
  *innerksp = red->ksps[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}