/*
   Defines a block Jacobi preconditioner.
*/

#include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/

static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);

static PetscErrorCode PCSetUp_BJacobi(PC pc)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  Mat         mat = pc->mat, pmat = pc->pmat;
  PetscBool   hasop;
  PetscInt    N, M, start, i, sum, end;
  PetscInt    bs, i_start = -1, i_end = -1;
  PetscMPIInt rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
  PetscCall(MatGetBlockSize(pc->pmat, &bs));

  if (jac->n > 0 && jac->n < size) {
    PetscCall(PCSetUp_BJacobi_Multiproc(pc));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Determines the number of blocks assigned to each processor */
  /* local block count given */
  if (jac->n_local > 0 && jac->n < 0) {
    PetscCallMPI(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i = 0; i < jac->n_local; i++) {
        PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
    } else {
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
    /* global blocks given: determine which ones are local */
    if (jac->g_lens) {
      /* check that the g_lens entries are valid */
      for (i = 0; i < jac->n; i++) {
        PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
        PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        jac->n_local = jac->n;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
        /* check that user set these correctly */
        sum = 0;
        for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
        PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
      } else {
        PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
        /* loop over blocks determining first one owned by me */
        sum = 0;
        for (i = 0; i < jac->n + 1; i++) {
          if (sum == start) {
            i_start = i;
            goto start_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      start_1:
        for (i = i_start; i < jac->n + 1; i++) {
          if (sum == end) {
            i_end = i;
            goto end_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      end_1:
        jac->n_local = i_end - i_start;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
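        /* the locally owned blocks are g_lens[i_start], ..., g_lens[i_end-1]; copy their lengths into l_lens */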
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
      }
    } else { /* no global block lengths given; determine them using the default layout */
      jac->n_local = jac->n / size + ((jac->n % size) > rank);
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) {
        jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
        PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
    jac->n       = size;
    jac->n_local = 1;
    PetscCall(PetscMalloc1(1, &jac->l_lens));
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  }
  PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");

  /* Determines mat and pmat */
  PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
  if (!hasop && size == 1) {
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
    } else pmat = mat;
  }

  /*
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
  } else {
    PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Default destroy, used if the blocks have never been set up */
static PetscErrorCode PCDestroy_BJacobi(PC pc)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  PetscCall(PetscFree(jac->g_lens));
  PetscCall(PetscFree(jac->l_lens));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems PetscOptionsObject)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  PetscInt    blocks, i;
  PetscBool   flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
  PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
  if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
  PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
  if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
  if (jac->ksp) {
    /* The sub-KSPs have already been set up (e.g., by PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions() was not
     * called on them unless this routine had already been called.
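     * Call it on each existing sub-KSP now so that any sub_ prefixed options take effect.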
*/ 167 for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i])); 168 } 169 PetscOptionsHeadEnd(); 170 PetscFunctionReturn(PETSC_SUCCESS); 171 } 172 173 #include <petscdraw.h> 174 static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer) 175 { 176 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 177 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 178 PetscMPIInt rank; 179 PetscInt i; 180 PetscBool iascii, isstring, isdraw; 181 PetscViewer sviewer; 182 PetscViewerFormat format; 183 const char *prefix; 184 185 PetscFunctionBegin; 186 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 187 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 188 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 189 if (iascii) { 190 if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, " using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n)); 191 PetscCall(PetscViewerASCIIPrintf(viewer, " number of blocks = %" PetscInt_FMT "\n", jac->n)); 192 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 193 PetscCall(PetscViewerGetFormat(viewer, &format)); 194 if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) { 195 PetscCall(PetscViewerASCIIPrintf(viewer, " Local solver information for first block is in the following KSP and PC objects on rank 0:\n")); 196 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 197 PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : "")); 198 if (jac->ksp && !jac->psubcomm) { 199 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 200 if (rank == 0) { 201 PetscCall(PetscViewerASCIIPushTab(sviewer)); 202 PetscCall(KSPView(jac->ksp[0], sviewer)); 203 PetscCall(PetscViewerASCIIPopTab(sviewer)); 204 } 205 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 206 /* extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 207 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 208 } else if (mpjac && jac->ksp && mpjac->psubcomm) { 209 PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer)); 210 if (!mpjac->psubcomm->color) { 211 PetscCall(PetscViewerASCIIPushTab(sviewer)); 212 PetscCall(KSPView(*jac->ksp, sviewer)); 213 PetscCall(PetscViewerASCIIPopTab(sviewer)); 214 } 215 PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer)); 216 /* extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 217 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 218 } 219 } else { 220 PetscInt n_global; 221 PetscCallMPI(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 222 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 223 PetscCall(PetscViewerASCIIPrintf(viewer, " Local solver information for each block is in the following KSP and PC objects:\n")); 224 PetscCall(PetscViewerASCIIPushTab(viewer)); 225 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 226 PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local)); 227 for (i = 0; i < jac->n_local; i++) { 228 PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i)); 229 
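        /* display the complete solver configuration (KSP and PC) of this local block */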
PetscCall(KSPView(jac->ksp[i], sviewer)); 230 PetscCall(PetscViewerASCIIPrintf(sviewer, "- - - - - - - - - - - - - - - - - -\n")); 231 } 232 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 233 PetscCall(PetscViewerASCIIPopTab(viewer)); 234 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 235 } 236 } else if (isstring) { 237 PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n)); 238 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 239 if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer)); 240 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 241 } else if (isdraw) { 242 PetscDraw draw; 243 char str[25]; 244 PetscReal x, y, bottom, h; 245 246 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 247 PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y)); 248 PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n)); 249 PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h)); 250 bottom = y - h; 251 PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom)); 252 /* warning the communicator on viewer is different then on ksp in parallel */ 253 if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer)); 254 PetscCall(PetscDrawPopCurrentPoint(draw)); 255 } 256 PetscFunctionReturn(PETSC_SUCCESS); 257 } 258 259 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp) 260 { 261 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 262 263 PetscFunctionBegin; 264 PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first"); 265 266 if (n_local) *n_local = jac->n_local; 267 if (first_local) *first_local = jac->first_local; 268 if (ksp) *ksp = jac->ksp; 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens) 273 { 274 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 275 276 PetscFunctionBegin; 277 PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called"); 278 jac->n = blocks; 279 if (!lens) jac->g_lens = NULL; 280 else { 281 PetscCall(PetscMalloc1(blocks, &jac->g_lens)); 282 PetscCall(PetscArraycpy(jac->g_lens, lens, blocks)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) 288 { 289 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 290 291 PetscFunctionBegin; 292 *blocks = jac->n; 293 if (lens) *lens = jac->g_lens; 294 PetscFunctionReturn(PETSC_SUCCESS); 295 } 296 297 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[]) 298 { 299 PC_BJacobi *jac; 300 301 PetscFunctionBegin; 302 jac = (PC_BJacobi *)pc->data; 303 304 jac->n_local = blocks; 305 if (!lens) jac->l_lens = NULL; 306 else { 307 PetscCall(PetscMalloc1(blocks, &jac->l_lens)); 308 PetscCall(PetscArraycpy(jac->l_lens, lens, blocks)); 309 } 310 PetscFunctionReturn(PETSC_SUCCESS); 311 } 312 313 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) 314 { 315 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 316 317 PetscFunctionBegin; 318 *blocks = jac->n_local; 319 if (lens) *lens = jac->l_lens; 320 PetscFunctionReturn(PETSC_SUCCESS); 321 } 322 323 /*@C 324 PCBJacobiGetSubKSP - Gets the local `KSP` 
contexts for all blocks on 325 this processor. 326 327 Not Collective 328 329 Input Parameter: 330 . pc - the preconditioner context 331 332 Output Parameters: 333 + n_local - the number of blocks on this processor, or NULL 334 . first_local - the global number of the first block on this processor, or NULL 335 - ksp - the array of KSP contexts 336 337 Level: advanced 338 339 Notes: 340 After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed. 341 342 Currently for some matrix implementations only 1 block per processor 343 is supported. 344 345 You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`. 346 347 .seealso: [](ch_ksp), `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()` 348 @*/ 349 PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[]) 350 { 351 PetscFunctionBegin; 352 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 353 PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp)); 354 PetscFunctionReturn(PETSC_SUCCESS); 355 } 356 357 /*@ 358 PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block 359 Jacobi preconditioner. 360 361 Collective 362 363 Input Parameters: 364 + pc - the preconditioner context 365 . blocks - the number of blocks 366 - lens - [optional] integer array containing the size of each block 367 368 Options Database Key: 369 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks 370 371 Level: intermediate 372 373 Note: 374 Currently only a limited number of blocking configurations are supported. 375 All processors sharing the `PC` must call this routine with the same data. 376 377 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()` 378 @*/ 379 PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[]) 380 { 381 PetscFunctionBegin; 382 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 383 PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks"); 384 PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens)); 385 PetscFunctionReturn(PETSC_SUCCESS); 386 } 387 388 /*@C 389 PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block 390 Jacobi, `PCBJACOBI`, preconditioner. 391 392 Not Collective 393 394 Input Parameter: 395 . pc - the preconditioner context 396 397 Output Parameters: 398 + blocks - the number of blocks 399 - lens - integer array containing the size of each block 400 401 Level: intermediate 402 403 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()` 404 @*/ 405 PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) 406 { 407 PetscFunctionBegin; 408 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 409 PetscAssertPointer(blocks, 2); 410 PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens)); 411 PetscFunctionReturn(PETSC_SUCCESS); 412 } 413 414 /*@ 415 PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block 416 Jacobi, `PCBJACOBI`, preconditioner. 417 418 Not Collective 419 420 Input Parameters: 421 + pc - the preconditioner context 422 . blocks - the number of blocks 423 - lens - [optional] integer array containing size of each block 424 425 Options Database Key: 426 . 
-pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks

  Level: intermediate

  Note:
  Currently only a limited number of blocking configurations are supported.

.seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
@*/
PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonnegative blocks");
  PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
  Jacobi, `PCBJACOBI`, preconditioner.

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameters:
+ blocks - the number of blocks
- lens - [optional] integer array containing the size of each block

  Level: intermediate

  Note:
  Currently only a limited number of blocking configurations are supported.

.seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
@*/
PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   PCBJACOBI - Use block Jacobi preconditioning, in which each block is (approximately) solved with
   its own `KSP` object.

   Options Database Keys:
+  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
-  -pc_bjacobi_blocks <n> - use n total blocks

   Level: beginner

   Notes:
   See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block.

   Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.

   To set options on the solvers for all blocks at once, append -sub_ to all the `KSP` and `PC`
   options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

   To set options on the solvers separately for each block, call `PCBJacobiGetSubKSP()`
   and set the options directly on the resulting `KSP` objects (you can access their inner `PC`
   with `KSPGetPC()`).

   For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
   performance. A different block partitioning may lead to additional data transfers
   between host and GPU that degrade performance.

   When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
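   Example of configuring the block solvers from the program rather than from the options database (a minimal sketch; it assumes `ksp` is the outer `KSP`, `pc` its `PCBJACOBI` preconditioner obtained with `KSPGetPC()`, and `nlocal`, `first`, `i`, `subksp`, `subpc` are locally declared variables):
.vb
   PetscCall(KSPSetUp(ksp)); /* the sub-KSPs do not exist until the outer solver is set up */
   PetscCall(PCBJacobiGetSubKSP(pc, &nlocal, &first, &subksp));
   for (i = 0; i < nlocal; i++) {
     PetscCall(KSPSetType(subksp[i], KSPPREONLY));
     PetscCall(KSPGetPC(subksp[i], &subpc));
     PetscCall(PCSetType(subpc, PCILU));
   }
.ve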
498 499 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`, 500 `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`, 501 `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI` 502 M*/ 503 504 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc) 505 { 506 PetscMPIInt rank; 507 PC_BJacobi *jac; 508 509 PetscFunctionBegin; 510 PetscCall(PetscNew(&jac)); 511 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 512 513 pc->ops->apply = NULL; 514 pc->ops->matapply = NULL; 515 pc->ops->applytranspose = NULL; 516 pc->ops->matapplytranspose = NULL; 517 pc->ops->setup = PCSetUp_BJacobi; 518 pc->ops->destroy = PCDestroy_BJacobi; 519 pc->ops->setfromoptions = PCSetFromOptions_BJacobi; 520 pc->ops->view = PCView_BJacobi; 521 pc->ops->applyrichardson = NULL; 522 523 pc->data = (void *)jac; 524 jac->n = -1; 525 jac->n_local = -1; 526 jac->first_local = rank; 527 jac->ksp = NULL; 528 jac->g_lens = NULL; 529 jac->l_lens = NULL; 530 jac->psubcomm = NULL; 531 532 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi)); 533 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi)); 534 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi)); 535 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi)); 536 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi)); 537 PetscFunctionReturn(PETSC_SUCCESS); 538 } 539 540 /* 541 These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI 542 */ 543 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc) 544 { 545 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 546 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 547 548 PetscFunctionBegin; 549 PetscCall(KSPReset(jac->ksp[0])); 550 PetscCall(VecDestroy(&bjac->x)); 551 PetscCall(VecDestroy(&bjac->y)); 552 PetscFunctionReturn(PETSC_SUCCESS); 553 } 554 555 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc) 556 { 557 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 558 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 559 560 PetscFunctionBegin; 561 PetscCall(PCReset_BJacobi_Singleblock(pc)); 562 PetscCall(KSPDestroy(&jac->ksp[0])); 563 PetscCall(PetscFree(jac->ksp)); 564 PetscCall(PetscFree(bjac)); 565 PetscCall(PCDestroy_BJacobi(pc)); 566 PetscFunctionReturn(PETSC_SUCCESS); 567 } 568 569 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc) 570 { 571 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 572 KSP subksp = jac->ksp[0]; 573 KSPConvergedReason reason; 574 575 PetscFunctionBegin; 576 PetscCall(KSPSetUp(subksp)); 577 PetscCall(KSPGetConvergedReason(subksp, &reason)); 578 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 579 PetscFunctionReturn(PETSC_SUCCESS); 580 } 581 582 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y) 583 { 584 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 585 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 586 587 PetscFunctionBegin; 588 PetscCall(VecGetLocalVectorRead(x, bjac->x)); 589 PetscCall(VecGetLocalVector(y, bjac->y)); 590 /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner 591 matrix may change even if the outer 
KSP/PC has not updated the preconditioner, this will trigger a rebuild 592 of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/ 593 PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner)); 594 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0)); 595 PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y)); 596 PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y)); 597 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0)); 598 PetscCall(VecRestoreLocalVectorRead(x, bjac->x)); 599 PetscCall(VecRestoreLocalVector(y, bjac->y)); 600 PetscFunctionReturn(PETSC_SUCCESS); 601 } 602 603 static PetscErrorCode PCMatApply_BJacobi_Singleblock_Private(PC pc, Mat X, Mat Y, PetscBool transpose) 604 { 605 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 606 Mat sX, sY; 607 608 PetscFunctionBegin; 609 /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner 610 matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild 611 of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/ 612 PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner)); 613 PetscCall(MatDenseGetLocalMatrix(X, &sX)); 614 PetscCall(MatDenseGetLocalMatrix(Y, &sY)); 615 if (!transpose) PetscCall(KSPMatSolve(jac->ksp[0], sX, sY)); 616 else PetscCall(KSPMatSolveTranspose(jac->ksp[0], sX, sY)); 617 PetscFunctionReturn(PETSC_SUCCESS); 618 } 619 620 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y) 621 { 622 PetscFunctionBegin; 623 PetscCall(PCMatApply_BJacobi_Singleblock_Private(pc, X, Y, PETSC_FALSE)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 static PetscErrorCode PCMatApplyTranspose_BJacobi_Singleblock(PC pc, Mat X, Mat Y) 628 { 629 PetscFunctionBegin; 630 PetscCall(PCMatApply_BJacobi_Singleblock_Private(pc, X, Y, PETSC_TRUE)); 631 PetscFunctionReturn(PETSC_SUCCESS); 632 } 633 634 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y) 635 { 636 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 637 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 638 PetscScalar *y_array; 639 const PetscScalar *x_array; 640 PC subpc; 641 642 PetscFunctionBegin; 643 /* 644 The VecPlaceArray() is to avoid having to copy the 645 y vector into the bjac->x vector. The reason for 646 the bjac->x vector is that we need a sequential vector 647 for the sequential solve. 
648 */ 649 PetscCall(VecGetArrayRead(x, &x_array)); 650 PetscCall(VecGetArray(y, &y_array)); 651 PetscCall(VecPlaceArray(bjac->x, x_array)); 652 PetscCall(VecPlaceArray(bjac->y, y_array)); 653 /* apply the symmetric left portion of the inner PC operator */ 654 /* note this bypasses the inner KSP and its options completely */ 655 PetscCall(KSPGetPC(jac->ksp[0], &subpc)); 656 PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y)); 657 PetscCall(VecResetArray(bjac->x)); 658 PetscCall(VecResetArray(bjac->y)); 659 PetscCall(VecRestoreArrayRead(x, &x_array)); 660 PetscCall(VecRestoreArray(y, &y_array)); 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 663 664 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y) 665 { 666 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 667 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 668 PetscScalar *y_array; 669 const PetscScalar *x_array; 670 PC subpc; 671 672 PetscFunctionBegin; 673 /* 674 The VecPlaceArray() is to avoid having to copy the 675 y vector into the bjac->x vector. The reason for 676 the bjac->x vector is that we need a sequential vector 677 for the sequential solve. 678 */ 679 PetscCall(VecGetArrayRead(x, &x_array)); 680 PetscCall(VecGetArray(y, &y_array)); 681 PetscCall(VecPlaceArray(bjac->x, x_array)); 682 PetscCall(VecPlaceArray(bjac->y, y_array)); 683 684 /* apply the symmetric right portion of the inner PC operator */ 685 /* note this bypasses the inner KSP and its options completely */ 686 687 PetscCall(KSPGetPC(jac->ksp[0], &subpc)); 688 PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y)); 689 690 PetscCall(VecResetArray(bjac->x)); 691 PetscCall(VecResetArray(bjac->y)); 692 PetscCall(VecRestoreArrayRead(x, &x_array)); 693 PetscCall(VecRestoreArray(y, &y_array)); 694 PetscFunctionReturn(PETSC_SUCCESS); 695 } 696 697 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y) 698 { 699 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 700 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data; 701 PetscScalar *y_array; 702 const PetscScalar *x_array; 703 704 PetscFunctionBegin; 705 /* 706 The VecPlaceArray() is to avoid having to copy the 707 y vector into the bjac->x vector. The reason for 708 the bjac->x vector is that we need a sequential vector 709 for the sequential solve. 
710 */ 711 PetscCall(VecGetArrayRead(x, &x_array)); 712 PetscCall(VecGetArray(y, &y_array)); 713 PetscCall(VecPlaceArray(bjac->x, x_array)); 714 PetscCall(VecPlaceArray(bjac->y, y_array)); 715 PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0)); 716 PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y)); 717 PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y)); 718 PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0)); 719 PetscCall(VecResetArray(bjac->x)); 720 PetscCall(VecResetArray(bjac->y)); 721 PetscCall(VecRestoreArrayRead(x, &x_array)); 722 PetscCall(VecRestoreArray(y, &y_array)); 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat) 727 { 728 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 729 PetscInt m; 730 KSP ksp; 731 PC_BJacobi_Singleblock *bjac; 732 PetscBool wasSetup = PETSC_TRUE; 733 VecType vectype; 734 const char *prefix; 735 736 PetscFunctionBegin; 737 if (!pc->setupcalled) { 738 if (!jac->ksp) { 739 PetscInt nestlevel; 740 741 wasSetup = PETSC_FALSE; 742 743 PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp)); 744 PetscCall(PCGetKSPNestLevel(pc, &nestlevel)); 745 PetscCall(KSPSetNestLevel(ksp, nestlevel + 1)); 746 PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure)); 747 PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1)); 748 PetscCall(KSPSetType(ksp, KSPPREONLY)); 749 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 750 PetscCall(KSPSetOptionsPrefix(ksp, prefix)); 751 PetscCall(KSPAppendOptionsPrefix(ksp, "sub_")); 752 753 pc->ops->reset = PCReset_BJacobi_Singleblock; 754 pc->ops->destroy = PCDestroy_BJacobi_Singleblock; 755 pc->ops->apply = PCApply_BJacobi_Singleblock; 756 pc->ops->matapply = PCMatApply_BJacobi_Singleblock; 757 pc->ops->matapplytranspose = PCMatApplyTranspose_BJacobi_Singleblock; 758 pc->ops->applysymmetricleft = PCApplySymmetricLeft_BJacobi_Singleblock; 759 pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock; 760 pc->ops->applytranspose = PCApplyTranspose_BJacobi_Singleblock; 761 pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Singleblock; 762 763 PetscCall(PetscMalloc1(1, &jac->ksp)); 764 jac->ksp[0] = ksp; 765 766 PetscCall(PetscNew(&bjac)); 767 jac->data = (void *)bjac; 768 } else { 769 ksp = jac->ksp[0]; 770 bjac = (PC_BJacobi_Singleblock *)jac->data; 771 } 772 773 /* 774 The reason we need to generate these vectors is to serve 775 as the right-hand side and solution vector for the solve on the 776 block. We do not need to allocate space for the vectors since 777 that is provided via VecPlaceArray() just before the call to 778 KSPSolve() on the block. 
779 */ 780 PetscCall(MatGetSize(pmat, &m, &m)); 781 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x)); 782 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y)); 783 PetscCall(MatGetVecType(pmat, &vectype)); 784 PetscCall(VecSetType(bjac->x, vectype)); 785 PetscCall(VecSetType(bjac->y, vectype)); 786 } else { 787 ksp = jac->ksp[0]; 788 bjac = (PC_BJacobi_Singleblock *)jac->data; 789 } 790 PetscCall(KSPGetOptionsPrefix(ksp, &prefix)); 791 if (pc->useAmat) { 792 PetscCall(KSPSetOperators(ksp, mat, pmat)); 793 PetscCall(MatSetOptionsPrefix(mat, prefix)); 794 } else { 795 PetscCall(KSPSetOperators(ksp, pmat, pmat)); 796 } 797 PetscCall(MatSetOptionsPrefix(pmat, prefix)); 798 if (!wasSetup && pc->setfromoptionscalled) { 799 /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */ 800 PetscCall(KSPSetFromOptions(ksp)); 801 } 802 PetscFunctionReturn(PETSC_SUCCESS); 803 } 804 805 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc) 806 { 807 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 808 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 809 PetscInt i; 810 811 PetscFunctionBegin; 812 if (bjac && bjac->pmat) { 813 PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat)); 814 if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat)); 815 } 816 817 for (i = 0; i < jac->n_local; i++) { 818 PetscCall(KSPReset(jac->ksp[i])); 819 if (bjac && bjac->x) { 820 PetscCall(VecDestroy(&bjac->x[i])); 821 PetscCall(VecDestroy(&bjac->y[i])); 822 PetscCall(ISDestroy(&bjac->is[i])); 823 } 824 } 825 PetscCall(PetscFree(jac->l_lens)); 826 PetscCall(PetscFree(jac->g_lens)); 827 PetscFunctionReturn(PETSC_SUCCESS); 828 } 829 830 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc) 831 { 832 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 833 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 834 PetscInt i; 835 836 PetscFunctionBegin; 837 PetscCall(PCReset_BJacobi_Multiblock(pc)); 838 if (bjac) { 839 PetscCall(PetscFree2(bjac->x, bjac->y)); 840 PetscCall(PetscFree(bjac->starts)); 841 PetscCall(PetscFree(bjac->is)); 842 } 843 PetscCall(PetscFree(jac->data)); 844 for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i])); 845 PetscCall(PetscFree(jac->ksp)); 846 PetscCall(PCDestroy_BJacobi(pc)); 847 PetscFunctionReturn(PETSC_SUCCESS); 848 } 849 850 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc) 851 { 852 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 853 PetscInt i, n_local = jac->n_local; 854 KSPConvergedReason reason; 855 856 PetscFunctionBegin; 857 for (i = 0; i < n_local; i++) { 858 PetscCall(KSPSetUp(jac->ksp[i])); 859 PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason)); 860 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 861 } 862 PetscFunctionReturn(PETSC_SUCCESS); 863 } 864 865 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y) 866 { 867 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 868 PetscInt i, n_local = jac->n_local; 869 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 870 PetscScalar *yin; 871 const PetscScalar *xin; 872 873 PetscFunctionBegin; 874 PetscCall(VecGetArrayRead(x, &xin)); 875 PetscCall(VecGetArray(y, &yin)); 876 for (i = 0; i < n_local; i++) { 877 /* 878 To avoid copying the subvector from x into a workspace we instead 879 make the workspace vector array point to the subpart of the array of 880 the global vector. 
881 */ 882 PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i])); 883 PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i])); 884 885 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 886 PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i])); 887 PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i])); 888 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 889 890 PetscCall(VecResetArray(bjac->x[i])); 891 PetscCall(VecResetArray(bjac->y[i])); 892 } 893 PetscCall(VecRestoreArrayRead(x, &xin)); 894 PetscCall(VecRestoreArray(y, &yin)); 895 PetscFunctionReturn(PETSC_SUCCESS); 896 } 897 898 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y) 899 { 900 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 901 PetscInt i, n_local = jac->n_local; 902 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 903 PetscScalar *yin; 904 const PetscScalar *xin; 905 PC subpc; 906 907 PetscFunctionBegin; 908 PetscCall(VecGetArrayRead(x, &xin)); 909 PetscCall(VecGetArray(y, &yin)); 910 for (i = 0; i < n_local; i++) { 911 /* 912 To avoid copying the subvector from x into a workspace we instead 913 make the workspace vector array point to the subpart of the array of 914 the global vector. 915 */ 916 PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i])); 917 PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i])); 918 919 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 920 /* apply the symmetric left portion of the inner PC operator */ 921 /* note this bypasses the inner KSP and its options completely */ 922 PetscCall(KSPGetPC(jac->ksp[i], &subpc)); 923 PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i])); 924 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 925 926 PetscCall(VecResetArray(bjac->x[i])); 927 PetscCall(VecResetArray(bjac->y[i])); 928 } 929 PetscCall(VecRestoreArrayRead(x, &xin)); 930 PetscCall(VecRestoreArray(y, &yin)); 931 PetscFunctionReturn(PETSC_SUCCESS); 932 } 933 934 static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y) 935 { 936 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 937 PetscInt i, n_local = jac->n_local; 938 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 939 PetscScalar *yin; 940 const PetscScalar *xin; 941 PC subpc; 942 943 PetscFunctionBegin; 944 PetscCall(VecGetArrayRead(x, &xin)); 945 PetscCall(VecGetArray(y, &yin)); 946 for (i = 0; i < n_local; i++) { 947 /* 948 To avoid copying the subvector from x into a workspace we instead 949 make the workspace vector array point to the subpart of the array of 950 the global vector. 
951 */ 952 PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i])); 953 PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i])); 954 955 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 956 /* apply the symmetric left portion of the inner PC operator */ 957 /* note this bypasses the inner KSP and its options completely */ 958 PetscCall(KSPGetPC(jac->ksp[i], &subpc)); 959 PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i])); 960 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 961 962 PetscCall(VecResetArray(bjac->x[i])); 963 PetscCall(VecResetArray(bjac->y[i])); 964 } 965 PetscCall(VecRestoreArrayRead(x, &xin)); 966 PetscCall(VecRestoreArray(y, &yin)); 967 PetscFunctionReturn(PETSC_SUCCESS); 968 } 969 970 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y) 971 { 972 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 973 PetscInt i, n_local = jac->n_local; 974 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 975 PetscScalar *yin; 976 const PetscScalar *xin; 977 978 PetscFunctionBegin; 979 PetscCall(VecGetArrayRead(x, &xin)); 980 PetscCall(VecGetArray(y, &yin)); 981 for (i = 0; i < n_local; i++) { 982 /* 983 To avoid copying the subvector from x into a workspace we instead 984 make the workspace vector array point to the subpart of the array of 985 the global vector. 986 */ 987 PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i])); 988 PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i])); 989 990 PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 991 PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i])); 992 PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i])); 993 PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0)); 994 995 PetscCall(VecResetArray(bjac->x[i])); 996 PetscCall(VecResetArray(bjac->y[i])); 997 } 998 PetscCall(VecRestoreArrayRead(x, &xin)); 999 PetscCall(VecRestoreArray(y, &yin)); 1000 PetscFunctionReturn(PETSC_SUCCESS); 1001 } 1002 1003 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat) 1004 { 1005 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1006 PetscInt m, n_local, N, M, start, i; 1007 const char *prefix; 1008 KSP ksp; 1009 Vec x, y; 1010 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data; 1011 PC subpc; 1012 IS is; 1013 MatReuse scall; 1014 VecType vectype; 1015 MatNullSpace *nullsp_mat = NULL, *nullsp_pmat = NULL; 1016 1017 PetscFunctionBegin; 1018 PetscCall(MatGetLocalSize(pc->pmat, &M, &N)); 1019 1020 n_local = jac->n_local; 1021 1022 if (pc->useAmat) { 1023 PetscBool same; 1024 PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same)); 1025 PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type"); 1026 } 1027 1028 if (!pc->setupcalled) { 1029 PetscInt nestlevel; 1030 1031 scall = MAT_INITIAL_MATRIX; 1032 1033 if (!jac->ksp) { 1034 pc->ops->reset = PCReset_BJacobi_Multiblock; 1035 pc->ops->destroy = PCDestroy_BJacobi_Multiblock; 1036 pc->ops->apply = PCApply_BJacobi_Multiblock; 1037 pc->ops->matapply = NULL; 1038 pc->ops->matapplytranspose = NULL; 1039 pc->ops->applysymmetricleft = PCApplySymmetricLeft_BJacobi_Multiblock; 1040 pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock; 1041 pc->ops->applytranspose = PCApplyTranspose_BJacobi_Multiblock; 1042 pc->ops->setuponblocks = 
PCSetUpOnBlocks_BJacobi_Multiblock; 1043 1044 PetscCall(PetscNew(&bjac)); 1045 PetscCall(PetscMalloc1(n_local, &jac->ksp)); 1046 PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y)); 1047 PetscCall(PetscMalloc1(n_local, &bjac->starts)); 1048 1049 jac->data = (void *)bjac; 1050 PetscCall(PetscMalloc1(n_local, &bjac->is)); 1051 1052 for (i = 0; i < n_local; i++) { 1053 PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp)); 1054 PetscCall(PCGetKSPNestLevel(pc, &nestlevel)); 1055 PetscCall(KSPSetNestLevel(ksp, nestlevel + 1)); 1056 PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure)); 1057 PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1)); 1058 PetscCall(KSPSetType(ksp, KSPPREONLY)); 1059 PetscCall(KSPGetPC(ksp, &subpc)); 1060 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1061 PetscCall(KSPSetOptionsPrefix(ksp, prefix)); 1062 PetscCall(KSPAppendOptionsPrefix(ksp, "sub_")); 1063 1064 jac->ksp[i] = ksp; 1065 } 1066 } else { 1067 bjac = (PC_BJacobi_Multiblock *)jac->data; 1068 } 1069 1070 start = 0; 1071 PetscCall(MatGetVecType(pmat, &vectype)); 1072 for (i = 0; i < n_local; i++) { 1073 m = jac->l_lens[i]; 1074 /* 1075 The reason we need to generate these vectors is to serve 1076 as the right-hand side and solution vector for the solve on the 1077 block. We do not need to allocate space for the vectors since 1078 that is provided via VecPlaceArray() just before the call to 1079 KSPSolve() on the block. 1080 1081 */ 1082 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x)); 1083 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y)); 1084 PetscCall(VecSetType(x, vectype)); 1085 PetscCall(VecSetType(y, vectype)); 1086 1087 bjac->x[i] = x; 1088 bjac->y[i] = y; 1089 bjac->starts[i] = start; 1090 1091 PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is)); 1092 bjac->is[i] = is; 1093 1094 start += m; 1095 } 1096 } else { 1097 bjac = (PC_BJacobi_Multiblock *)jac->data; 1098 /* 1099 Destroy the blocks from the previous iteration 1100 */ 1101 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1102 PetscCall(MatGetNullSpaces(n_local, bjac->pmat, &nullsp_pmat)); 1103 PetscCall(MatDestroyMatrices(n_local, &bjac->pmat)); 1104 if (pc->useAmat) { 1105 PetscCall(MatGetNullSpaces(n_local, bjac->mat, &nullsp_mat)); 1106 PetscCall(MatDestroyMatrices(n_local, &bjac->mat)); 1107 } 1108 scall = MAT_INITIAL_MATRIX; 1109 } else scall = MAT_REUSE_MATRIX; 1110 } 1111 1112 PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat)); 1113 if (nullsp_pmat) PetscCall(MatRestoreNullSpaces(n_local, bjac->pmat, &nullsp_pmat)); 1114 if (pc->useAmat) { 1115 PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat)); 1116 if (nullsp_mat) PetscCall(MatRestoreNullSpaces(n_local, bjac->mat, &nullsp_mat)); 1117 } 1118 /* Return control to the user so that the submatrices can be modified (e.g., to apply 1119 different boundary conditions for the submatrices than for the global problem) */ 1120 PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP)); 1121 1122 for (i = 0; i < n_local; i++) { 1123 PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix)); 1124 if (pc->useAmat) { 1125 PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i])); 1126 PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix)); 1127 } else { 1128 PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i])); 1129 } 1130 PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix)); 1131 if (pc->setfromoptionscalled) 
PetscCall(KSPSetFromOptions(jac->ksp[i])); 1132 } 1133 PetscFunctionReturn(PETSC_SUCCESS); 1134 } 1135 1136 /* 1137 These are for a single block with multiple processes 1138 */ 1139 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc) 1140 { 1141 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1142 KSP subksp = jac->ksp[0]; 1143 KSPConvergedReason reason; 1144 1145 PetscFunctionBegin; 1146 PetscCall(KSPSetUp(subksp)); 1147 PetscCall(KSPGetConvergedReason(subksp, &reason)); 1148 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 1149 PetscFunctionReturn(PETSC_SUCCESS); 1150 } 1151 1152 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc) 1153 { 1154 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1155 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 1156 1157 PetscFunctionBegin; 1158 PetscCall(VecDestroy(&mpjac->ysub)); 1159 PetscCall(VecDestroy(&mpjac->xsub)); 1160 PetscCall(MatDestroy(&mpjac->submats)); 1161 if (jac->ksp) PetscCall(KSPReset(jac->ksp[0])); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc) 1166 { 1167 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1168 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 1169 1170 PetscFunctionBegin; 1171 PetscCall(PCReset_BJacobi_Multiproc(pc)); 1172 PetscCall(KSPDestroy(&jac->ksp[0])); 1173 PetscCall(PetscFree(jac->ksp)); 1174 PetscCall(PetscSubcommDestroy(&mpjac->psubcomm)); 1175 1176 PetscCall(PetscFree(mpjac)); 1177 PetscCall(PCDestroy_BJacobi(pc)); 1178 PetscFunctionReturn(PETSC_SUCCESS); 1179 } 1180 1181 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y) 1182 { 1183 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1184 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 1185 PetscScalar *yarray; 1186 const PetscScalar *xarray; 1187 KSPConvergedReason reason; 1188 1189 PetscFunctionBegin; 1190 /* place x's and y's local arrays into xsub and ysub */ 1191 PetscCall(VecGetArrayRead(x, &xarray)); 1192 PetscCall(VecGetArray(y, &yarray)); 1193 PetscCall(VecPlaceArray(mpjac->xsub, xarray)); 1194 PetscCall(VecPlaceArray(mpjac->ysub, yarray)); 1195 1196 /* apply preconditioner on each matrix block */ 1197 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0)); 1198 PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub)); 1199 PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub)); 1200 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0)); 1201 PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason)); 1202 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 1203 1204 PetscCall(VecResetArray(mpjac->xsub)); 1205 PetscCall(VecResetArray(mpjac->ysub)); 1206 PetscCall(VecRestoreArrayRead(x, &xarray)); 1207 PetscCall(VecRestoreArray(y, &yarray)); 1208 PetscFunctionReturn(PETSC_SUCCESS); 1209 } 1210 1211 static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y) 1212 { 1213 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1214 KSPConvergedReason reason; 1215 Mat sX, sY; 1216 const PetscScalar *x; 1217 PetscScalar *y; 1218 PetscInt m, N, lda, ldb; 1219 1220 PetscFunctionBegin; 1221 /* apply preconditioner on each matrix block */ 1222 PetscCall(MatGetLocalSize(X, &m, NULL)); 1223 PetscCall(MatGetSize(X, NULL, &N)); 1224 PetscCall(MatDenseGetLDA(X, &lda)); 1225 PetscCall(MatDenseGetLDA(Y, &ldb)); 1226 PetscCall(MatDenseGetArrayRead(X, &x)); 1227 PetscCall(MatDenseGetArrayWrite(Y, &y)); 1228 
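  /* Wrap the raw arrays of X and Y in dense matrices on the sub-KSP's communicator so that KSPMatSolve() can use them
     without a copy; the original leading dimensions are carried over with MatDenseSetLDA() below. */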
PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX)); 1229 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY)); 1230 PetscCall(MatDenseSetLDA(sX, lda)); 1231 PetscCall(MatDenseSetLDA(sY, ldb)); 1232 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0)); 1233 PetscCall(KSPMatSolve(jac->ksp[0], sX, sY)); 1234 PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL)); 1235 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0)); 1236 PetscCall(MatDestroy(&sY)); 1237 PetscCall(MatDestroy(&sX)); 1238 PetscCall(MatDenseRestoreArrayWrite(Y, &y)); 1239 PetscCall(MatDenseRestoreArrayRead(X, &x)); 1240 PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason)); 1241 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 1242 PetscFunctionReturn(PETSC_SUCCESS); 1243 } 1244 1245 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc) 1246 { 1247 PC_BJacobi *jac = (PC_BJacobi *)pc->data; 1248 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data; 1249 PetscInt m, n; 1250 MPI_Comm comm, subcomm = 0; 1251 const char *prefix; 1252 PetscBool wasSetup = PETSC_TRUE; 1253 VecType vectype; 1254 1255 PetscFunctionBegin; 1256 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 1257 PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported"); 1258 jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */ 1259 if (!pc->setupcalled) { 1260 PetscInt nestlevel; 1261 1262 wasSetup = PETSC_FALSE; 1263 PetscCall(PetscNew(&mpjac)); 1264 jac->data = (void *)mpjac; 1265 1266 /* initialize datastructure mpjac */ 1267 if (!jac->psubcomm) { 1268 /* Create default contiguous subcommunicatiors if user does not provide them */ 1269 PetscCall(PetscSubcommCreate(comm, &jac->psubcomm)); 1270 PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n)); 1271 PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS)); 1272 } 1273 mpjac->psubcomm = jac->psubcomm; 1274 subcomm = PetscSubcommChild(mpjac->psubcomm); 1275 1276 /* Get matrix blocks of pmat */ 1277 PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats)); 1278 1279 /* create a new PC that processors in each subcomm have copy of */ 1280 PetscCall(PetscMalloc1(1, &jac->ksp)); 1281 PetscCall(KSPCreate(subcomm, &jac->ksp[0])); 1282 PetscCall(PCGetKSPNestLevel(pc, &nestlevel)); 1283 PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1)); 1284 PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure)); 1285 PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1)); 1286 PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats)); 1287 PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc)); 1288 1289 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1290 PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix)); 1291 PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_")); 1292 PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix)); 1293 PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix)); 1294 1295 /* create dummy vectors xsub and ysub */ 1296 PetscCall(MatGetLocalSize(mpjac->submats, &m, &n)); 1297 PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub)); 1298 PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub)); 1299 PetscCall(MatGetVecType(mpjac->submats, &vectype)); 1300 
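  /* give the dummy work vectors the same vector type as the submatrix so the subsolve uses the matching (possibly GPU) backend */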
PetscCall(VecSetType(mpjac->xsub, vectype)); 1301 PetscCall(VecSetType(mpjac->ysub, vectype)); 1302 1303 pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc; 1304 pc->ops->reset = PCReset_BJacobi_Multiproc; 1305 pc->ops->destroy = PCDestroy_BJacobi_Multiproc; 1306 pc->ops->apply = PCApply_BJacobi_Multiproc; 1307 pc->ops->matapply = PCMatApply_BJacobi_Multiproc; 1308 } else { /* pc->setupcalled */ 1309 subcomm = PetscSubcommChild(mpjac->psubcomm); 1310 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1311 /* destroy old matrix blocks, then get new matrix blocks */ 1312 if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats)); 1313 PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats)); 1314 } else { 1315 PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats)); 1316 } 1317 PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats)); 1318 } 1319 1320 if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0])); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } 1323