1 /*
2    Defines a block Jacobi preconditioner.
3 */
4 
5 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
6 
7 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
8 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
10 
11 static PetscErrorCode PCSetUp_BJacobi(PC pc)
12 {
13   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
14   Mat         mat = pc->mat, pmat = pc->pmat;
15   PetscBool   hasop;
16   PetscInt    N, M, start, i, sum, end;
17   PetscInt    bs, i_start = -1, i_end = -1;
18   PetscMPIInt rank, size;
19 
20   PetscFunctionBegin;
21   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
22   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
23   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
24   PetscCall(MatGetBlockSize(pc->pmat, &bs));
25 
26   if (jac->n > 0 && jac->n < size) {
27     PetscCall(PCSetUp_BJacobi_Multiproc(pc));
28     PetscFunctionReturn(PETSC_SUCCESS);
29   }
30 
31   /* Determine the number of blocks assigned to each processor */
32   /* local block count given */
33   if (jac->n_local > 0 && jac->n < 0) {
34     PetscCallMPI(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
35     if (jac->l_lens) { /* check that user set these correctly */
36       sum = 0;
37       for (i = 0; i < jac->n_local; i++) {
38         PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
39         sum += jac->l_lens[i];
40       }
41       PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
42     } else {
43       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
44       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
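      /* Hypothetical worked example: M = 10, bs = 1, n_local = 3 gives l_lens = {4, 3, 3};
         the first (M / bs) % n_local blocks each get one extra block-row of size bs */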
45     }
46   } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
47     /* global blocks given: determine which ones are local */
48     if (jac->g_lens) {
49       /* check that g_lens has valid entries */
50       for (i = 0; i < jac->n; i++) {
51         PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
52         PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
53       }
54       if (size == 1) {
55         jac->n_local = jac->n;
56         PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
57         PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
58         /* check that user set these correctly */
59         sum = 0;
60         for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
61         PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
62       } else {
63         PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
64         /* loop over blocks determining first one owned by me */
65         sum = 0;
66         for (i = 0; i < jac->n + 1; i++) {
67           if (sum == start) {
68             i_start = i;
69             goto start_1;
70           }
71           if (i < jac->n) sum += jac->g_lens[i];
72         }
73         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
74       start_1:
75         for (i = i_start; i < jac->n + 1; i++) {
76           if (sum == end) {
77             i_end = i;
78             goto end_1;
79           }
80           if (i < jac->n) sum += jac->g_lens[i];
81         }
82         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
83       end_1:
84         jac->n_local = i_end - i_start;
85         PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
86         PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
87       }
88     } else { /* no global blocks given, determine them using the default layout */
89       jac->n_local = jac->n / size + ((jac->n % size) > rank);
90       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
91       for (i = 0; i < jac->n_local; i++) {
92         jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
93         PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
94       }
95     }
96   } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
97     jac->n       = size;
98     jac->n_local = 1;
99     PetscCall(PetscMalloc1(1, &jac->l_lens));
100     jac->l_lens[0] = M;
101   } else { /* jac->n > 0 && jac->n_local > 0 */
102     if (!jac->l_lens) {
103       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
104       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
105     }
106   }
107   PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");
108 
109   /* Determine mat and pmat */
110   PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
111   if (!hasop && size == 1) {
112     mat  = pc->mat;
113     pmat = pc->pmat;
114   } else {
115     if (pc->useAmat) {
116       /* use block from Amat matrix, not Pmat for local MatMult() */
117       PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
118     }
119     if (pc->pmat != pc->mat || !pc->useAmat) {
120       PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
121     } else pmat = mat;
122   }
123 
124   /*
125      Setup code depends on the number of blocks
126   */
127   if (jac->n_local == 1) {
128     PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
129   } else {
130     PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
131   }
132   PetscFunctionReturn(PETSC_SUCCESS);
133 }
134 
135 /* Default destroy, used if the PC has never been set up */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc)
137 {
138   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
139 
140   PetscFunctionBegin;
141   PetscCall(PetscFree(jac->g_lens));
142   PetscCall(PetscFree(jac->l_lens));
143   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
144   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
145   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
146   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
147   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
148   PetscCall(PetscFree(pc->data));
149   PetscFunctionReturn(PETSC_SUCCESS);
150 }
151 
152 static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems PetscOptionsObject)
153 {
154   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
155   PetscInt    blocks, i;
156   PetscBool   flg;
157 
158   PetscFunctionBegin;
159   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
160   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
161   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
162   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
163   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
164   if (jac->ksp) {
165     /* The sub-KSPs have already been set up (e.g., by PCSetUp_BJacobi_Singleblock()), but KSPSetFromOptions() was not
166      * called on them unless this routine had already been called. */
167     for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
168   }
169   PetscOptionsHeadEnd();
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
173 #include <petscdraw.h>
174 static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
175 {
176   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
177   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
178   PetscMPIInt           rank;
179   PetscInt              i;
180   PetscBool             iascii, isstring, isdraw;
181   PetscViewer           sviewer;
182   PetscViewerFormat     format;
183   const char           *prefix;
184 
185   PetscFunctionBegin;
186   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
187   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
188   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
189   if (iascii) {
190     if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
191     PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
192     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
193     PetscCall(PetscViewerGetFormat(viewer, &format));
194     if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
195       PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
196       PetscCall(PCGetOptionsPrefix(pc, &prefix));
197       PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
198       if (jac->ksp && !jac->psubcomm) {
199         PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
200         if (rank == 0) {
201           PetscCall(PetscViewerASCIIPushTab(sviewer));
202           PetscCall(KSPView(jac->ksp[0], sviewer));
203           PetscCall(PetscViewerASCIIPopTab(sviewer));
204         }
205         PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
206         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
207         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
208       } else if (mpjac && jac->ksp && mpjac->psubcomm) {
209         PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
210         if (!mpjac->psubcomm->color) {
211           PetscCall(PetscViewerASCIIPushTab(sviewer));
212           PetscCall(KSPView(*jac->ksp, sviewer));
213           PetscCall(PetscViewerASCIIPopTab(sviewer));
214         }
215         PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
216         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
217         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
218       }
219     } else {
220       PetscInt n_global;
221       PetscCallMPI(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
222       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
223       PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
224       PetscCall(PetscViewerASCIIPushTab(viewer));
225       PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
226       PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
227       for (i = 0; i < jac->n_local; i++) {
228         PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
229         PetscCall(KSPView(jac->ksp[i], sviewer));
230         PetscCall(PetscViewerASCIIPrintf(sviewer, "- - - - - - - - - - - - - - - - - -\n"));
231       }
232       PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
233       PetscCall(PetscViewerASCIIPopTab(viewer));
234       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
235     }
236   } else if (isstring) {
237     PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
238     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
239     if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
240     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
241   } else if (isdraw) {
242     PetscDraw draw;
243     char      str[25];
244     PetscReal x, y, bottom, h;
245 
246     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
247     PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
248     PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
249     PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
250     bottom = y - h;
251     PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
252     /* warning: in parallel the communicator on the viewer is different than the one on the ksp */
253     if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
254     PetscCall(PetscDrawPopCurrentPoint(draw));
255   }
256   PetscFunctionReturn(PETSC_SUCCESS);
257 }
258 
259 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
260 {
261   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
262 
263   PetscFunctionBegin;
264   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");
265 
266   if (n_local) *n_local = jac->n_local;
267   if (first_local) *first_local = jac->first_local;
268   if (ksp) *ksp = jac->ksp;
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens)
273 {
274   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
278   jac->n = blocks;
279   if (!lens) jac->g_lens = NULL;
280   else {
281     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
282     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
288 {
289   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
290 
291   PetscFunctionBegin;
292   *blocks = jac->n;
293   if (lens) *lens = jac->g_lens;
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
297 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
298 {
299   PC_BJacobi *jac;
300 
301   PetscFunctionBegin;
302   jac = (PC_BJacobi *)pc->data;
303 
304   jac->n_local = blocks;
305   if (!lens) jac->l_lens = NULL;
306   else {
307     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
308     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
309   }
310   PetscFunctionReturn(PETSC_SUCCESS);
311 }
312 
313 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
314 {
315   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
316 
317   PetscFunctionBegin;
318   *blocks = jac->n_local;
319   if (lens) *lens = jac->l_lens;
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
323 /*@C
324   PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
325   this processor.
326 
327   Not Collective
328 
329   Input Parameter:
330 . pc - the preconditioner context
331 
332   Output Parameters:
333 + n_local     - the number of blocks on this processor, or NULL
334 . first_local - the global number of the first block on this processor, or NULL
335 - ksp         - the array of KSP contexts
336 
337   Level: advanced
338 
339   Notes:
340   The array of `KSP` contexts returned by `PCBJacobiGetSubKSP()` must not be freed by the caller.
341 
342   Currently for some matrix implementations only 1 block per processor
343   is supported.
344 
345   You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.
346 
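  Example Usage:
  A minimal sketch, assuming `pc` is the `PCBJACOBI` preconditioner of an outer solver that has already been set up; the tolerance values are illustrative only:
.vb
  KSP      *subksp;
  PetscInt  nlocal, first;

  PetscCall(PCBJacobiGetSubKSP(pc, &nlocal, &first, &subksp));
  for (PetscInt i = 0; i < nlocal; i++) PetscCall(KSPSetTolerances(subksp[i], 1.e-8, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT));
.ve
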
347 .seealso: [](ch_ksp), `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
348 @*/
349 PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
350 {
351   PetscFunctionBegin;
352   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
353   PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
357 /*@
358   PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
359   Jacobi preconditioner.
360 
361   Collective
362 
363   Input Parameters:
364 + pc     - the preconditioner context
365 . blocks - the number of blocks
366 - lens   - [optional] integer array containing the size of each block
367 
368   Options Database Key:
369 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
370 
371   Level: intermediate
372 
373   Note:
374   Currently only a limited number of blocking configurations are supported.
375   All processors sharing the `PC` must call this routine with the same data.
376 
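  Example Usage:
  A minimal sketch, assuming `pc` is the preconditioner of a `KSP`; the choice of 4 blocks is illustrative only:
.vb
  PetscCall(PCSetType(pc, PCBJACOBI));
  PetscCall(PCBJacobiSetTotalBlocks(pc, 4, NULL)); /* NULL lens gives (nearly) equal-sized blocks */
.ve
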
377 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
378 @*/
379 PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
380 {
381   PetscFunctionBegin;
382   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
383   PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
384   PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
385   PetscFunctionReturn(PETSC_SUCCESS);
386 }
387 
388 /*@C
389   PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
390   Jacobi, `PCBJACOBI`, preconditioner.
391 
392   Not Collective
393 
394   Input Parameter:
395 . pc - the preconditioner context
396 
397   Output Parameters:
398 + blocks - the number of blocks
399 - lens   - integer array containing the size of each block
400 
401   Level: intermediate
402 
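  Example Usage:
  A minimal sketch with hypothetical variable names:
.vb
  PetscInt        nblocks;
  const PetscInt *lens;

  PetscCall(PCBJacobiGetTotalBlocks(pc, &nblocks, &lens)); /* lens is NULL unless sizes were set explicitly */
.ve
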
403 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
404 @*/
405 PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
406 {
407   PetscFunctionBegin;
408   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
409   PetscAssertPointer(blocks, 2);
410   PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
411   PetscFunctionReturn(PETSC_SUCCESS);
412 }
413 
414 /*@
415   PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
416   Jacobi, `PCBJACOBI`, preconditioner.
417 
418   Not Collective
419 
420   Input Parameters:
421 + pc     - the preconditioner context
422 . blocks - the number of blocks
423 - lens   - [optional] integer array containing size of each block
424 
425   Options Database Key:
426 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
427 
428   Level: intermediate
429 
430   Note:
431   Currently only a limited number of blocking configurations are supported.
432 
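  Example Usage:
  A minimal sketch, assuming this process owns m rows; the split into two blocks of sizes m1 and m2 (with m1 + m2 = m) is illustrative only:
.vb
  PetscInt lens[2] = {m1, m2};

  PetscCall(PCBJacobiSetLocalBlocks(pc, 2, lens)); /* two blocks on this MPI process */
.ve
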
433 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
434 @*/
435 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
436 {
437   PetscFunctionBegin;
438   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
439   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonnegative blocks");
440   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
441   PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 
444 /*@C
445   PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
446   Jacobi, `PCBJACOBI`, preconditioner.
447 
448   Not Collective
449 
450   Input Parameter:
451 . pc     - the preconditioner context
452 
453   Output Parameters:
454 + blocks - the number of blocks
455 - lens   - [optional] integer array containing the size of each block
454 
455   Level: intermediate
456 
457   Note:
458   Currently only a limited number of blocking configurations are supported.
459 
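  Example Usage:
  A minimal sketch with hypothetical variable names, typically called after `PCSetUp()`:
.vb
  PetscInt        nlocal;
  const PetscInt *lens;

  PetscCall(PCBJacobiGetLocalBlocks(pc, &nlocal, &lens)); /* lens holds the size of each local block */
.ve
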
460 .seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
461 @*/
462 PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
463 {
464   PetscFunctionBegin;
465   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
466   PetscAssertPointer(blocks, 2);
467   PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
468   PetscFunctionReturn(PETSC_SUCCESS);
469 }
470 
471 /*MC
472    PCBJACOBI - Use block Jacobi preconditioning, in which each block is (approximately) solved with
473            its own `KSP` object.
474 
475    Options Database Keys:
476 +  -pc_use_amat - use Amat to apply the blocks of the operator in the inner Krylov method
477 -  -pc_bjacobi_blocks <n> - use n total blocks
478 
479    Level: beginner
480 
481    Notes:
482     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block
483 
484     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
485 
486      To set options on the solvers for all blocks, prefix the `KSP` and `PC` options database
487         keys with -sub_. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
488 
489      To set the options on the solvers separately for each block, call `PCBJacobiGetSubKSP()`
490          and set the options directly on the resulting `KSP` objects (you can access their `PC`
491          with `KSPGetPC()`)
492 
493      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
494          performance. A different block partitioning may require additional data transfers
495          between host and GPU that degrade performance.
496 
497      When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
498 
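   Example Usage:
   A typical command-line sketch; `./ex2` stands for any `KSP`-based executable and is an illustrative assumption:
.vb
   mpiexec -n 4 ./ex2 -pc_type bjacobi -pc_bjacobi_blocks 8 -sub_ksp_type preonly -sub_pc_type ilu -ksp_view
.ve
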
499 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`,
500           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
501           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
502 M*/
503 
504 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
505 {
506   PetscMPIInt rank;
507   PC_BJacobi *jac;
508 
509   PetscFunctionBegin;
510   PetscCall(PetscNew(&jac));
511   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
512 
513   pc->ops->apply             = NULL;
514   pc->ops->matapply          = NULL;
515   pc->ops->applytranspose    = NULL;
516   pc->ops->matapplytranspose = NULL;
517   pc->ops->setup             = PCSetUp_BJacobi;
518   pc->ops->destroy           = PCDestroy_BJacobi;
519   pc->ops->setfromoptions    = PCSetFromOptions_BJacobi;
520   pc->ops->view              = PCView_BJacobi;
521   pc->ops->applyrichardson   = NULL;
522 
523   pc->data         = (void *)jac;
524   jac->n           = -1;
525   jac->n_local     = -1;
526   jac->first_local = rank;
527   jac->ksp         = NULL;
528   jac->g_lens      = NULL;
529   jac->l_lens      = NULL;
530   jac->psubcomm    = NULL;
531 
532   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
533   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
534   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
535   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
536   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
537   PetscFunctionReturn(PETSC_SUCCESS);
538 }
539 
540 /*
541         These are for a single block per processor; they work for AIJ and BAIJ matrices, both Seq and MPI
542 */
543 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
544 {
545   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
546   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
547 
548   PetscFunctionBegin;
549   PetscCall(KSPReset(jac->ksp[0]));
550   PetscCall(VecDestroy(&bjac->x));
551   PetscCall(VecDestroy(&bjac->y));
552   PetscFunctionReturn(PETSC_SUCCESS);
553 }
554 
555 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
556 {
557   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
558   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
559 
560   PetscFunctionBegin;
561   PetscCall(PCReset_BJacobi_Singleblock(pc));
562   PetscCall(KSPDestroy(&jac->ksp[0]));
563   PetscCall(PetscFree(jac->ksp));
564   PetscCall(PetscFree(bjac));
565   PetscCall(PCDestroy_BJacobi(pc));
566   PetscFunctionReturn(PETSC_SUCCESS);
567 }
568 
569 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
570 {
571   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
572   KSP                subksp = jac->ksp[0];
573   KSPConvergedReason reason;
574 
575   PetscFunctionBegin;
576   PetscCall(KSPSetUp(subksp));
577   PetscCall(KSPGetConvergedReason(subksp, &reason));
578   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
579   PetscFunctionReturn(PETSC_SUCCESS);
580 }
581 
582 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
583 {
584   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
585   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
586 
587   PetscFunctionBegin;
588   PetscCall(VecGetLocalVectorRead(x, bjac->x));
589   PetscCall(VecGetLocalVector(y, bjac->y));
590   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
591      matrix may change even if the outer KSP/PC has not updated the preconditioner. That would trigger a rebuild
592      of the inner preconditioner automatically unless we pass down the outer preconditioner's reuse flag. */
593   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
594   PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
595   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
596   PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
597   PetscCall(VecRestoreLocalVector(y, bjac->y));
598   PetscFunctionReturn(PETSC_SUCCESS);
599 }
600 
601 static PetscErrorCode PCMatApply_BJacobi_Singleblock_Private(PC pc, Mat X, Mat Y, PetscBool transpose)
602 {
603   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
604   Mat         sX, sY;
605 
606   PetscFunctionBegin;
607   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
608      matrix may change even if the outer KSP/PC has not updated the preconditioner. That would trigger a rebuild
609      of the inner preconditioner automatically unless we pass down the outer preconditioner's reuse flag. */
610   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
611   PetscCall(MatDenseGetLocalMatrix(X, &sX));
612   PetscCall(MatDenseGetLocalMatrix(Y, &sY));
613   if (!transpose) {
614     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], sX, sY, 0));
615     PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
616     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], sX, sY, 0));
617   } else {
618     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[0], sX, sY, 0));
619     PetscCall(KSPMatSolveTranspose(jac->ksp[0], sX, sY));
620     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[0], sX, sY, 0));
621   }
622   PetscFunctionReturn(PETSC_SUCCESS);
623 }
624 
625 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
626 {
627   PetscFunctionBegin;
628   PetscCall(PCMatApply_BJacobi_Singleblock_Private(pc, X, Y, PETSC_FALSE));
629   PetscFunctionReturn(PETSC_SUCCESS);
630 }
631 
632 static PetscErrorCode PCMatApplyTranspose_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
633 {
634   PetscFunctionBegin;
635   PetscCall(PCMatApply_BJacobi_Singleblock_Private(pc, X, Y, PETSC_TRUE));
636   PetscFunctionReturn(PETSC_SUCCESS);
637 }
638 
639 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
640 {
641   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
642   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
643   PetscScalar            *y_array;
644   const PetscScalar      *x_array;
645   PC                      subpc;
646 
647   PetscFunctionBegin;
648   /*
649       The VecPlaceArray() calls avoid having to copy the
650     x and y vectors into the bjac->x and bjac->y work vectors.
651     The reason for the work vectors is that we need sequential
652     vectors for the sequential solve.
653   */
654   PetscCall(VecGetArrayRead(x, &x_array));
655   PetscCall(VecGetArray(y, &y_array));
656   PetscCall(VecPlaceArray(bjac->x, x_array));
657   PetscCall(VecPlaceArray(bjac->y, y_array));
658   /* apply the symmetric left portion of the inner PC operator */
659   /* note this bypasses the inner KSP and its options completely */
660   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
661   PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
662   PetscCall(VecResetArray(bjac->x));
663   PetscCall(VecResetArray(bjac->y));
664   PetscCall(VecRestoreArrayRead(x, &x_array));
665   PetscCall(VecRestoreArray(y, &y_array));
666   PetscFunctionReturn(PETSC_SUCCESS);
667 }
668 
669 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
670 {
671   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
672   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
673   PetscScalar            *y_array;
674   const PetscScalar      *x_array;
675   PC                      subpc;
676 
677   PetscFunctionBegin;
678   /*
679       The VecPlaceArray() calls avoid having to copy the
680     x and y vectors into the bjac->x and bjac->y work vectors.
681     The reason for the work vectors is that we need sequential
682     vectors for the sequential solve.
683   */
684   PetscCall(VecGetArrayRead(x, &x_array));
685   PetscCall(VecGetArray(y, &y_array));
686   PetscCall(VecPlaceArray(bjac->x, x_array));
687   PetscCall(VecPlaceArray(bjac->y, y_array));
688 
689   /* apply the symmetric right portion of the inner PC operator */
690   /* note this bypasses the inner KSP and its options completely */
691 
692   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
693   PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));
694 
695   PetscCall(VecResetArray(bjac->x));
696   PetscCall(VecResetArray(bjac->y));
697   PetscCall(VecRestoreArrayRead(x, &x_array));
698   PetscCall(VecRestoreArray(y, &y_array));
699   PetscFunctionReturn(PETSC_SUCCESS);
700 }
701 
702 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
703 {
704   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
705   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
706   PetscScalar            *y_array;
707   const PetscScalar      *x_array;
708 
709   PetscFunctionBegin;
710   /*
711       The VecPlaceArray() calls avoid having to copy the
712     x and y vectors into the bjac->x and bjac->y work vectors.
713     The reason for the work vectors is that we need sequential
714     vectors for the sequential solve.
715   */
716   PetscCall(VecGetArrayRead(x, &x_array));
717   PetscCall(VecGetArray(y, &y_array));
718   PetscCall(VecPlaceArray(bjac->x, x_array));
719   PetscCall(VecPlaceArray(bjac->y, y_array));
720   PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
721   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
722   PetscCall(VecResetArray(bjac->x));
723   PetscCall(VecResetArray(bjac->y));
724   PetscCall(VecRestoreArrayRead(x, &x_array));
725   PetscCall(VecRestoreArray(y, &y_array));
726   PetscFunctionReturn(PETSC_SUCCESS);
727 }
728 
729 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
730 {
731   PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
732   PetscInt                m;
733   KSP                     ksp;
734   PC_BJacobi_Singleblock *bjac;
735   PetscBool               wasSetup = PETSC_TRUE;
736   VecType                 vectype;
737   const char             *prefix;
738 
739   PetscFunctionBegin;
740   if (!pc->setupcalled) {
741     if (!jac->ksp) {
742       PetscInt nestlevel;
743 
744       wasSetup = PETSC_FALSE;
745 
746       PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
747       PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
748       PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
749       PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
750       PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
751       PetscCall(KSPSetType(ksp, KSPPREONLY));
752       PetscCall(PCGetOptionsPrefix(pc, &prefix));
753       PetscCall(KSPSetOptionsPrefix(ksp, prefix));
754       PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
755 
756       pc->ops->reset               = PCReset_BJacobi_Singleblock;
757       pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
758       pc->ops->apply               = PCApply_BJacobi_Singleblock;
759       pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
760       pc->ops->matapplytranspose   = PCMatApplyTranspose_BJacobi_Singleblock;
761       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
762       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
763       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
764       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;
765 
766       PetscCall(PetscMalloc1(1, &jac->ksp));
767       jac->ksp[0] = ksp;
768 
769       PetscCall(PetscNew(&bjac));
770       jac->data = (void *)bjac;
771     } else {
772       ksp  = jac->ksp[0];
773       bjac = (PC_BJacobi_Singleblock *)jac->data;
774     }
775 
776     /*
777       The reason we need to generate these vectors is to serve
778       as the right-hand side and solution vector for the solve on the
779       block. We do not need to allocate space for the vectors since
780       that is provided via VecPlaceArray() just before the call to
781       KSPSolve() on the block.
782     */
783     PetscCall(MatGetSize(pmat, &m, &m));
784     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
785     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
786     PetscCall(MatGetVecType(pmat, &vectype));
787     PetscCall(VecSetType(bjac->x, vectype));
788     PetscCall(VecSetType(bjac->y, vectype));
789   } else {
790     ksp  = jac->ksp[0];
791     bjac = (PC_BJacobi_Singleblock *)jac->data;
792   }
793   PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
794   if (pc->useAmat) {
795     PetscCall(KSPSetOperators(ksp, mat, pmat));
796     PetscCall(MatSetOptionsPrefix(mat, prefix));
797   } else {
798     PetscCall(KSPSetOperators(ksp, pmat, pmat));
799   }
800   PetscCall(MatSetOptionsPrefix(pmat, prefix));
801   if (!wasSetup && pc->setfromoptionscalled) {
802     /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
803     PetscCall(KSPSetFromOptions(ksp));
804   }
805   PetscFunctionReturn(PETSC_SUCCESS);
806 }
807 
808 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
809 {
810   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
811   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
812   PetscInt               i;
813 
814   PetscFunctionBegin;
815   if (bjac && bjac->pmat) {
816     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
817     if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
818   }
819 
820   for (i = 0; i < jac->n_local; i++) {
821     PetscCall(KSPReset(jac->ksp[i]));
822     if (bjac && bjac->x) {
823       PetscCall(VecDestroy(&bjac->x[i]));
824       PetscCall(VecDestroy(&bjac->y[i]));
825       PetscCall(ISDestroy(&bjac->is[i]));
826     }
827   }
828   PetscCall(PetscFree(jac->l_lens));
829   PetscCall(PetscFree(jac->g_lens));
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
834 {
835   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
836   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
837   PetscInt               i;
838 
839   PetscFunctionBegin;
840   PetscCall(PCReset_BJacobi_Multiblock(pc));
841   if (bjac) {
842     PetscCall(PetscFree2(bjac->x, bjac->y));
843     PetscCall(PetscFree(bjac->starts));
844     PetscCall(PetscFree(bjac->is));
845   }
846   PetscCall(PetscFree(jac->data));
847   for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
848   PetscCall(PetscFree(jac->ksp));
849   PetscCall(PCDestroy_BJacobi(pc));
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
854 {
855   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
856   PetscInt           i, n_local = jac->n_local;
857   KSPConvergedReason reason;
858 
859   PetscFunctionBegin;
860   for (i = 0; i < n_local; i++) {
861     PetscCall(KSPSetUp(jac->ksp[i]));
862     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
863     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
864   }
865   PetscFunctionReturn(PETSC_SUCCESS);
866 }
867 
868 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
869 {
870   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
871   PetscInt               i, n_local = jac->n_local;
872   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
873   PetscScalar           *yin;
874   const PetscScalar     *xin;
875 
876   PetscFunctionBegin;
877   PetscCall(VecGetArrayRead(x, &xin));
878   PetscCall(VecGetArray(y, &yin));
879   for (i = 0; i < n_local; i++) {
880     /*
881        To avoid copying the subvector from x into a workspace we instead
882        make the workspace vector array point to the subpart of the array of
883        the global vector.
884     */
885     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
886     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
887 
888     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
889     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
890     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
891     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
892 
893     PetscCall(VecResetArray(bjac->x[i]));
894     PetscCall(VecResetArray(bjac->y[i]));
895   }
896   PetscCall(VecRestoreArrayRead(x, &xin));
897   PetscCall(VecRestoreArray(y, &yin));
898   PetscFunctionReturn(PETSC_SUCCESS);
899 }
900 
901 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
902 {
903   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
904   PetscInt               i, n_local = jac->n_local;
905   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
906   PetscScalar           *yin;
907   const PetscScalar     *xin;
908   PC                     subpc;
909 
910   PetscFunctionBegin;
911   PetscCall(VecGetArrayRead(x, &xin));
912   PetscCall(VecGetArray(y, &yin));
913   for (i = 0; i < n_local; i++) {
914     /*
915        To avoid copying the subvector from x into a workspace we instead
916        make the workspace vector array point to the subpart of the array of
917        the global vector.
918     */
919     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
920     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
921 
922     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
923     /* apply the symmetric left portion of the inner PC operator */
924     /* note this bypasses the inner KSP and its options completely */
925     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
926     PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
927     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
928 
929     PetscCall(VecResetArray(bjac->x[i]));
930     PetscCall(VecResetArray(bjac->y[i]));
931   }
932   PetscCall(VecRestoreArrayRead(x, &xin));
933   PetscCall(VecRestoreArray(y, &yin));
934   PetscFunctionReturn(PETSC_SUCCESS);
935 }
936 
937 static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
938 {
939   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
940   PetscInt               i, n_local = jac->n_local;
941   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
942   PetscScalar           *yin;
943   const PetscScalar     *xin;
944   PC                     subpc;
945 
946   PetscFunctionBegin;
947   PetscCall(VecGetArrayRead(x, &xin));
948   PetscCall(VecGetArray(y, &yin));
949   for (i = 0; i < n_local; i++) {
950     /*
951        To avoid copying the subvector from x into a workspace we instead
952        make the workspace vector array point to the subpart of the array of
953        the global vector.
954     */
955     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
956     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
957 
958     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
959     /* apply the symmetric right portion of the inner PC operator */
960     /* note this bypasses the inner KSP and its options completely */
961     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
962     PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
963     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
964 
965     PetscCall(VecResetArray(bjac->x[i]));
966     PetscCall(VecResetArray(bjac->y[i]));
967   }
968   PetscCall(VecRestoreArrayRead(x, &xin));
969   PetscCall(VecRestoreArray(y, &yin));
970   PetscFunctionReturn(PETSC_SUCCESS);
971 }
972 
973 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
974 {
975   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
976   PetscInt               i, n_local = jac->n_local;
977   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
978   PetscScalar           *yin;
979   const PetscScalar     *xin;
980 
981   PetscFunctionBegin;
982   PetscCall(VecGetArrayRead(x, &xin));
983   PetscCall(VecGetArray(y, &yin));
984   for (i = 0; i < n_local; i++) {
985     /*
986        To avoid copying the subvector from x into a workspace we instead
987        make the workspace vector array point to the subpart of the array of
988        the global vector.
989     */
990     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
991     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));
992 
993     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
994     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
995     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
996     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
997 
998     PetscCall(VecResetArray(bjac->x[i]));
999     PetscCall(VecResetArray(bjac->y[i]));
1000   }
1001   PetscCall(VecRestoreArrayRead(x, &xin));
1002   PetscCall(VecRestoreArray(y, &yin));
1003   PetscFunctionReturn(PETSC_SUCCESS);
1004 }
1005 
1006 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
1007 {
1008   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
1009   PetscInt               m, n_local, N, M, start, i;
1010   const char            *prefix;
1011   KSP                    ksp;
1012   Vec                    x, y;
1013   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
1014   PC                     subpc;
1015   IS                     is;
1016   MatReuse               scall;
1017   VecType                vectype;
1018   MatNullSpace          *nullsp_mat = NULL, *nullsp_pmat = NULL;
1019 
1020   PetscFunctionBegin;
1021   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
1022 
1023   n_local = jac->n_local;
1024 
1025   if (pc->useAmat) {
1026     PetscBool same;
1027     PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
1028     PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
1029   }
1030 
1031   if (!pc->setupcalled) {
1032     PetscInt nestlevel;
1033 
1034     scall = MAT_INITIAL_MATRIX;
1035 
1036     if (!jac->ksp) {
1037       pc->ops->reset               = PCReset_BJacobi_Multiblock;
1038       pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
1039       pc->ops->apply               = PCApply_BJacobi_Multiblock;
1040       pc->ops->matapply            = NULL;
1041       pc->ops->matapplytranspose   = NULL;
1042       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
1043       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
1044       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
1045       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;
1046 
1047       PetscCall(PetscNew(&bjac));
1048       PetscCall(PetscMalloc1(n_local, &jac->ksp));
1049       PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
1050       PetscCall(PetscMalloc1(n_local, &bjac->starts));
1051 
1052       jac->data = (void *)bjac;
1053       PetscCall(PetscMalloc1(n_local, &bjac->is));
1054 
1055       for (i = 0; i < n_local; i++) {
1056         PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
1057         PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
1058         PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
1059         PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
1060         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
1061         PetscCall(KSPSetType(ksp, KSPPREONLY));
1062         PetscCall(KSPGetPC(ksp, &subpc));
1063         PetscCall(PCGetOptionsPrefix(pc, &prefix));
1064         PetscCall(KSPSetOptionsPrefix(ksp, prefix));
1065         PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
1066 
1067         jac->ksp[i] = ksp;
1068       }
1069     } else {
1070       bjac = (PC_BJacobi_Multiblock *)jac->data;
1071     }
1072 
1073     start = 0;
1074     PetscCall(MatGetVecType(pmat, &vectype));
1075     for (i = 0; i < n_local; i++) {
1076       m = jac->l_lens[i];
1077       /*
1078       The reason we need to generate these vectors is to serve
1079       as the right-hand side and solution vector for the solve on the
1080       block. We do not need to allocate space for the vectors since
1081       that is provided via VecPlaceArray() just before the call to
1082       KSPSolve() on the block.
1083 
1084       */
1085       PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
1086       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
1087       PetscCall(VecSetType(x, vectype));
1088       PetscCall(VecSetType(y, vectype));
1089 
1090       bjac->x[i]      = x;
1091       bjac->y[i]      = y;
1092       bjac->starts[i] = start;
1093 
1094       PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
1095       bjac->is[i] = is;
1096 
1097       start += m;
1098     }
1099   } else {
1100     bjac = (PC_BJacobi_Multiblock *)jac->data;
1101     /*
1102        Destroy the blocks from the previous iteration
1103     */
1104     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1105       PetscCall(MatGetNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
1106       PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
1107       if (pc->useAmat) {
1108         PetscCall(MatGetNullSpaces(n_local, bjac->mat, &nullsp_mat));
1109         PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
1110       }
1111       scall = MAT_INITIAL_MATRIX;
1112     } else scall = MAT_REUSE_MATRIX;
1113   }
1114 
1115   PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
1116   if (nullsp_pmat) PetscCall(MatRestoreNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
1117   if (pc->useAmat) {
1118     PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
1119     if (nullsp_mat) PetscCall(MatRestoreNullSpaces(n_local, bjac->mat, &nullsp_mat));
1120   }
1121   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1122      different boundary conditions for the submatrices than for the global problem) */
1123   PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));
1124 
1125   for (i = 0; i < n_local; i++) {
1126     PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
1127     if (pc->useAmat) {
1128       PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
1129       PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
1130     } else {
1131       PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
1132     }
1133     PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
1134     if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
1135   }
1136   PetscFunctionReturn(PETSC_SUCCESS);
1137 }
1138 
1139 /*
1140       These are for a single block with multiple processes
1141 */
1142 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1143 {
1144   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1145   KSP                subksp = jac->ksp[0];
1146   KSPConvergedReason reason;
1147 
1148   PetscFunctionBegin;
1149   PetscCall(KSPSetUp(subksp));
1150   PetscCall(KSPGetConvergedReason(subksp, &reason));
1151   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1156 {
1157   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1158   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1159 
1160   PetscFunctionBegin;
1161   PetscCall(VecDestroy(&mpjac->ysub));
1162   PetscCall(VecDestroy(&mpjac->xsub));
1163   PetscCall(MatDestroy(&mpjac->submats));
1164   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1165   PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167 
1168 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1169 {
1170   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1171   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1172 
1173   PetscFunctionBegin;
1174   PetscCall(PCReset_BJacobi_Multiproc(pc));
1175   PetscCall(KSPDestroy(&jac->ksp[0]));
1176   PetscCall(PetscFree(jac->ksp));
1177   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));
1178 
1179   PetscCall(PetscFree(mpjac));
1180   PetscCall(PCDestroy_BJacobi(pc));
1181   PetscFunctionReturn(PETSC_SUCCESS);
1182 }
1183 
1184 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
1185 {
1186   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1187   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1188   PetscScalar          *yarray;
1189   const PetscScalar    *xarray;
1190   KSPConvergedReason    reason;
1191 
1192   PetscFunctionBegin;
1193   /* place x's and y's local arrays into xsub and ysub */
1194   PetscCall(VecGetArrayRead(x, &xarray));
1195   PetscCall(VecGetArray(y, &yarray));
1196   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1197   PetscCall(VecPlaceArray(mpjac->ysub, yarray));
1198 
1199   /* apply preconditioner on each matrix block */
1200   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1201   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1202   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1203   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1204   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1205   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1206 
1207   PetscCall(VecResetArray(mpjac->xsub));
1208   PetscCall(VecResetArray(mpjac->ysub));
1209   PetscCall(VecRestoreArrayRead(x, &xarray));
1210   PetscCall(VecRestoreArray(y, &yarray));
1211   PetscFunctionReturn(PETSC_SUCCESS);
1212 }
1213 
1214 static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
1215 {
1216   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
1217   KSPConvergedReason reason;
1218   Mat                sX, sY;
1219   const PetscScalar *x;
1220   PetscScalar       *y;
1221   PetscInt           m, N, lda, ldb;
1222 
1223   PetscFunctionBegin;
1224   /* apply preconditioner on each matrix block */
1225   PetscCall(MatGetLocalSize(X, &m, NULL));
1226   PetscCall(MatGetSize(X, NULL, &N));
1227   PetscCall(MatDenseGetLDA(X, &lda));
1228   PetscCall(MatDenseGetLDA(Y, &ldb));
1229   PetscCall(MatDenseGetArrayRead(X, &x));
1230   PetscCall(MatDenseGetArrayWrite(Y, &y));
1231   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
1232   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
1233   PetscCall(MatDenseSetLDA(sX, lda));
1234   PetscCall(MatDenseSetLDA(sY, ldb));
1235   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1236   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
1237   PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
1238   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1239   PetscCall(MatDestroy(&sY));
1240   PetscCall(MatDestroy(&sX));
1241   PetscCall(MatDenseRestoreArrayWrite(Y, &y));
1242   PetscCall(MatDenseRestoreArrayRead(X, &x));
1243   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1244   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1245   PetscFunctionReturn(PETSC_SUCCESS);
1246 }
1247 
1248 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
1249 {
1250   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1251   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1252   PetscInt              m, n;
1253   MPI_Comm              comm, subcomm = 0;
1254   const char           *prefix;
1255   PetscBool             wasSetup = PETSC_TRUE;
1256   VecType               vectype;
1257 
1258   PetscFunctionBegin;
1259   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
1260   PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
1261   jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
1262   if (!pc->setupcalled) {
1263     PetscInt nestlevel;
1264 
1265     wasSetup = PETSC_FALSE;
1266     PetscCall(PetscNew(&mpjac));
1267     jac->data = (void *)mpjac;
1268 
1269     /* initialize the data structure mpjac */
1270     if (!jac->psubcomm) {
1271       /* Create default contiguous subcommunicators if the user does not provide them */
1272       PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
1273       PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
1274       PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
1275     }
1276     mpjac->psubcomm = jac->psubcomm;
1277     subcomm         = PetscSubcommChild(mpjac->psubcomm);
1278 
1279     /* Get matrix blocks of pmat */
1280     PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
1281 
1282     /* create a new KSP/PC of which the processes in each subcommunicator share a copy */
1283     PetscCall(PetscMalloc1(1, &jac->ksp));
1284     PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
1285     PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
1286     PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1));
1287     PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
1288     PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
1289     PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
1290     PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));
1291 
1292     PetscCall(PCGetOptionsPrefix(pc, &prefix));
1293     PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
1294     PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
1295     PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
1296     PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));
1297 
1298     /* create dummy vectors xsub and ysub */
1299     PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
1300     PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
1301     PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
1302     PetscCall(MatGetVecType(mpjac->submats, &vectype));
1303     PetscCall(VecSetType(mpjac->xsub, vectype));
1304     PetscCall(VecSetType(mpjac->ysub, vectype));
1305 
1306     pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
1307     pc->ops->reset         = PCReset_BJacobi_Multiproc;
1308     pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
1309     pc->ops->apply         = PCApply_BJacobi_Multiproc;
1310     pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
1311   } else { /* pc->setupcalled */
1312     subcomm = PetscSubcommChild(mpjac->psubcomm);
1313     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1314       /* destroy old matrix blocks, then get new matrix blocks */
1315       if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
1316       PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
1317     } else {
1318       PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
1319     }
1320     PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
1321   }
1322 
1323   if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
1324   PetscFunctionReturn(PETSC_SUCCESS);
1325 }
1326