/*
   Defines a block Jacobi preconditioner.

   xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision eefb368e60eb43c3e08ca0927788b33650449484)
*/

#include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/

static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);

static PetscErrorCode PCSetUp_BJacobi(PC pc)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  Mat         mat = pc->mat, pmat = pc->pmat;
  PetscBool   hasop;
  PetscInt    N, M, start, i, sum, end;
  PetscInt    bs, i_start = -1, i_end = -1;
  PetscMPIInt rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
  PetscCall(MatGetBlockSize(pc->pmat, &bs));

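  /* Fewer blocks than MPI processes: each block is then shared by a subcommunicator of
     processes, so hand off to the multiproc variant (see PCSetUp_BJacobi_Multiproc()). */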
  if (jac->n > 0 && jac->n < size) {
    PetscCall(PCSetUp_BJacobi_Multiproc(pc));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Determine the number of blocks assigned to each processor */
  if (jac->n_local > 0 && jac->n < 0) { /* local block count given */
    PetscCallMPI(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i = 0; i < jac->n_local; i++) {
        PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
    } else {
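      /* Default split: distribute the M/bs matrix blocks as evenly as possible over the
         n_local Jacobi blocks; e.g. M = 10, bs = 1, n_local = 3 gives lengths 4, 3, 3. */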
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given: determine which blocks are local */
    if (jac->g_lens) {
      /* check that g_lens has valid entries */
      for (i = 0; i < jac->n; i++) {
        PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
        PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        jac->n_local = jac->n;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
        /* check that user set these correctly */
        sum = 0;
        for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
        PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
      } else {
        PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
        /* loop over blocks determining first one owned by me */
        sum = 0;
        for (i = 0; i < jac->n + 1; i++) {
          if (sum == start) {
            i_start = i;
            goto start_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      start_1:
        for (i = i_start; i < jac->n + 1; i++) {
          if (sum == end) {
            i_end = i;
            goto end_1;
          }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
      end_1:
        jac->n_local = i_end - i_start;
        PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
        PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
      }
    } else { /* no global block lengths given, determine them using the default layout */
      jac->n_local = jac->n / size + ((jac->n % size) > rank);
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) {
        jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
        PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
    jac->n       = size;
    jac->n_local = 1;
    PetscCall(PetscMalloc1(1, &jac->l_lens));
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
      for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
    }
  }
  PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");

  /* Determine mat and pmat */
  PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
  if (!hasop && size == 1) {
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
    } else pmat = mat;
  }

  /*
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
  } else {
    PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Default destroy, used if the PC has never been set up */
static PetscErrorCode PCDestroy_BJacobi(PC pc)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  PetscCall(PetscFree(jac->g_lens));
  PetscCall(PetscFree(jac->l_lens));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems PetscOptionsObject)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  PetscInt    blocks, i;
  PetscBool   flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
  PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
  if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
  PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
  if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
  if (jac->ksp) {
    /* The sub-KSPs have already been created (e.g., by PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions()
     * was not called on them unless this routine had already run once. */
    for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
  }
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscMPIInt           rank;
  PetscInt              i;
  PetscBool             iascii, isstring, isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
  const char           *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
      if (jac->ksp && !jac->psubcomm) {
        PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        if (rank == 0) {
          PetscCall(PetscViewerASCIIPushTab(sviewer));
          PetscCall(KSPView(jac->ksp[0], sviewer));
          PetscCall(PetscViewerASCIIPopTab(sviewer));
        }
        PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
        /* extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else if (mpjac && jac->ksp && mpjac->psubcomm) {
        PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        if (!mpjac->psubcomm->color) {
          PetscCall(PetscViewerASCIIPushTab(sviewer));
          PetscCall(KSPView(*jac->ksp, sviewer));
          PetscCall(PetscViewerASCIIPopTab(sviewer));
        }
        PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
        /* extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      }
    } else {
      PetscInt n_global;
      PetscCallMPI(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
      PetscCall(PetscViewerASCIIPushTab(viewer));
      PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
      for (i = 0; i < jac->n_local; i++) {
        PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
        PetscCall(KSPView(jac->ksp[i], sviewer));
        PetscCall(PetscViewerASCIIPrintf(sviewer, "- - - - - - - - - - - - - - - - - -\n"));
      }
      PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      PetscCall(PetscViewerASCIIPopTab(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
  } else if (isdraw) {
    PetscDraw draw;
    char      str[25];
    PetscReal x, y, bottom, h;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
    PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
    PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
    bottom = y - h;
    PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
    /* warning: the communicator on the viewer is different than on the KSP in parallel */
    if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
    PetscCall(PetscDrawPopCurrentPoint(draw));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");

  if (n_local) *n_local = jac->n_local;
  if (first_local) *first_local = jac->first_local;
  if (ksp) *ksp = jac->ksp;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt *lens)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
  jac->n = blocks;
  if (!lens) jac->g_lens = NULL;
  else {
    PetscCall(PetscMalloc1(blocks, &jac->g_lens));
    PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  *blocks = jac->n;
  if (lens) *lens = jac->g_lens;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  jac->n_local = blocks;
  if (!lens) jac->l_lens = NULL;
  else {
    PetscCall(PetscMalloc1(blocks, &jac->l_lens));
    PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;

  PetscFunctionBegin;
  *blocks = jac->n_local;
  if (lens) *lens = jac->l_lens;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
  this processor.

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameters:
+ n_local     - the number of blocks on this processor, or NULL
. first_local - the global number of the first block on this processor, or NULL
- ksp         - the array of KSP contexts

  Level: advanced

  Notes:
  The array of `KSP` contexts returned by `PCBJacobiGetSubKSP()` is owned by the `PC` and must not be freed.

  Currently for some matrix implementations only 1 block per processor
  is supported.

  You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.

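  Example Usage:
  A minimal sketch (error handling omitted) that sets the preconditioner type on each local block:
.vb
  KSP      *subksp;
  PetscInt  i, nlocal;

  PCSetUp(pc);
  PCBJacobiGetSubKSP(pc, &nlocal, NULL, &subksp);
  for (i = 0; i < nlocal; i++) {
    PC subpc;

    KSPGetPC(subksp[i], &subpc);
    PCSetType(subpc, PCILU);
  }
.ve
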
.seealso: [](ch_ksp), `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
@*/
PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
  Jacobi preconditioner.

  Collective

  Input Parameters:
+ pc     - the preconditioner context
. blocks - the number of blocks
- lens   - [optional] integer array containing the size of each block

  Options Database Key:
. -pc_bjacobi_blocks <blocks> - Sets the number of global blocks

  Level: intermediate

  Note:
  Currently only a limited number of blocking configurations are supported.
  All processors sharing the `PC` must call this routine with the same data.

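  Example Usage:
  A sketch splitting a matrix with 100 global rows into four uneven blocks (the sizes are illustrative; they must sum to the global dimension and each must be divisible by the matrix block size):
.vb
  PetscInt lens[4] = {10, 20, 30, 40};

  PCSetType(pc, PCBJACOBI);
  PCBJacobiSetTotalBlocks(pc, 4, lens);
.ve
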
.seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
@*/
PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
  PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
  Jacobi, `PCBJACOBI`, preconditioner.

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameters:
+ blocks - the number of blocks
- lens   - integer array containing the size of each block

  Level: intermediate

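  Example Usage:
  A sketch querying the current layout; the returned lens is NULL if per-block sizes were never set:
.vb
  PetscInt        nblocks;
  const PetscInt *lens;

  PCBJacobiGetTotalBlocks(pc, &nblocks, &lens);
.ve
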
.seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
@*/
PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
  Jacobi, `PCBJACOBI`, preconditioner.

  Not Collective

  Input Parameters:
+ pc     - the preconditioner context
. blocks - the number of blocks
- lens   - [optional] integer array containing size of each block

  Options Database Key:
. -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks

  Level: intermediate

  Note:
  Currently only a limited number of blocking configurations are supported.

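  Example Usage:
  A sketch giving this process two local blocks of equal size (passing NULL for lens splits the local rows evenly):
.vb
  PCSetType(pc, PCBJACOBI);
  PCBJacobiSetLocalBlocks(pc, 2, NULL);
.ve
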
.seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
@*/
PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonnegative blocks");
  PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
  Jacobi, `PCBJACOBI`, preconditioner.

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameters:
+ blocks - the number of blocks
- lens   - [optional] integer array containing size of each block

  Level: intermediate

  Note:
  Currently only a limited number of blocking configurations are supported.

.seealso: [](ch_ksp), `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
@*/
PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(blocks, 2);
  PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   PCBJACOBI - Use block Jacobi preconditioning, where each block is (approximately) solved with
           its own `KSP` object.

   Options Database Keys:
+  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
-  -pc_bjacobi_blocks <n> - use n total blocks

   Level: beginner

   Notes:
    See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block

    Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.

     To set options on the solvers for all blocks, prefix the `KSP` and `PC`
        options database keys with -sub_. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

     To set the options on the solvers separately for each block, call `PCBJacobiGetSubKSP()`
         and set the options directly on the resulting `KSP` object (you can access its `PC`
         with `KSPGetPC()`)

     For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
         performance.  Different block partitioning may lead to additional data transfers
         between host and GPU that lead to degraded performance.

     When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.

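   Example Usage:
   A typical command line (the application name is illustrative), running four blocks with ILU(1) as the block solver:
.vb
   mpiexec -n 4 ./app -pc_type bjacobi -pc_bjacobi_blocks 4 -sub_ksp_type preonly -sub_pc_type ilu -sub_pc_factor_levels 1
.ve
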
.seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
          `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
          `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
M*/

PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
{
  PetscMPIInt rank;
  PC_BJacobi *jac;

  PetscFunctionBegin;
  PetscCall(PetscNew(&jac));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));

  pc->ops->apply             = NULL;
  pc->ops->matapply          = NULL;
  pc->ops->applytranspose    = NULL;
  pc->ops->matapplytranspose = NULL;
  pc->ops->setup             = PCSetUp_BJacobi;
  pc->ops->destroy           = PCDestroy_BJacobi;
  pc->ops->setfromoptions    = PCSetFromOptions_BJacobi;
  pc->ops->view              = PCView_BJacobi;
  pc->ops->applyrichardson   = NULL;

  pc->data         = (void *)jac;
  jac->n           = -1;
  jac->n_local     = -1;
  jac->first_local = rank;
  jac->ksp         = NULL;
  jac->g_lens      = NULL;
  jac->l_lens      = NULL;
  jac->psubcomm    = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
        These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
*/
static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  PetscCall(KSPReset(jac->ksp[0]));
  PetscCall(VecDestroy(&bjac->x));
  PetscCall(VecDestroy(&bjac->y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  PetscCall(PCReset_BJacobi_Singleblock(pc));
  PetscCall(KSPDestroy(&jac->ksp[0]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PetscFree(bjac));
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
{
  PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
  KSP                subksp = jac->ksp[0];
  KSPConvergedReason reason;

  PetscFunctionBegin;
  PetscCall(KSPSetUp(subksp));
  PetscCall(KSPGetConvergedReason(subksp, &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

  PetscFunctionBegin;
  PetscCall(VecGetLocalVectorRead(x, bjac->x));
  PetscCall(VecGetLocalVector(y, bjac->y));
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner. This would trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioner's reuse flag. */
  PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0));
  PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0));
  PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
  PetscCall(VecRestoreLocalVector(y, bjac->y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMatApply_BJacobi_Singleblock_Private(PC pc, Mat X, Mat Y, PetscBool transpose)
{
  PC_BJacobi *jac = (PC_BJacobi *)pc->data;
  Mat         sX, sY;

  PetscFunctionBegin;
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner. This would trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioner's reuse flag. */
  PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
  PetscCall(MatDenseGetLocalMatrix(X, &sX));
  PetscCall(MatDenseGetLocalMatrix(Y, &sY));
  if (!transpose) PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
  else PetscCall(KSPMatSolveTranspose(jac->ksp[0], sX, sY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
{
  PetscFunctionBegin;
  PetscCall(PCMatApply_BJacobi_Singleblock_Private(pc, X, Y, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMatApplyTranspose_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
{
  PetscFunctionBegin;
  PetscCall(PCMatApply_BJacobi_Singleblock_Private(pc, X, Y, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;
  PC                      subpc;

  PetscFunctionBegin;
  /*
     The VecPlaceArray() calls avoid copying the x and y vectors into the work
     vectors bjac->x and bjac->y. These work vectors are needed because the
     sequential solve requires sequential vectors.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  /* apply the symmetric left portion of the inner PC operator */
  /* note this bypasses the inner KSP and its options completely */
  PetscCall(KSPGetPC(jac->ksp[0], &subpc));
  PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;
  PC                      subpc;

  PetscFunctionBegin;
  /*
     The VecPlaceArray() calls avoid copying the x and y vectors into the work
     vectors bjac->x and bjac->y. These work vectors are needed because the
     sequential solve requires sequential vectors.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));

  /* apply the symmetric right portion of the inner PC operator */
  /* note this bypasses the inner KSP and its options completely */

  PetscCall(KSPGetPC(jac->ksp[0], &subpc));
  PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));

  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;

  PetscFunctionBegin;
  /*
     The VecPlaceArray() calls avoid copying the x and y vectors into the work
     vectors bjac->x and bjac->y. These work vectors are needed because the
     sequential solve requires sequential vectors.
  */
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  PetscCall(VecPlaceArray(bjac->x, x_array));
  PetscCall(VecPlaceArray(bjac->y, y_array));
  PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0));
  PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
  PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[0], bjac->x, bjac->y, 0));
  PetscCall(VecResetArray(bjac->x));
  PetscCall(VecResetArray(bjac->y));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscCall(VecRestoreArray(y, &y_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
{
  PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
  PetscInt                m;
  KSP                     ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool               wasSetup = PETSC_TRUE;
  VecType                 vectype;
  const char             *prefix;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    if (!jac->ksp) {
      PetscInt nestlevel;

      wasSetup = PETSC_FALSE;

      PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
      PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
      PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
      PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
      PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
      PetscCall(KSPSetType(ksp, KSPPREONLY));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      PetscCall(KSPSetOptionsPrefix(ksp, prefix));
      PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->matapplytranspose   = PCMatApplyTranspose_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      PetscCall(PetscMalloc1(1, &jac->ksp));
      jac->ksp[0] = ksp;

      PetscCall(PetscNew(&bjac));
      jac->data = (void *)bjac;
    } else {
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock *)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    PetscCall(MatGetSize(pmat, &m, &m));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
    PetscCall(MatGetVecType(pmat, &vectype));
    PetscCall(VecSetType(bjac->x, vectype));
    PetscCall(VecSetType(bjac->y, vectype));
  } else {
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock *)jac->data;
  }
  PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
  if (pc->useAmat) {
    PetscCall(KSPSetOperators(ksp, mat, pmat));
    PetscCall(MatSetOptionsPrefix(mat, prefix));
  } else {
    PetscCall(KSPSetOperators(ksp, pmat, pmat));
  }
  PetscCall(MatSetOptionsPrefix(pmat, prefix));
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    PetscCall(KSPSetFromOptions(ksp));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
{
  PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscInt               i;

  PetscFunctionBegin;
  if (bjac && bjac->pmat) {
    PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
    if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
  }

  for (i = 0; i < jac->n_local; i++) {
    PetscCall(KSPReset(jac->ksp[i]));
    if (bjac && bjac->x) {
      PetscCall(VecDestroy(&bjac->x[i]));
      PetscCall(VecDestroy(&bjac->y[i]));
      PetscCall(ISDestroy(&bjac->is[i]));
    }
  }
  PetscCall(PetscFree(jac->l_lens));
  PetscCall(PetscFree(jac->g_lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
{
  PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscInt               i;

  PetscFunctionBegin;
  PetscCall(PCReset_BJacobi_Multiblock(pc));
  if (bjac) {
    PetscCall(PetscFree2(bjac->x, bjac->y));
    PetscCall(PetscFree(bjac->starts));
    PetscCall(PetscFree(bjac->is));
  }
  PetscCall(PetscFree(jac->data));
  for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
{
  PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
  PetscInt           i, n_local = jac->n_local;
  KSPConvergedReason reason;

  PetscFunctionBegin;
  for (i = 0; i < n_local; i++) {
    PetscCall(KSPSetUp(jac->ksp[i]));
    PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
    if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
       To avoid copying the subvector from x into a workspace we instead
       make the workspace vector array point to the subpart of the array of
       the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
    PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;
  PC                     subpc;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
       To avoid copying the subvector from x into a workspace we instead
       make the workspace vector array point to the subpart of the array of
       the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    /* apply the symmetric left portion of the inner PC operator */
    /* note this bypasses the inner KSP and its options completely */
    PetscCall(KSPGetPC(jac->ksp[i], &subpc));
    PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;
  PC                     subpc;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
       To avoid copying the subvector from x into a workspace we instead
       make the workspace vector array point to the subpart of the array of
       the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    /* apply the symmetric right portion of the inner PC operator */
    /* note this bypasses the inner KSP and its options completely */
    PetscCall(KSPGetPC(jac->ksp[i], &subpc));
    PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               i, n_local = jac->n_local;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PetscScalar           *yin;
  const PetscScalar     *xin;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &xin));
  PetscCall(VecGetArray(y, &yin));
  for (i = 0; i < n_local; i++) {
    /*
       To avoid copying the subvector from x into a workspace we instead
       make the workspace vector array point to the subpart of the array of
       the global vector.
    */
    PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
    PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

    PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
    PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
    PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
    PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

    PetscCall(VecResetArray(bjac->x[i]));
    PetscCall(VecResetArray(bjac->y[i]));
  }
  PetscCall(VecRestoreArrayRead(x, &xin));
  PetscCall(VecRestoreArray(y, &yin));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
{
  PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
  PetscInt               m, n_local, N, M, start, i;
  const char            *prefix;
  KSP                    ksp;
  Vec                    x, y;
  PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
  PC                     subpc;
  IS                     is;
  MatReuse               scall;
  VecType                vectype;
  MatNullSpace          *nullsp_mat = NULL, *nullsp_pmat = NULL;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(pc->pmat, &M, &N));

  n_local = jac->n_local;

  if (pc->useAmat) {
    PetscBool same;
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
    PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
  }

  if (!pc->setupcalled) {
    PetscInt nestlevel;

    scall = MAT_INITIAL_MATRIX;

    if (!jac->ksp) {
      pc->ops->reset               = PCReset_BJacobi_Multiblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
      pc->ops->apply               = PCApply_BJacobi_Multiblock;
      pc->ops->matapply            = NULL;
      pc->ops->matapplytranspose   = NULL;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;

      PetscCall(PetscNew(&bjac));
      PetscCall(PetscMalloc1(n_local, &jac->ksp));
      PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
      PetscCall(PetscMalloc1(n_local, &bjac->starts));

      jac->data = (void *)bjac;
      PetscCall(PetscMalloc1(n_local, &bjac->is));

      for (i = 0; i < n_local; i++) {
        PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
        PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
        PetscCall(KSPSetNestLevel(ksp, nestlevel + 1));
        PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
        PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
        PetscCall(KSPSetType(ksp, KSPPREONLY));
        PetscCall(KSPGetPC(ksp, &subpc));
        PetscCall(PCGetOptionsPrefix(pc, &prefix));
        PetscCall(KSPSetOptionsPrefix(ksp, prefix));
        PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

        jac->ksp[i] = ksp;
      }
    } else {
      bjac = (PC_BJacobi_Multiblock *)jac->data;
    }

    start = 0;
    PetscCall(MatGetVecType(pmat, &vectype));
    for (i = 0; i < n_local; i++) {
      m = jac->l_lens[i];
      /*
        The reason we need to generate these vectors is to serve
        as the right-hand side and solution vector for the solve on the
        block. We do not need to allocate space for the vectors since
        that is provided via VecPlaceArray() just before the call to
        KSPSolve() on the block.
      */
      PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &x));
      PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
      PetscCall(VecSetType(x, vectype));
      PetscCall(VecSetType(y, vectype));

      bjac->x[i]      = x;
      bjac->y[i]      = y;
      bjac->starts[i] = start;

      PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
      bjac->is[i] = is;

      start += m;
    }
  } else {
    bjac = (PC_BJacobi_Multiblock *)jac->data;
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      PetscCall(MatGetNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
      PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
      if (pc->useAmat) {
        PetscCall(MatGetNullSpaces(n_local, bjac->mat, &nullsp_mat));
        PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
      }
      scall = MAT_INITIAL_MATRIX;
    } else scall = MAT_REUSE_MATRIX;
  }

  PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
  if (nullsp_pmat) PetscCall(MatRestoreNullSpaces(n_local, bjac->pmat, &nullsp_pmat));
  if (pc->useAmat) {
    PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
    if (nullsp_mat) PetscCall(MatRestoreNullSpaces(n_local, bjac->mat, &nullsp_mat));
  }
  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));

  for (i = 0; i < n_local; i++) {
    PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
    if (pc->useAmat) {
      PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
      PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
    } else {
      PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
    }
    PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
    if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
      These are for a single block with multiple processes
*/
static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
  KSP                subksp = jac->ksp[0];
  KSPConvergedReason reason;

  PetscFunctionBegin;
  PetscCall(KSPSetUp(subksp));
  PetscCall(KSPGetConvergedReason(subksp, &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;

  PetscFunctionBegin;
  PetscCall(VecDestroy(&mpjac->ysub));
  PetscCall(VecDestroy(&mpjac->xsub));
  PetscCall(MatDestroy(&mpjac->submats));
  if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;

  PetscFunctionBegin;
  PetscCall(PCReset_BJacobi_Multiproc(pc));
  PetscCall(KSPDestroy(&jac->ksp[0]));
  PetscCall(PetscFree(jac->ksp));
  PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));

  PetscCall(PetscFree(mpjac));
  PetscCall(PCDestroy_BJacobi(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscScalar          *yarray;
  const PetscScalar    *xarray;
  KSPConvergedReason    reason;

  PetscFunctionBegin;
  /* place x's and y's local arrays into xsub and ysub */
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(y, &yarray));
  PetscCall(VecPlaceArray(mpjac->xsub, xarray));
  PetscCall(VecPlaceArray(mpjac->ysub, yarray));

  /* apply preconditioner on each matrix block */
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
  PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
  PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;

  PetscCall(VecResetArray(mpjac->xsub));
  PetscCall(VecResetArray(mpjac->ysub));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(y, &yarray));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
{
  PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
  KSPConvergedReason reason;
  Mat                sX, sY;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscInt           m, N, lda, ldb;

  PetscFunctionBegin;
  /* apply preconditioner on each matrix block */
  PetscCall(MatGetLocalSize(X, &m, NULL));
  PetscCall(MatGetSize(X, NULL, &N));
  PetscCall(MatDenseGetLDA(X, &lda));
  PetscCall(MatDenseGetLDA(Y, &ldb));
  PetscCall(MatDenseGetArrayRead(X, &x));
  PetscCall(MatDenseGetArrayWrite(Y, &y));
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
  PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
  PetscCall(MatDenseSetLDA(sX, lda));
  PetscCall(MatDenseSetLDA(sY, ldb));
  PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
  PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
  PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
  PetscCall(MatDestroy(&sY));
  PetscCall(MatDestroy(&sX));
  PetscCall(MatDenseRestoreArrayWrite(Y, &y));
  PetscCall(MatDenseRestoreArrayRead(X, &x));
  PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
  PetscInt              m, n;
  MPI_Comm              comm, subcomm = 0;
  const char           *prefix;
  PetscBool             wasSetup = PETSC_TRUE;
  VecType               vectype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
  PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    PetscInt nestlevel;

    wasSetup = PETSC_FALSE;
    PetscCall(PetscNew(&mpjac));
    jac->data = (void *)mpjac;

    /* initialize the data structure mpjac */
    if (!jac->psubcomm) {
      /* create default contiguous subcommunicators if the user does not provide them */
      PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
      PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
      PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* get matrix blocks of pmat */
    PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));

    /* create a new KSP (and PC) of which the processes in each subcomm share a copy */
    PetscCall(PetscMalloc1(1, &jac->ksp));
    PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
    PetscCall(PCGetKSPNestLevel(pc, &nestlevel));
    PetscCall(KSPSetNestLevel(jac->ksp[0], nestlevel + 1));
    PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
    PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
    PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));

    PetscCall(PCGetOptionsPrefix(pc, &prefix));
    PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
    PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
    PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
    PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));

    /* create dummy vectors xsub and ysub */
    PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
    PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
    PetscCall(MatGetVecType(mpjac->submats, &vectype));
    PetscCall(VecSetType(mpjac->xsub, vectype));
    PetscCall(VecSetType(mpjac->ysub, vectype));

    pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
    pc->ops->reset         = PCReset_BJacobi_Multiproc;
    pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply         = PCApply_BJacobi_Multiproc;
    pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
    } else {
      PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
    }
    PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
  }

  if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
  PetscFunctionReturn(PETSC_SUCCESS);
}