xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision 89669be4d29968dc8d4c19ce1b69194a6a561ea4)
1 
2 /*
3    Defines a block Jacobi preconditioner.
4 */
5 
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
7 
8 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC,Mat,Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC,Mat,Mat);
10 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
11 
12 static PetscErrorCode PCSetUp_BJacobi(PC pc)
13 {
14   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
15   Mat            mat  = pc->mat,pmat = pc->pmat;
16   PetscBool      hasop;
17   PetscInt       N,M,start,i,sum,end;
18   PetscInt       bs,i_start=-1,i_end=-1;
19   PetscMPIInt    rank,size;
20 
21   PetscFunctionBegin;
22   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank));
23   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size));
24   PetscCall(MatGetLocalSize(pc->pmat,&M,&N));
25   PetscCall(MatGetBlockSize(pc->pmat,&bs));
26 
27   if (jac->n > 0 && jac->n < size) {
28     PetscCall(PCSetUp_BJacobi_Multiproc(pc));
29     PetscFunctionReturn(0);
30   }
31 
32   /* --------------------------------------------------------------------------
33       Determines the number of blocks assigned to each processor
34   -----------------------------------------------------------------------------*/
35 
36   /*   local block count  given */
37   if (jac->n_local > 0 && jac->n < 0) {
38     PetscCall(MPIU_Allreduce(&jac->n_local,&jac->n,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc)));
39     if (jac->l_lens) { /* check that user set these correctly */
40       sum = 0;
41       for (i=0; i<jac->n_local; i++) {
42         PetscCheck(jac->l_lens[i]/bs*bs ==jac->l_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
43         sum += jac->l_lens[i];
44       }
45       PetscCheck(sum == M,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local lens set incorrectly");
46     } else {
47       PetscCall(PetscMalloc1(jac->n_local,&jac->l_lens));
48       for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
49     }
50   } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
51     /* global blocks given: determine which ones are local */
52     if (jac->g_lens) {
53       /* check that g_lens has valid entries */
54       for (i=0; i<jac->n; i++) {
55         PetscCheck(jac->g_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Zero block not allowed");
56         PetscCheck(jac->g_lens[i]/bs*bs == jac->g_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
57       }
58       if (size == 1) {
59         jac->n_local = jac->n;
60         PetscCall(PetscMalloc1(jac->n_local,&jac->l_lens));
61         PetscCall(PetscArraycpy(jac->l_lens,jac->g_lens,jac->n_local));
62         /* check that user set these correctly */
63         sum = 0;
64         for (i=0; i<jac->n_local; i++) sum += jac->l_lens[i];
65         PetscCheck(sum == M,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Global lens set incorrectly");
66       } else {
67         PetscCall(MatGetOwnershipRange(pc->pmat,&start,&end));
68         /* loop over blocks determining the first one owned by me */
69         sum = 0;
70         for (i=0; i<jac->n+1; i++) {
71           if (sum == start) { i_start = i; goto start_1;}
72           if (i < jac->n) sum += jac->g_lens[i];
73         }
74         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
75 start_1:
76         for (i=i_start; i<jac->n+1; i++) {
77           if (sum == end) { i_end = i; goto end_1; }
78           if (i < jac->n) sum += jac->g_lens[i];
79         }
80         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
81 end_1:
82         jac->n_local = i_end - i_start;
83         PetscCall(PetscMalloc1(jac->n_local,&jac->l_lens));
84         PetscCall(PetscArraycpy(jac->l_lens,jac->g_lens+i_start,jac->n_local));
85       }
86     } else { /* no global blocks given, determine them using the default layout */
87       jac->n_local = jac->n/size + ((jac->n % size) > rank);
88       PetscCall(PetscMalloc1(jac->n_local,&jac->l_lens));
89       for (i=0; i<jac->n_local; i++) {
90         jac->l_lens[i] = ((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i))*bs;
91         PetscCheck(jac->l_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Too many blocks given");
92       }
93     }
94   } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
95     jac->n         = size;
96     jac->n_local   = 1;
97     PetscCall(PetscMalloc1(1,&jac->l_lens));
98     jac->l_lens[0] = M;
99   } else { /* jac->n > 0 && jac->n_local > 0 */
100     if (!jac->l_lens) {
101       PetscCall(PetscMalloc1(jac->n_local,&jac->l_lens));
102       for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
103     }
104   }
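  /*
     A small worked example of the default block-length formula used above (illustrative only,
     not executed): with M = 10, bs = 1, and n_local = 3, the expression
     bs*((M/bs)/n_local + (((M/bs) % n_local) > i)) yields l_lens = {4,3,3}, which sums back to M.
  */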
105   PetscCheck(jac->n_local >= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of blocks is less than number of processors");
106 
107   /* -------------------------
108       Determines mat and pmat
109   ---------------------------*/
110   PetscCall(MatHasOperation(pc->mat,MATOP_GET_DIAGONAL_BLOCK,&hasop));
111   if (!hasop && size == 1) {
112     mat  = pc->mat;
113     pmat = pc->pmat;
114   } else {
115     if (pc->useAmat) {
116       /* use block from Amat matrix, not Pmat for local MatMult() */
117       PetscCall(MatGetDiagonalBlock(pc->mat,&mat));
118     }
119     if (pc->pmat != pc->mat || !pc->useAmat) {
120       PetscCall(MatGetDiagonalBlock(pc->pmat,&pmat));
121     } else pmat = mat;
122   }
123 
124   /* ------------------------------------------
125       Setup code depends on the number of blocks
126   --------------------------------------------*/
127   if (jac->n_local == 1) {
128     PetscCall(PCSetUp_BJacobi_Singleblock(pc,mat,pmat));
129   } else {
130     PetscCall(PCSetUp_BJacobi_Multiblock(pc,mat,pmat));
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 /* Default destroy, used if the PC has never been set up */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc)
137 {
138   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
139 
140   PetscFunctionBegin;
141   PetscCall(PetscFree(jac->g_lens));
142   PetscCall(PetscFree(jac->l_lens));
143   PetscCall(PetscFree(pc->data));
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode PCSetFromOptions_BJacobi(PetscOptionItems *PetscOptionsObject,PC pc)
148 {
149   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
150   PetscInt       blocks,i;
151   PetscBool      flg;
152 
153   PetscFunctionBegin;
154   PetscOptionsHeadBegin(PetscOptionsObject,"Block Jacobi options");
155   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks","Total number of blocks","PCBJacobiSetTotalBlocks",jac->n,&blocks,&flg));
156   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc,blocks,NULL));
157   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks","Local number of blocks","PCBJacobiSetLocalBlocks",jac->n_local,&blocks,&flg));
158   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc,blocks,NULL));
159   if (jac->ksp) {
160     /* The sub-KSPs have already been set up (e.g., by PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions() has not
161      * been called on them unless this routine was already called earlier. */
162     for (i=0; i<jac->n_local; i++) {
163       PetscCall(KSPSetFromOptions(jac->ksp[i]));
164     }
165   }
166   PetscOptionsHeadEnd();
167   PetscFunctionReturn(0);
168 }
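/*
   Illustrative command-line example (the executable name "./app" is hypothetical; the options are
   the ones handled above plus the "sub_" prefix used by the block solvers):

     ./app -pc_type bjacobi -pc_bjacobi_blocks 4 -sub_ksp_type preonly -sub_pc_type ilu

   The -sub_ options take effect through KSPSetFromOptions() on the block solvers, called either
   here (if the sub-KSPs already exist) or during PCSetUp_BJacobi_Singleblock()/_Multiblock().
*/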
169 
170 #include <petscdraw.h>
171 static PetscErrorCode PCView_BJacobi(PC pc,PetscViewer viewer)
172 {
173   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
174   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
175   PetscMPIInt          rank;
176   PetscInt             i;
177   PetscBool            iascii,isstring,isdraw;
178   PetscViewer          sviewer;
179   PetscViewerFormat    format;
180   const char           *prefix;
181 
182   PetscFunctionBegin;
183   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
184   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring));
185   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
186   if (iascii) {
187     if (pc->useAmat) {
188       PetscCall(PetscViewerASCIIPrintf(viewer,"  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n",jac->n));
189     }
190     PetscCall(PetscViewerASCIIPrintf(viewer,"  number of blocks = %" PetscInt_FMT "\n",jac->n));
191     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank));
192     PetscCall(PetscViewerGetFormat(viewer,&format));
193     if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
194       PetscCall(PetscViewerASCIIPrintf(viewer,"  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
195       PetscCall(PCGetOptionsPrefix(pc,&prefix));
196       PetscCall(PetscViewerASCIIPrintf(viewer,"  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n",prefix?prefix:""));
197       if (jac->ksp && !jac->psubcomm) {
198         PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
199         if (rank == 0) {
200           PetscCall(PetscViewerASCIIPushTab(viewer));
201           PetscCall(KSPView(jac->ksp[0],sviewer));
202           PetscCall(PetscViewerASCIIPopTab(viewer));
203         }
204         PetscCall(PetscViewerFlush(sviewer));
205         PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
206         PetscCall(PetscViewerFlush(viewer));
207         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
208         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
209       } else if (mpjac && jac->ksp && mpjac->psubcomm) {
210         PetscCall(PetscViewerGetSubViewer(viewer,mpjac->psubcomm->child,&sviewer));
211         if (!mpjac->psubcomm->color) {
212           PetscCall(PetscViewerASCIIPushTab(viewer));
213           PetscCall(KSPView(*(jac->ksp),sviewer));
214           PetscCall(PetscViewerASCIIPopTab(viewer));
215         }
216         PetscCall(PetscViewerFlush(sviewer));
217         PetscCall(PetscViewerRestoreSubViewer(viewer,mpjac->psubcomm->child,&sviewer));
218         PetscCall(PetscViewerFlush(viewer));
219         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
220         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
221       } else {
222         PetscCall(PetscViewerFlush(viewer));
223       }
224     } else {
225       PetscInt n_global;
226       PetscCall(MPIU_Allreduce(&jac->n_local,&n_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc)));
227       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
228       PetscCall(PetscViewerASCIIPrintf(viewer,"  Local solver information for each block is in the following KSP and PC objects:\n"));
229       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n",
230                                                    rank,jac->n_local,jac->first_local));
231       PetscCall(PetscViewerASCIIPushTab(viewer));
232       PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
233       for (i=0; i<jac->n_local; i++) {
234         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local block number %" PetscInt_FMT "\n",rank,i));
235         PetscCall(KSPView(jac->ksp[i],sviewer));
236         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n"));
237       }
238       PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
239       PetscCall(PetscViewerASCIIPopTab(viewer));
240       PetscCall(PetscViewerFlush(viewer));
241       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
242     }
243   } else if (isstring) {
244     PetscCall(PetscViewerStringSPrintf(viewer," blks=%" PetscInt_FMT,jac->n));
245     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
246     if (jac->ksp) PetscCall(KSPView(jac->ksp[0],sviewer));
247     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
248   } else if (isdraw) {
249     PetscDraw draw;
250     char      str[25];
251     PetscReal x,y,bottom,h;
252 
253     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
254     PetscCall(PetscDrawGetCurrentPoint(draw,&x,&y));
255     PetscCall(PetscSNPrintf(str,25,"Number blocks %" PetscInt_FMT,jac->n));
256     PetscCall(PetscDrawStringBoxed(draw,x,y,PETSC_DRAW_RED,PETSC_DRAW_BLACK,str,NULL,&h));
257     bottom = y - h;
258     PetscCall(PetscDrawPushCurrentPoint(draw,x,bottom));
259     /* warning: in parallel the communicator on the viewer is different than the one on the ksp */
260     if (jac->ksp) PetscCall(KSPView(jac->ksp[0],viewer));
261     PetscCall(PetscDrawPopCurrentPoint(draw));
262   }
263   PetscFunctionReturn(0);
264 }
265 
266 /* -------------------------------------------------------------------------------------*/
267 
268 static PetscErrorCode  PCBJacobiGetSubKSP_BJacobi(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
269 {
270   PC_BJacobi *jac = (PC_BJacobi*)pc->data;
271 
272   PetscFunctionBegin;
273   PetscCheck(pc->setupcalled,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call KSPSetUp() or PCSetUp() first");
274 
275   if (n_local) *n_local = jac->n_local;
276   if (first_local) *first_local = jac->first_local;
277   if (ksp) *ksp                 = jac->ksp;
278   PetscFunctionReturn(0);
279 }
280 
281 static PetscErrorCode  PCBJacobiSetTotalBlocks_BJacobi(PC pc,PetscInt blocks,PetscInt *lens)
282 {
283   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
284 
285   PetscFunctionBegin;
286   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks,PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
287   jac->n = blocks;
288   if (!lens) jac->g_lens = NULL;
289   else {
290     PetscCall(PetscMalloc1(blocks,&jac->g_lens));
291     PetscCall(PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt)));
292     PetscCall(PetscArraycpy(jac->g_lens,lens,blocks));
293   }
294   PetscFunctionReturn(0);
295 }
296 
297 static PetscErrorCode  PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
298 {
299   PC_BJacobi *jac = (PC_BJacobi*) pc->data;
300 
301   PetscFunctionBegin;
302   *blocks = jac->n;
303   if (lens) *lens = jac->g_lens;
304   PetscFunctionReturn(0);
305 }
306 
307 static PetscErrorCode  PCBJacobiSetLocalBlocks_BJacobi(PC pc,PetscInt blocks,const PetscInt lens[])
308 {
309   PC_BJacobi     *jac;
310 
311   PetscFunctionBegin;
312   jac = (PC_BJacobi*)pc->data;
313 
314   jac->n_local = blocks;
315   if (!lens) jac->l_lens = NULL;
316   else {
317     PetscCall(PetscMalloc1(blocks,&jac->l_lens));
318     PetscCall(PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt)));
319     PetscCall(PetscArraycpy(jac->l_lens,lens,blocks));
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 static PetscErrorCode  PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
325 {
326   PC_BJacobi *jac = (PC_BJacobi*) pc->data;
327 
328   PetscFunctionBegin;
329   *blocks = jac->n_local;
330   if (lens) *lens = jac->l_lens;
331   PetscFunctionReturn(0);
332 }
333 
334 /* -------------------------------------------------------------------------------------*/
335 
336 /*@C
337    PCBJacobiGetSubKSP - Gets the local KSP contexts for all blocks on
338    this processor.
339 
340    Not Collective
341 
342    Input Parameter:
343 .  pc - the preconditioner context
344 
345    Output Parameters:
346 +  n_local - the number of blocks on this processor, or NULL
347 .  first_local - the global number of the first block on this processor, or NULL
348 -  ksp - the array of KSP contexts
349 
350    Notes:
351    The array of KSP contexts returned by PCBJacobiGetSubKSP() must not be freed by the caller.
352 
353    Currently for some matrix implementations only 1 block per processor
354    is supported.
355 
356    You must call KSPSetUp() or PCSetUp() before calling PCBJacobiGetSubKSP().
357 
358    Fortran Usage: You must pass in a KSP array that is large enough to contain all the local KSPs.
359       You can call PCBJacobiGetSubKSP(pc,nlocal,firstlocal,PETSC_NULL_KSP,ierr) to determine how large the
360       KSP array must be.
361 
362    Level: advanced
363 
364 .seealso: `PCASMGetSubKSP()`
365 @*/
366 PetscErrorCode  PCBJacobiGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
367 {
368   PetscFunctionBegin;
369   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
370   PetscUseMethod(pc,"PCBJacobiGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
371   PetscFunctionReturn(0);
372 }
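/*
   A minimal usage sketch for PCBJacobiGetSubKSP() (user code, not part of this file), assuming
   "ksp" is a KSP whose PC has type PCBJACOBI:

     PC       pc,subpc;
     PetscInt i,nlocal,first;
     KSP      *subksp;

     PetscCall(KSPGetPC(ksp,&pc));
     PetscCall(KSPSetUp(ksp));
     PetscCall(PCBJacobiGetSubKSP(pc,&nlocal,&first,&subksp));
     for (i=0; i<nlocal; i++) {
       PetscCall(KSPGetPC(subksp[i],&subpc));
       PetscCall(PCSetType(subpc,PCILU));
     }
*/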
373 
374 /*@
375    PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
376    Jacobi preconditioner.
377 
378    Collective on PC
379 
380    Input Parameters:
381 +  pc - the preconditioner context
382 .  blocks - the number of blocks
383 -  lens - [optional] integer array containing the size of each block
384 
385    Options Database Key:
386 .  -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
387 
388    Notes:
389    Currently only a limited number of blocking configurations are supported.
390    All processors sharing the PC must call this routine with the same data.
391 
392    Level: intermediate
393 
394 .seealso: `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
395 @*/
396 PetscErrorCode  PCBJacobiSetTotalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
397 {
398   PetscFunctionBegin;
399   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
400   PetscCheck(blocks > 0,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Must have positive blocks");
401   PetscTryMethod(pc,"PCBJacobiSetTotalBlocks_C",(PC,PetscInt,const PetscInt[]),(pc,blocks,lens));
402   PetscFunctionReturn(0);
403 }
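/*
   A minimal usage sketch for PCBJacobiSetTotalBlocks() (user code, not part of this file). The
   block sizes below are made up for illustration and assume a 100-row matrix:

     PetscInt lens[3] = {40,40,20};

     PetscCall(PCBJacobiSetTotalBlocks(pc,3,lens));

   Passing lens = NULL, or using -pc_bjacobi_blocks 3, lets the setup code above choose the sizes.
*/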
404 
405 /*@C
406    PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
407    Jacobi preconditioner.
408 
409    Not Collective
410 
411    Input Parameter:
412 .  pc - the preconditioner context
413 
414    Output Parameters:
415 +  blocks - the number of blocks
416 -  lens - integer array containing the size of each block
417 
418    Level: intermediate
419 
420 .seealso: `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
421 @*/
422 PetscErrorCode  PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
423 {
424   PetscFunctionBegin;
425   PetscValidHeaderSpecific(pc, PC_CLASSID,1);
426   PetscValidIntPointer(blocks,2);
427   PetscUseMethod(pc,"PCBJacobiGetTotalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));
428   PetscFunctionReturn(0);
429 }
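/*
   A minimal query sketch for PCBJacobiGetTotalBlocks() (user code, not part of this file),
   assuming "pc" has type PCBJACOBI:

     PetscInt       nblocks;
     const PetscInt *lens;

     PetscCall(PCBJacobiGetTotalBlocks(pc,&nblocks,&lens));

   lens is NULL unless block sizes were supplied via PCBJacobiSetTotalBlocks().
*/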
430 
431 /*@
432    PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
433    Jacobi preconditioner.
434 
435    Not Collective
436 
437    Input Parameters:
438 +  pc - the preconditioner context
439 .  blocks - the number of blocks
440 -  lens - [optional] integer array containing size of each block
441 
442    Options Database Key:
443 .  -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
444 
445    Note:
446    Currently only a limited number of blocking configurations are supported.
447 
448    Level: intermediate
449 
450 .seealso: `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
451 @*/
452 PetscErrorCode  PCBJacobiSetLocalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
453 {
454   PetscFunctionBegin;
455   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
456   PetscCheck(blocks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must have nonnegative blocks");
457   PetscTryMethod(pc,"PCBJacobiSetLocalBlocks_C",(PC,PetscInt,const PetscInt []),(pc,blocks,lens));
458   PetscFunctionReturn(0);
459 }
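/*
   A minimal usage sketch for PCBJacobiSetLocalBlocks() (user code, not part of this file). The
   split below is made up for illustration and assumes this process owns 50 matrix rows:

     PetscInt lens[2] = {30,20};

     PetscCall(PCBJacobiSetLocalBlocks(pc,2,lens));
*/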
460 
461 /*@C
462    PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
463    Jacobi preconditioner.
464 
465    Not Collective
466 
467    Input Parameter:
468 .  pc - the preconditioner context
469    Output Parameters:
470 +  blocks - the number of blocks
471 -  lens - integer array containing the size of each block, or NULL if not set
471 
472    Note:
473    Currently only a limited number of blocking configurations are supported.
474 
475    Level: intermediate
476 
477 .seealso: `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
478 @*/
479 PetscErrorCode  PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
480 {
481   PetscFunctionBegin;
482   PetscValidHeaderSpecific(pc, PC_CLASSID,1);
483   PetscValidIntPointer(blocks,2);
484   PetscUseMethod(pc,"PCBJacobiGetLocalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));
485   PetscFunctionReturn(0);
486 }
487 
488 /* -----------------------------------------------------------------------------------*/
489 
490 /*MC
491    PCBJACOBI - Use block Jacobi preconditioning, in which each block is (approximately) solved with
492            its own KSP object.
493 
494    Options Database Keys:
495 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
496 -  -pc_bjacobi_blocks <n> - use n total blocks
497 
498    Notes:
499     See PCJACOBI for diagonal (point) Jacobi, PCVPBJACOBI for variable-size point block Jacobi, and PCPBJACOBI for fixed-size point block Jacobi
500 
501     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
502 
503      To set options on the solvers for each block, prepend -sub_ to all the KSP and PC
504         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
505 
506      To set options on the solvers separately for each block, call PCBJacobiGetSubKSP()
507          and set the options directly on the resulting KSP objects (you can access the PC of each
508          with KSPGetPC())
509 
510      For GPU-based vectors (CUDA, ViennaCL) it is recommended to use exactly one block per MPI process for best
511          performance.  A different block partitioning may require additional data transfers
512          between host and GPU, degrading performance.
513 
514      The options prefix for each block is sub_, for example -sub_pc_type lu.
515 
516      When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
519 
520    Level: beginner
521 
522 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
523           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
524           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
525 M*/
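/*
   A minimal usage sketch of PCBJACOBI (user code, not part of this file), assuming a parallel
   matrix A and vectors b and x already exist:

     KSP ksp;
     PC  pc;

     PetscCall(KSPCreate(PETSC_COMM_WORLD,&ksp));
     PetscCall(KSPSetOperators(ksp,A,A));
     PetscCall(KSPGetPC(ksp,&pc));
     PetscCall(PCSetType(pc,PCBJACOBI));
     PetscCall(KSPSetFromOptions(ksp));
     PetscCall(KSPSolve(ksp,b,x));
     PetscCall(KSPDestroy(&ksp));

   KSPSetFromOptions() picks up -pc_bjacobi_blocks and the -sub_ options for the block solvers.
*/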
526 
527 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
528 {
529   PetscMPIInt    rank;
530   PC_BJacobi     *jac;
531 
532   PetscFunctionBegin;
533   PetscCall(PetscNewLog(pc,&jac));
534   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank));
535 
536   pc->ops->apply           = NULL;
537   pc->ops->matapply        = NULL;
538   pc->ops->applytranspose  = NULL;
539   pc->ops->setup           = PCSetUp_BJacobi;
540   pc->ops->destroy         = PCDestroy_BJacobi;
541   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
542   pc->ops->view            = PCView_BJacobi;
543   pc->ops->applyrichardson = NULL;
544 
545   pc->data               = (void*)jac;
546   jac->n                 = -1;
547   jac->n_local           = -1;
548   jac->first_local       = rank;
549   jac->ksp               = NULL;
550   jac->g_lens            = NULL;
551   jac->l_lens            = NULL;
552   jac->psubcomm          = NULL;
553 
554   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetSubKSP_C",PCBJacobiGetSubKSP_BJacobi));
555   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetTotalBlocks_C",PCBJacobiSetTotalBlocks_BJacobi));
556   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetTotalBlocks_C",PCBJacobiGetTotalBlocks_BJacobi));
557   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetLocalBlocks_C",PCBJacobiSetLocalBlocks_BJacobi));
558   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetLocalBlocks_C",PCBJacobiGetLocalBlocks_BJacobi));
559   PetscFunctionReturn(0);
560 }
561 
562 /* --------------------------------------------------------------------------------------------*/
563 /*
564         These routines are for a single block per processor; they work for AIJ and BAIJ, both Seq and MPI
565 */
566 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
567 {
568   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
569   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
570 
571   PetscFunctionBegin;
572   PetscCall(KSPReset(jac->ksp[0]));
573   PetscCall(VecDestroy(&bjac->x));
574   PetscCall(VecDestroy(&bjac->y));
575   PetscFunctionReturn(0);
576 }
577 
578 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
579 {
580   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
581   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
582 
583   PetscFunctionBegin;
584   PetscCall(PCReset_BJacobi_Singleblock(pc));
585   PetscCall(KSPDestroy(&jac->ksp[0]));
586   PetscCall(PetscFree(jac->ksp));
587   PetscCall(PetscFree(jac->l_lens));
588   PetscCall(PetscFree(jac->g_lens));
589   PetscCall(PetscFree(bjac));
590   PetscCall(PetscFree(pc->data));
591   PetscFunctionReturn(0);
592 }
593 
594 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
595 {
596   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
597   KSP                subksp = jac->ksp[0];
598   KSPConvergedReason reason;
599 
600   PetscFunctionBegin;
601   PetscCall(KSPSetUp(subksp));
602   PetscCall(KSPGetConvergedReason(subksp,&reason));
603   if (reason == KSP_DIVERGED_PC_FAILED) {
604     pc->failedreason = PC_SUBPC_ERROR;
605   }
606   PetscFunctionReturn(0);
607 }
608 
609 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc,Vec x,Vec y)
610 {
611   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
612   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
613 
614   PetscFunctionBegin;
615   PetscCall(VecGetLocalVectorRead(x, bjac->x));
616   PetscCall(VecGetLocalVector(y, bjac->y));
617   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
618      matrix may change even if the outer KSP/PC has not updated the preconditioner. That would trigger a rebuild
619      of the inner preconditioner automatically, so we pass down the outer preconditioner's reuse flag. */
620   PetscCall(KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner));
621   PetscCall(KSPSolve(jac->ksp[0],bjac->x,bjac->y));
622   PetscCall(KSPCheckSolve(jac->ksp[0],pc,bjac->y));
623   PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
624   PetscCall(VecRestoreLocalVector(y, bjac->y));
625   PetscFunctionReturn(0);
626 }
627 
628 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc,Mat X,Mat Y)
629 {
630   PC_BJacobi     *jac  = (PC_BJacobi*)pc->data;
631   Mat            sX,sY;
632 
633   PetscFunctionBegin;
634   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
635      matrix may change even if the outer KSP/PC has not updated the preconditioner. That would trigger a rebuild
636      of the inner preconditioner automatically, so we pass down the outer preconditioner's reuse flag. */
637   PetscCall(KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner));
638   PetscCall(MatDenseGetLocalMatrix(X,&sX));
639   PetscCall(MatDenseGetLocalMatrix(Y,&sY));
640   PetscCall(KSPMatSolve(jac->ksp[0],sX,sY));
641   PetscFunctionReturn(0);
642 }
643 
644 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc,Vec x,Vec y)
645 {
646   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
647   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
648   PetscScalar            *y_array;
649   const PetscScalar      *x_array;
650   PC                     subpc;
651 
652   PetscFunctionBegin;
653   /*
654       The VecPlaceArray() calls avoid copying the x and y vectors
655     into the bjac->x and bjac->y work vectors. These work vectors
656     are needed because the solve on the block requires sequential
657     vectors.
658   */
659   PetscCall(VecGetArrayRead(x,&x_array));
660   PetscCall(VecGetArray(y,&y_array));
661   PetscCall(VecPlaceArray(bjac->x,x_array));
662   PetscCall(VecPlaceArray(bjac->y,y_array));
663   /* apply the symmetric left portion of the inner PC operator */
664   /* note this bypasses the inner KSP and its options completely */
665   PetscCall(KSPGetPC(jac->ksp[0],&subpc));
666   PetscCall(PCApplySymmetricLeft(subpc,bjac->x,bjac->y));
667   PetscCall(VecResetArray(bjac->x));
668   PetscCall(VecResetArray(bjac->y));
669   PetscCall(VecRestoreArrayRead(x,&x_array));
670   PetscCall(VecRestoreArray(y,&y_array));
671   PetscFunctionReturn(0);
672 }
673 
674 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc,Vec x,Vec y)
675 {
676   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
677   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
678   PetscScalar            *y_array;
679   const PetscScalar      *x_array;
680   PC                     subpc;
681 
682   PetscFunctionBegin;
683   /*
684       The VecPlaceArray() calls avoid copying the x and y vectors
685     into the bjac->x and bjac->y work vectors. These work vectors
686     are needed because the solve on the block requires sequential
687     vectors.
688   */
689   PetscCall(VecGetArrayRead(x,&x_array));
690   PetscCall(VecGetArray(y,&y_array));
691   PetscCall(VecPlaceArray(bjac->x,x_array));
692   PetscCall(VecPlaceArray(bjac->y,y_array));
693 
694   /* apply the symmetric right portion of the inner PC operator */
695   /* note this bypasses the inner KSP and its options completely */
696 
697   PetscCall(KSPGetPC(jac->ksp[0],&subpc));
698   PetscCall(PCApplySymmetricRight(subpc,bjac->x,bjac->y));
699   PetscCall(VecResetArray(bjac->x));
700   PetscCall(VecResetArray(bjac->y));
701   PetscCall(VecRestoreArrayRead(x,&x_array));
701   PetscCall(VecRestoreArray(y,&y_array));
702   PetscFunctionReturn(0);
703 }
704 
705 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc,Vec x,Vec y)
706 {
707   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
708   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
709   PetscScalar            *y_array;
710   const PetscScalar      *x_array;
711 
712   PetscFunctionBegin;
713   /*
714       The VecPlaceArray() calls avoid copying the x and y vectors
715     into the bjac->x and bjac->y work vectors. These work vectors
716     are needed because the solve on the block requires sequential
717     vectors.
718   */
719   PetscCall(VecGetArrayRead(x,&x_array));
720   PetscCall(VecGetArray(y,&y_array));
721   PetscCall(VecPlaceArray(bjac->x,x_array));
722   PetscCall(VecPlaceArray(bjac->y,y_array));
723   PetscCall(KSPSolveTranspose(jac->ksp[0],bjac->x,bjac->y));
724   PetscCall(KSPCheckSolve(jac->ksp[0],pc,bjac->y));
725   PetscCall(VecResetArray(bjac->x));
726   PetscCall(VecResetArray(bjac->y));
727   PetscCall(VecRestoreArrayRead(x,&x_array));
728   PetscCall(VecRestoreArray(y,&y_array));
729   PetscFunctionReturn(0);
730 }
731 
732 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc,Mat mat,Mat pmat)
733 {
734   PC_BJacobi             *jac = (PC_BJacobi*)pc->data;
735   PetscInt               m;
736   KSP                    ksp;
737   PC_BJacobi_Singleblock *bjac;
738   PetscBool              wasSetup = PETSC_TRUE;
739   VecType                vectype;
740   const char             *prefix;
741 
742   PetscFunctionBegin;
743   if (!pc->setupcalled) {
744     if (!jac->ksp) {
745       wasSetup = PETSC_FALSE;
746 
747       PetscCall(KSPCreate(PETSC_COMM_SELF,&ksp));
748       PetscCall(KSPSetErrorIfNotConverged(ksp,pc->erroriffailure));
749       PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1));
750       PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp));
751       PetscCall(KSPSetType(ksp,KSPPREONLY));
752       PetscCall(PCGetOptionsPrefix(pc,&prefix));
753       PetscCall(KSPSetOptionsPrefix(ksp,prefix));
754       PetscCall(KSPAppendOptionsPrefix(ksp,"sub_"));
755 
756       pc->ops->reset               = PCReset_BJacobi_Singleblock;
757       pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
758       pc->ops->apply               = PCApply_BJacobi_Singleblock;
759       pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
760       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
761       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
762       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
763       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;
764 
765       PetscCall(PetscMalloc1(1,&jac->ksp));
766       jac->ksp[0] = ksp;
767 
768       PetscCall(PetscNewLog(pc,&bjac));
769       jac->data = (void*)bjac;
770     } else {
771       ksp  = jac->ksp[0];
772       bjac = (PC_BJacobi_Singleblock*)jac->data;
773     }
774 
775     /*
776       The reason we need to generate these vectors is to serve
777       as the right-hand side and solution vector for the solve on the
778       block. We do not need to allocate space for the vectors since
779       that is provided via VecPlaceArray() just before the call to
780       KSPSolve() on the block.
781     */
782     PetscCall(MatGetSize(pmat,&m,&m));
783     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->x));
784     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->y));
785     PetscCall(MatGetVecType(pmat,&vectype));
786     PetscCall(VecSetType(bjac->x,vectype));
787     PetscCall(VecSetType(bjac->y,vectype));
788     PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->x));
789     PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->y));
790   } else {
791     ksp  = jac->ksp[0];
792     bjac = (PC_BJacobi_Singleblock*)jac->data;
793   }
794   PetscCall(KSPGetOptionsPrefix(ksp,&prefix));
795   if (pc->useAmat) {
796     PetscCall(KSPSetOperators(ksp,mat,pmat));
797     PetscCall(MatSetOptionsPrefix(mat,prefix));
798   } else {
799     PetscCall(KSPSetOperators(ksp,pmat,pmat));
800   }
801   PetscCall(MatSetOptionsPrefix(pmat,prefix));
802   if (!wasSetup && pc->setfromoptionscalled) {
803     /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
804     PetscCall(KSPSetFromOptions(ksp));
805   }
806   PetscFunctionReturn(0);
807 }
808 
809 /* ---------------------------------------------------------------------------------------------*/
810 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
811 {
812   PC_BJacobi            *jac  = (PC_BJacobi*)pc->data;
813   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
814   PetscInt              i;
815 
816   PetscFunctionBegin;
817   if (bjac && bjac->pmat) {
818     PetscCall(MatDestroyMatrices(jac->n_local,&bjac->pmat));
819     if (pc->useAmat) {
820       PetscCall(MatDestroyMatrices(jac->n_local,&bjac->mat));
821     }
822   }
823 
824   for (i=0; i<jac->n_local; i++) {
825     PetscCall(KSPReset(jac->ksp[i]));
826     if (bjac && bjac->x) {
827       PetscCall(VecDestroy(&bjac->x[i]));
828       PetscCall(VecDestroy(&bjac->y[i]));
829       PetscCall(ISDestroy(&bjac->is[i]));
830     }
831   }
832   PetscCall(PetscFree(jac->l_lens));
833   PetscCall(PetscFree(jac->g_lens));
834   PetscFunctionReturn(0);
835 }
836 
837 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
838 {
839   PC_BJacobi            *jac  = (PC_BJacobi*)pc->data;
840   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
841   PetscInt              i;
842 
843   PetscFunctionBegin;
844   PetscCall(PCReset_BJacobi_Multiblock(pc));
845   if (bjac) {
846     PetscCall(PetscFree2(bjac->x,bjac->y));
847     PetscCall(PetscFree(bjac->starts));
848     PetscCall(PetscFree(bjac->is));
849   }
850   PetscCall(PetscFree(jac->data));
851   for (i=0; i<jac->n_local; i++) {
852     PetscCall(KSPDestroy(&jac->ksp[i]));
853   }
854   PetscCall(PetscFree(jac->ksp));
855   PetscCall(PetscFree(pc->data));
856   PetscFunctionReturn(0);
857 }
858 
859 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
860 {
861   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
862   PetscInt           i,n_local = jac->n_local;
863   KSPConvergedReason reason;
864 
865   PetscFunctionBegin;
866   for (i=0; i<n_local; i++) {
867     PetscCall(KSPSetUp(jac->ksp[i]));
868     PetscCall(KSPGetConvergedReason(jac->ksp[i],&reason));
869     if (reason == KSP_DIVERGED_PC_FAILED) {
870       pc->failedreason = PC_SUBPC_ERROR;
871     }
872   }
873   PetscFunctionReturn(0);
874 }
875 
876 /*
877       Preconditioner for block Jacobi
878 */
879 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc,Vec x,Vec y)
880 {
881   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
882   PetscInt              i,n_local = jac->n_local;
883   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
884   PetscScalar           *yin;
885   const PetscScalar     *xin;
886 
887   PetscFunctionBegin;
888   PetscCall(VecGetArrayRead(x,&xin));
889   PetscCall(VecGetArray(y,&yin));
890   for (i=0; i<n_local; i++) {
891     /*
892        To avoid copying the subvector from x into a workspace we instead
893        make the workspace vector array point to the subpart of the array of
894        the global vector.
895     */
896     PetscCall(VecPlaceArray(bjac->x[i],xin+bjac->starts[i]));
897     PetscCall(VecPlaceArray(bjac->y[i],yin+bjac->starts[i]));
898 
899     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
900     PetscCall(KSPSolve(jac->ksp[i],bjac->x[i],bjac->y[i]));
901     PetscCall(KSPCheckSolve(jac->ksp[i],pc,bjac->y[i]));
902     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
903 
904     PetscCall(VecResetArray(bjac->x[i]));
905     PetscCall(VecResetArray(bjac->y[i]));
906   }
907   PetscCall(VecRestoreArrayRead(x,&xin));
908   PetscCall(VecRestoreArray(y,&yin));
909   PetscFunctionReturn(0);
910 }
911 
912 /*
913       Preconditioner for block Jacobi
914 */
915 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc,Vec x,Vec y)
916 {
917   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
918   PetscInt              i,n_local = jac->n_local;
919   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
920   PetscScalar           *yin;
921   const PetscScalar     *xin;
922 
923   PetscFunctionBegin;
924   PetscCall(VecGetArrayRead(x,&xin));
925   PetscCall(VecGetArray(y,&yin));
926   for (i=0; i<n_local; i++) {
927     /*
928        To avoid copying the subvector from x into a workspace we instead
929        make the workspace vector array point to the subpart of the array of
930        the global vector.
931     */
932     PetscCall(VecPlaceArray(bjac->x[i],xin+bjac->starts[i]));
933     PetscCall(VecPlaceArray(bjac->y[i],yin+bjac->starts[i]));
934 
935     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
936     PetscCall(KSPSolveTranspose(jac->ksp[i],bjac->x[i],bjac->y[i]));
937     PetscCall(KSPCheckSolve(jac->ksp[i],pc,bjac->y[i]));
938     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
939 
940     PetscCall(VecResetArray(bjac->x[i]));
941     PetscCall(VecResetArray(bjac->y[i]));
942   }
943   PetscCall(VecRestoreArrayRead(x,&xin));
944   PetscCall(VecRestoreArray(y,&yin));
945   PetscFunctionReturn(0);
946 }
947 
948 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc,Mat mat,Mat pmat)
949 {
950   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
951   PetscInt              m,n_local,N,M,start,i;
952   const char            *prefix;
953   KSP                   ksp;
954   Vec                   x,y;
955   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
956   PC                    subpc;
957   IS                    is;
958   MatReuse              scall;
959   VecType               vectype;
960 
961   PetscFunctionBegin;
962   PetscCall(MatGetLocalSize(pc->pmat,&M,&N));
963 
964   n_local = jac->n_local;
965 
966   if (pc->useAmat) {
967     PetscBool same;
968     PetscCall(PetscObjectTypeCompare((PetscObject)mat,((PetscObject)pmat)->type_name,&same));
969     PetscCheck(same,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Matrices not of same type");
970   }
971 
972   if (!pc->setupcalled) {
973     scall = MAT_INITIAL_MATRIX;
974 
975     if (!jac->ksp) {
976       pc->ops->reset         = PCReset_BJacobi_Multiblock;
977       pc->ops->destroy       = PCDestroy_BJacobi_Multiblock;
978       pc->ops->apply         = PCApply_BJacobi_Multiblock;
979       pc->ops->matapply      = NULL;
980       pc->ops->applytranspose= PCApplyTranspose_BJacobi_Multiblock;
981       pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiblock;
982 
983       PetscCall(PetscNewLog(pc,&bjac));
984       PetscCall(PetscMalloc1(n_local,&jac->ksp));
985       PetscCall(PetscLogObjectMemory((PetscObject)pc,n_local*sizeof(KSP)));
986       PetscCall(PetscMalloc2(n_local,&bjac->x,n_local,&bjac->y));
987       PetscCall(PetscMalloc1(n_local,&bjac->starts));
988       PetscCall(PetscLogObjectMemory((PetscObject)pc,n_local*sizeof(PetscScalar)));
989 
990       jac->data = (void*)bjac;
991       PetscCall(PetscMalloc1(n_local,&bjac->is));
992       PetscCall(PetscLogObjectMemory((PetscObject)pc,n_local*sizeof(IS)));
993 
994       for (i=0; i<n_local; i++) {
995         PetscCall(KSPCreate(PETSC_COMM_SELF,&ksp));
996         PetscCall(KSPSetErrorIfNotConverged(ksp,pc->erroriffailure));
997         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1));
998         PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp));
999         PetscCall(KSPSetType(ksp,KSPPREONLY));
1000         PetscCall(KSPGetPC(ksp,&subpc));
1001         PetscCall(PCGetOptionsPrefix(pc,&prefix));
1002         PetscCall(KSPSetOptionsPrefix(ksp,prefix));
1003         PetscCall(KSPAppendOptionsPrefix(ksp,"sub_"));
1004 
1005         jac->ksp[i] = ksp;
1006       }
1007     } else {
1008       bjac = (PC_BJacobi_Multiblock*)jac->data;
1009     }
1010 
1011     start = 0;
1012     PetscCall(MatGetVecType(pmat,&vectype));
1013     for (i=0; i<n_local; i++) {
1014       m = jac->l_lens[i];
1015       /*
1016       The reason we need to generate these vectors is to serve
1017       as the right-hand side and solution vector for the solve on the
1018       block. We do not need to allocate space for the vectors since
1019       that is provided via VecPlaceArray() just before the call to
1020       KSPSolve() on the block.
1021 
1022       */
1023       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&x));
1024       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&y));
1025       PetscCall(VecSetType(x,vectype));
1026       PetscCall(VecSetType(y,vectype));
1027       PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)x));
1028       PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)y));
1029 
1030       bjac->x[i]      = x;
1031       bjac->y[i]      = y;
1032       bjac->starts[i] = start;
1033 
1034       PetscCall(ISCreateStride(PETSC_COMM_SELF,m,start,1,&is));
1035       bjac->is[i] = is;
1036       PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)is));
1037 
1038       start += m;
1039     }
1040   } else {
1041     bjac = (PC_BJacobi_Multiblock*)jac->data;
1042     /*
1043        Destroy the blocks from the previous iteration
1044     */
1045     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1046       PetscCall(MatDestroyMatrices(n_local,&bjac->pmat));
1047       if (pc->useAmat) {
1048         PetscCall(MatDestroyMatrices(n_local,&bjac->mat));
1049       }
1050       scall = MAT_INITIAL_MATRIX;
1051     } else scall = MAT_REUSE_MATRIX;
1052   }
1053 
1054   PetscCall(MatCreateSubMatrices(pmat,n_local,bjac->is,bjac->is,scall,&bjac->pmat));
1055   if (pc->useAmat) {
1056     PetscCall(MatCreateSubMatrices(mat,n_local,bjac->is,bjac->is,scall,&bjac->mat));
1057   }
1058   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1059      different boundary conditions for the submatrices than for the global problem) */
1060   PetscCall(PCModifySubMatrices(pc,n_local,bjac->is,bjac->is,bjac->pmat,pc->modifysubmatricesP));
1061 
1062   for (i=0; i<n_local; i++) {
1063     PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->pmat[i]));
1064     PetscCall(KSPGetOptionsPrefix(jac->ksp[i],&prefix));
1065     if (pc->useAmat) {
1066       PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->mat[i]));
1067       PetscCall(KSPSetOperators(jac->ksp[i],bjac->mat[i],bjac->pmat[i]));
1068       PetscCall(MatSetOptionsPrefix(bjac->mat[i],prefix));
1069     } else {
1070       PetscCall(KSPSetOperators(jac->ksp[i],bjac->pmat[i],bjac->pmat[i]));
1071     }
1072     PetscCall(MatSetOptionsPrefix(bjac->pmat[i],prefix));
1073     if (pc->setfromoptionscalled) {
1074       PetscCall(KSPSetFromOptions(jac->ksp[i]));
1075     }
1076   }
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 /* ---------------------------------------------------------------------------------------------*/
1081 /*
1082       These are for a single block with multiple processes
1083 */
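/*
   Illustrative example of when this code path is used: on 8 MPI ranks, -pc_bjacobi_blocks 2 gives
   jac->n < size in PCSetUp_BJacobi(), so the ranks are split into 2 contiguous subcommunicators of
   4 ranks each, and each subcommunicator solves its single block with one parallel sub-KSP.
*/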
1084 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1085 {
1086   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
1087   KSP                subksp = jac->ksp[0];
1088   KSPConvergedReason reason;
1089 
1090   PetscFunctionBegin;
1091   PetscCall(KSPSetUp(subksp));
1092   PetscCall(KSPGetConvergedReason(subksp,&reason));
1093   if (reason == KSP_DIVERGED_PC_FAILED) {
1094     pc->failedreason = PC_SUBPC_ERROR;
1095   }
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1100 {
1101   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1102   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1103 
1104   PetscFunctionBegin;
1105   PetscCall(VecDestroy(&mpjac->ysub));
1106   PetscCall(VecDestroy(&mpjac->xsub));
1107   PetscCall(MatDestroy(&mpjac->submats));
1108   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1113 {
1114   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1115   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1116 
1117   PetscFunctionBegin;
1118   PetscCall(PCReset_BJacobi_Multiproc(pc));
1119   PetscCall(KSPDestroy(&jac->ksp[0]));
1120   PetscCall(PetscFree(jac->ksp));
1121   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));
1122 
1123   PetscCall(PetscFree(mpjac));
1124   PetscCall(PetscFree(pc->data));
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc,Vec x,Vec y)
1129 {
1130   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1131   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1132   PetscScalar          *yarray;
1133   const PetscScalar    *xarray;
1134   KSPConvergedReason   reason;
1135 
1136   PetscFunctionBegin;
1137   /* place x's and y's local arrays into xsub and ysub */
1138   PetscCall(VecGetArrayRead(x,&xarray));
1139   PetscCall(VecGetArray(y,&yarray));
1140   PetscCall(VecPlaceArray(mpjac->xsub,xarray));
1141   PetscCall(VecPlaceArray(mpjac->ysub,yarray));
1142 
1143   /* apply preconditioner on each matrix block */
1144   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0));
1145   PetscCall(KSPSolve(jac->ksp[0],mpjac->xsub,mpjac->ysub));
1146   PetscCall(KSPCheckSolve(jac->ksp[0],pc,mpjac->ysub));
1147   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0));
1148   PetscCall(KSPGetConvergedReason(jac->ksp[0],&reason));
1149   if (reason == KSP_DIVERGED_PC_FAILED) {
1150     pc->failedreason = PC_SUBPC_ERROR;
1151   }
1152 
1153   PetscCall(VecResetArray(mpjac->xsub));
1154   PetscCall(VecResetArray(mpjac->ysub));
1155   PetscCall(VecRestoreArrayRead(x,&xarray));
1156   PetscCall(VecRestoreArray(y,&yarray));
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc,Mat X,Mat Y)
1161 {
1162   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1163   KSPConvergedReason   reason;
1164   Mat                  sX,sY;
1165   const PetscScalar    *x;
1166   PetscScalar          *y;
1167   PetscInt             m,N,lda,ldb;
1168 
1169   PetscFunctionBegin;
1170   /* apply preconditioner on each matrix block */
1171   PetscCall(MatGetLocalSize(X,&m,NULL));
1172   PetscCall(MatGetSize(X,NULL,&N));
1173   PetscCall(MatDenseGetLDA(X,&lda));
1174   PetscCall(MatDenseGetLDA(Y,&ldb));
1175   PetscCall(MatDenseGetArrayRead(X,&x));
1176   PetscCall(MatDenseGetArrayWrite(Y,&y));
1177   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,(PetscScalar*)x,&sX));
1178   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,y,&sY));
1179   PetscCall(MatDenseSetLDA(sX,lda));
1180   PetscCall(MatDenseSetLDA(sY,ldb));
1181   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0));
1182   PetscCall(KSPMatSolve(jac->ksp[0],sX,sY));
1183   PetscCall(KSPCheckSolve(jac->ksp[0],pc,NULL));
1184   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0));
1185   PetscCall(MatDestroy(&sY));
1186   PetscCall(MatDestroy(&sX));
1187   PetscCall(MatDenseRestoreArrayWrite(Y,&y));
1188   PetscCall(MatDenseRestoreArrayRead(X,&x));
1189   PetscCall(KSPGetConvergedReason(jac->ksp[0],&reason));
1190   if (reason == KSP_DIVERGED_PC_FAILED) {
1191     pc->failedreason = PC_SUBPC_ERROR;
1192   }
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
1197 {
1198   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1199   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1200   PetscInt             m,n;
1201   MPI_Comm             comm,subcomm=0;
1202   const char           *prefix;
1203   PetscBool            wasSetup = PETSC_TRUE;
1204   VecType              vectype;
1205 
1206   PetscFunctionBegin;
1207   PetscCall(PetscObjectGetComm((PetscObject)pc,&comm));
1208   PetscCheck(jac->n_local <= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only a single block in a subcommunicator is supported");
1209   jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
1210   if (!pc->setupcalled) {
1211     wasSetup  = PETSC_FALSE;
1212     PetscCall(PetscNewLog(pc,&mpjac));
1213     jac->data = (void*)mpjac;
1214 
1215     /* initialize the data structure mpjac */
1216     if (!jac->psubcomm) {
1217       /* Create default contiguous subcommunicators if the user does not provide them */
1218       PetscCall(PetscSubcommCreate(comm,&jac->psubcomm));
1219       PetscCall(PetscSubcommSetNumber(jac->psubcomm,jac->n));
1220       PetscCall(PetscSubcommSetType(jac->psubcomm,PETSC_SUBCOMM_CONTIGUOUS));
1221       PetscCall(PetscLogObjectMemory((PetscObject)pc,sizeof(PetscSubcomm)));
1222     }
1223     mpjac->psubcomm = jac->psubcomm;
1224     subcomm         = PetscSubcommChild(mpjac->psubcomm);
1225 
1226     /* Get matrix blocks of pmat */
1227     PetscCall(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats));
1228 
1229     /* create a new KSP (with its PC) that the processes in each subcomm share */
1230     PetscCall(PetscMalloc1(1,&jac->ksp));
1231     PetscCall(KSPCreate(subcomm,&jac->ksp[0]));
1232     PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0],pc->erroriffailure));
1233     PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0],(PetscObject)pc,1));
1234     PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)jac->ksp[0]));
1235     PetscCall(KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats));
1236     PetscCall(KSPGetPC(jac->ksp[0],&mpjac->pc));
1237 
1238     PetscCall(PCGetOptionsPrefix(pc,&prefix));
1239     PetscCall(KSPSetOptionsPrefix(jac->ksp[0],prefix));
1240     PetscCall(KSPAppendOptionsPrefix(jac->ksp[0],"sub_"));
1241     PetscCall(KSPGetOptionsPrefix(jac->ksp[0],&prefix));
1242     PetscCall(MatSetOptionsPrefix(mpjac->submats,prefix));
1243 
1244     /* create dummy vectors xsub and ysub */
1245     PetscCall(MatGetLocalSize(mpjac->submats,&m,&n));
1246     PetscCall(VecCreateMPIWithArray(subcomm,1,n,PETSC_DECIDE,NULL,&mpjac->xsub));
1247     PetscCall(VecCreateMPIWithArray(subcomm,1,m,PETSC_DECIDE,NULL,&mpjac->ysub));
1248     PetscCall(MatGetVecType(mpjac->submats,&vectype));
1249     PetscCall(VecSetType(mpjac->xsub,vectype));
1250     PetscCall(VecSetType(mpjac->ysub,vectype));
1251     PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->xsub));
1252     PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->ysub));
1253 
1254     pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
1255     pc->ops->reset         = PCReset_BJacobi_Multiproc;
1256     pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
1257     pc->ops->apply         = PCApply_BJacobi_Multiproc;
1258     pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
1259   } else { /* pc->setupcalled */
1260     subcomm = PetscSubcommChild(mpjac->psubcomm);
1261     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1262       /* destroy old matrix blocks, then get new matrix blocks */
1263       if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
1264       PetscCall(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats));
1265     } else {
1266       PetscCall(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_REUSE_MATRIX,&mpjac->submats));
1267     }
1268     PetscCall(KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats));
1269   }
1270 
1271   if (!wasSetup && pc->setfromoptionscalled) {
1272     PetscCall(KSPSetFromOptions(jac->ksp[0]));
1273   }
1274   PetscFunctionReturn(0);
1275 }
1276