xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
1 
2 /*
3    Defines a block Jacobi preconditioner.
4 */
5 
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
7 
8 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC,Mat,Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC,Mat,Mat);
10 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
11 
/*
   PCSetUp_BJacobi - Determines the block partitioning and dispatches to the
   specialized setup routine for that layout.

   jac->n is the requested global block count and jac->n_local the per-rank
   count; either (or both) may be unset (-1).  Cases handled below:
     - 0 < jac->n < size: a block spans several MPI processes -> Multiproc setup
     - otherwise jac->n_local and jac->l_lens (per-block row counts) are derived
       from the user-supplied data (g_lens/l_lens) or from defaults, and then
       the Singleblock (n_local == 1) or Multiblock setup is invoked.
*/
static PetscErrorCode PCSetUp_BJacobi(PC pc)
{
  PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
  Mat            mat  = pc->mat,pmat = pc->pmat;
  PetscBool      hasop;
  PetscInt       N,M,start,i,sum,end;
  PetscInt       bs,i_start=-1,i_end=-1;
  PetscMPIInt    rank,size;

  PetscFunctionBegin;
  CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank));
  CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size));
  CHKERRQ(MatGetLocalSize(pc->pmat,&M,&N));
  CHKERRQ(MatGetBlockSize(pc->pmat,&bs));

  /* fewer blocks than processes: each block is shared by a subcommunicator */
  if (jac->n > 0 && jac->n < size) {
    CHKERRQ(PCSetUp_BJacobi_Multiproc(pc));
    PetscFunctionReturn(0);
  }

  /* --------------------------------------------------------------------------
      Determines the number of blocks assigned to each processor
  -----------------------------------------------------------------------------*/

  /*   local block count  given */
  if (jac->n_local > 0 && jac->n < 0) {
    CHKERRMPI(MPIU_Allreduce(&jac->n_local,&jac->n,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc)));
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i=0; i<jac->n_local; i++) {
        /* every block length must be a multiple of the matrix block size */
        PetscCheckFalse(jac->l_lens[i]/bs*bs !=jac->l_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      PetscCheckFalse(sum != M,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local lens set incorrectly");
    } else {
      /* split the M/bs local block-rows as evenly as possible over the local blocks */
      CHKERRQ(PetscMalloc1(jac->n_local,&jac->l_lens));
      for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
    /* global blocks given: determine which ones are local */
    if (jac->g_lens) {
      /* check that g_lens has valid entries */
      for (i=0; i<jac->n; i++) {
        PetscCheckFalse(!jac->g_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Zero block not allowed");
        PetscCheckFalse(jac->g_lens[i]/bs*bs != jac->g_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        jac->n_local = jac->n;
        CHKERRQ(PetscMalloc1(jac->n_local,&jac->l_lens));
        CHKERRQ(PetscArraycpy(jac->l_lens,jac->g_lens,jac->n_local));
        /* check that user set these correctly */
        sum = 0;
        for (i=0; i<jac->n_local; i++) sum += jac->l_lens[i];
        PetscCheckFalse(sum != M,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Global lens set incorrectly");
      } else {
        CHKERRQ(MatGetOwnershipRange(pc->pmat,&start,&end));
        /* loop over blocks determining first one owned by me */
        sum = 0;
        for (i=0; i<jac->n+1; i++) {
          if (sum == start) { i_start = i; goto start_1;}
          if (i < jac->n) sum += jac->g_lens[i];
        }
        /* reaching here means no block boundary lines up with this rank's first owned row */
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
start_1:
        /* continue scanning for the block boundary that matches this rank's last owned row */
        for (i=i_start; i<jac->n+1; i++) {
          if (sum == end) { i_end = i; goto end_1; }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
end_1:
        jac->n_local = i_end - i_start;
        CHKERRQ(PetscMalloc1(jac->n_local,&jac->l_lens));
        CHKERRQ(PetscArraycpy(jac->l_lens,jac->g_lens+i_start,jac->n_local));
      }
    } else { /* no global block lengths given, determine them using default layout */
      jac->n_local = jac->n/size + ((jac->n % size) > rank);
      CHKERRQ(PetscMalloc1(jac->n_local,&jac->l_lens));
      for (i=0; i<jac->n_local; i++) {
        jac->l_lens[i] = ((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i))*bs;
        PetscCheckFalse(!jac->l_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given: default to one block per process */
    jac->n         = size;
    jac->n_local   = 1;
    CHKERRQ(PetscMalloc1(1,&jac->l_lens));
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      CHKERRQ(PetscMalloc1(jac->n_local,&jac->l_lens));
      for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
    }
  }
  PetscCheckFalse(jac->n_local < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of blocks is less than number of processors");

  /* -------------------------
      Determines mat and pmat
  ---------------------------*/
  CHKERRQ(MatHasOperation(pc->mat,MATOP_GET_DIAGONAL_BLOCK,&hasop));
  if (!hasop && size == 1) {
    /* sequential and no diagonal-block support: use the matrices directly */
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      CHKERRQ(MatGetDiagonalBlock(pc->mat,&mat));
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      CHKERRQ(MatGetDiagonalBlock(pc->pmat,&pmat));
    } else pmat = mat;
  }

  /* ------
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    CHKERRQ(PCSetUp_BJacobi_Singleblock(pc,mat,pmat));
  } else {
    CHKERRQ(PCSetUp_BJacobi_Multiblock(pc,mat,pmat));
  }
  PetscFunctionReturn(0);
}
134 
135 /* Default destroy, if it has never been setup */
136 static PetscErrorCode PCDestroy_BJacobi(PC pc)
137 {
138   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
139 
140   PetscFunctionBegin;
141   CHKERRQ(PetscFree(jac->g_lens));
142   CHKERRQ(PetscFree(jac->l_lens));
143   CHKERRQ(PetscFree(pc->data));
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode PCSetFromOptions_BJacobi(PetscOptionItems *PetscOptionsObject,PC pc)
148 {
149   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
150   PetscInt       blocks,i;
151   PetscBool      flg;
152 
153   PetscFunctionBegin;
154   CHKERRQ(PetscOptionsHead(PetscOptionsObject,"Block Jacobi options"));
155   CHKERRQ(PetscOptionsInt("-pc_bjacobi_blocks","Total number of blocks","PCBJacobiSetTotalBlocks",jac->n,&blocks,&flg));
156   if (flg) CHKERRQ(PCBJacobiSetTotalBlocks(pc,blocks,NULL));
157   CHKERRQ(PetscOptionsInt("-pc_bjacobi_local_blocks","Local number of blocks","PCBJacobiSetLocalBlocks",jac->n_local,&blocks,&flg));
158   if (flg) CHKERRQ(PCBJacobiSetLocalBlocks(pc,blocks,NULL));
159   if (jac->ksp) {
160     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
161      * unless we had already been called. */
162     for (i=0; i<jac->n_local; i++) {
163       CHKERRQ(KSPSetFromOptions(jac->ksp[i]));
164     }
165   }
166   CHKERRQ(PetscOptionsTail());
167   PetscFunctionReturn(0);
168 }
169 
170 #include <petscdraw.h>
#include <petscdraw.h>
/*
   PCView_BJacobi - Viewer for the block Jacobi PC.  Supports ASCII (summary, or
   per-block detail with ::ascii_info_detail), string, and draw viewers; the
   sub-KSP(s) are viewed through sub-viewers where applicable.
*/
static PetscErrorCode PCView_BJacobi(PC pc,PetscViewer viewer)
{
  PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
  /* only dereferenced when mpjac->psubcomm is set, i.e. in the multiproc layout */
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
  PetscErrorCode       ierr;
  PetscMPIInt          rank;
  PetscInt             i;
  PetscBool            iascii,isstring,isdraw;
  PetscViewer          sviewer;
  PetscViewerFormat    format;
  const char           *prefix;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring));
  CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    if (pc->useAmat) {
      CHKERRQ(PetscViewerASCIIPrintf(viewer,"  using Amat local matrix, number of blocks = %D\n",jac->n));
    }
    CHKERRQ(PetscViewerASCIIPrintf(viewer,"  number of blocks = %D\n",jac->n));
    CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank));
    CHKERRQ(PetscViewerGetFormat(viewer,&format));
    if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* summary format: view only the first block's solver, on rank 0 */
      CHKERRQ(PetscViewerASCIIPrintf(viewer,"  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
      CHKERRQ(PCGetOptionsPrefix(pc,&prefix));
      CHKERRQ(PetscViewerASCIIPrintf(viewer,"  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n",prefix?prefix:""));
      if (jac->ksp && !jac->psubcomm) {
        CHKERRQ(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
        if (rank == 0) {
          CHKERRQ(PetscViewerASCIIPushTab(viewer));
          CHKERRQ(KSPView(jac->ksp[0],sviewer));
          CHKERRQ(PetscViewerASCIIPopTab(viewer));
        }
        CHKERRQ(PetscViewerFlush(sviewer));
        CHKERRQ(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
        CHKERRQ(PetscViewerFlush(viewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        CHKERRQ(PetscViewerASCIIPopSynchronized(viewer));
      } else if (mpjac && jac->ksp && mpjac->psubcomm) {
        /* multiproc layout: the shared block's KSP lives on the subcommunicator */
        CHKERRQ(PetscViewerGetSubViewer(viewer,mpjac->psubcomm->child,&sviewer));
        if (!mpjac->psubcomm->color) {
          CHKERRQ(PetscViewerASCIIPushTab(viewer));
          CHKERRQ(KSPView(*(jac->ksp),sviewer));
          CHKERRQ(PetscViewerASCIIPopTab(viewer));
        }
        CHKERRQ(PetscViewerFlush(sviewer));
        CHKERRQ(PetscViewerRestoreSubViewer(viewer,mpjac->psubcomm->child,&sviewer));
        CHKERRQ(PetscViewerFlush(viewer));
        /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
        CHKERRQ(PetscViewerASCIIPopSynchronized(viewer));
      } else {
        CHKERRQ(PetscViewerFlush(viewer));
      }
    } else {
      /* detail format: every rank views every local block's solver */
      PetscInt n_global;
      CHKERRMPI(MPIU_Allreduce(&jac->n_local,&n_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc)));
      CHKERRQ(PetscViewerASCIIPushSynchronized(viewer));
      CHKERRQ(PetscViewerASCIIPrintf(viewer,"  Local solver information for each block is in the following KSP and PC objects:\n"));
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] number of local blocks = %D, first local block number = %D\n",
                                                rank,jac->n_local,jac->first_local);CHKERRQ(ierr);
      CHKERRQ(PetscViewerASCIIPushTab(viewer));
      CHKERRQ(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
      for (i=0; i<jac->n_local; i++) {
        CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local block number %D\n",rank,i));
        CHKERRQ(KSPView(jac->ksp[i],sviewer));
        CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n"));
      }
      CHKERRQ(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
      CHKERRQ(PetscViewerASCIIPopTab(viewer));
      CHKERRQ(PetscViewerFlush(viewer));
      CHKERRQ(PetscViewerASCIIPopSynchronized(viewer));
    }
  } else if (isstring) {
    CHKERRQ(PetscViewerStringSPrintf(viewer," blks=%D",jac->n));
    CHKERRQ(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (jac->ksp) CHKERRQ(KSPView(jac->ksp[0],sviewer));
    CHKERRQ(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
  } else if (isdraw) {
    PetscDraw draw;
    char      str[25];
    PetscReal x,y,bottom,h;

    CHKERRQ(PetscViewerDrawGetDraw(viewer,0,&draw));
    CHKERRQ(PetscDrawGetCurrentPoint(draw,&x,&y));
    CHKERRQ(PetscSNPrintf(str,25,"Number blocks %D",jac->n));
    CHKERRQ(PetscDrawStringBoxed(draw,x,y,PETSC_DRAW_RED,PETSC_DRAW_BLACK,str,NULL,&h));
    bottom = y - h;
    CHKERRQ(PetscDrawPushCurrentPoint(draw,x,bottom));
    /* warning the communicator on viewer is different then on ksp in parallel */
    if (jac->ksp) CHKERRQ(KSPView(jac->ksp[0],viewer));
    CHKERRQ(PetscDrawPopCurrentPoint(draw));
  }
  PetscFunctionReturn(0);
}
266 
267 /* -------------------------------------------------------------------------------------*/
268 
269 static PetscErrorCode  PCBJacobiGetSubKSP_BJacobi(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
270 {
271   PC_BJacobi *jac = (PC_BJacobi*)pc->data;
272 
273   PetscFunctionBegin;
274   PetscCheckFalse(!pc->setupcalled,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call KSPSetUp() or PCSetUp() first");
275 
276   if (n_local) *n_local = jac->n_local;
277   if (first_local) *first_local = jac->first_local;
278   if (ksp) *ksp                 = jac->ksp;
279   PetscFunctionReturn(0);
280 }
281 
282 static PetscErrorCode  PCBJacobiSetTotalBlocks_BJacobi(PC pc,PetscInt blocks,PetscInt *lens)
283 {
284   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
285 
286   PetscFunctionBegin;
287   PetscCheckFalse(pc->setupcalled > 0 && jac->n!=blocks,PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
288   jac->n = blocks;
289   if (!lens) jac->g_lens = NULL;
290   else {
291     CHKERRQ(PetscMalloc1(blocks,&jac->g_lens));
292     CHKERRQ(PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt)));
293     CHKERRQ(PetscArraycpy(jac->g_lens,lens,blocks));
294   }
295   PetscFunctionReturn(0);
296 }
297 
298 static PetscErrorCode  PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
299 {
300   PC_BJacobi *jac = (PC_BJacobi*) pc->data;
301 
302   PetscFunctionBegin;
303   *blocks = jac->n;
304   if (lens) *lens = jac->g_lens;
305   PetscFunctionReturn(0);
306 }
307 
308 static PetscErrorCode  PCBJacobiSetLocalBlocks_BJacobi(PC pc,PetscInt blocks,const PetscInt lens[])
309 {
310   PC_BJacobi     *jac;
311 
312   PetscFunctionBegin;
313   jac = (PC_BJacobi*)pc->data;
314 
315   jac->n_local = blocks;
316   if (!lens) jac->l_lens = NULL;
317   else {
318     CHKERRQ(PetscMalloc1(blocks,&jac->l_lens));
319     CHKERRQ(PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt)));
320     CHKERRQ(PetscArraycpy(jac->l_lens,lens,blocks));
321   }
322   PetscFunctionReturn(0);
323 }
324 
325 static PetscErrorCode  PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
326 {
327   PC_BJacobi *jac = (PC_BJacobi*) pc->data;
328 
329   PetscFunctionBegin;
330   *blocks = jac->n_local;
331   if (lens) *lens = jac->l_lens;
332   PetscFunctionReturn(0);
333 }
334 
335 /* -------------------------------------------------------------------------------------*/
336 
337 /*@C
338    PCBJacobiGetSubKSP - Gets the local KSP contexts for all blocks on
339    this processor.
340 
341    Not Collective
342 
343    Input Parameter:
344 .  pc - the preconditioner context
345 
346    Output Parameters:
347 +  n_local - the number of blocks on this processor, or NULL
348 .  first_local - the global number of the first block on this processor, or NULL
349 -  ksp - the array of KSP contexts
350 
351    Notes:
352    After PCBJacobiGetSubKSP() the array of KSP contexts is not to be freed.
353 
354    Currently for some matrix implementations only 1 block per processor
355    is supported.
356 
357    You must call KSPSetUp() or PCSetUp() before calling PCBJacobiGetSubKSP().
358 
359    Fortran Usage: You must pass in a KSP array that is large enough to contain all the local KSPs.
360       You can call PCBJacobiGetSubKSP(pc,nlocal,firstlocal,PETSC_NULL_KSP,ierr) to determine how large the
361       KSP array must be.
362 
363    Level: advanced
364 
365 .seealso: PCASMGetSubKSP()
366 @*/
PetscErrorCode  PCBJacobiGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod errors if the PC type does not implement PCBJacobiGetSubKSP_C */
  CHKERRQ(PetscUseMethod(pc,"PCBJacobiGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp)));
  PetscFunctionReturn(0);
}
374 
375 /*@
376    PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
377    Jacobi preconditioner.
378 
379    Collective on PC
380 
381    Input Parameters:
382 +  pc - the preconditioner context
383 .  blocks - the number of blocks
384 -  lens - [optional] integer array containing the size of each block
385 
386    Options Database Key:
387 .  -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
388 
389    Notes:
390    Currently only a limited number of blocking configurations are supported.
391    All processors sharing the PC must call this routine with the same data.
392 
393    Level: intermediate
394 
395 .seealso: PCSetUseAmat(), PCBJacobiSetLocalBlocks()
396 @*/
PetscErrorCode  PCBJacobiSetTotalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* the global block count must be strictly positive (contrast PCBJacobiSetLocalBlocks(), which allows 0) */
  PetscCheckFalse(blocks <= 0,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Must have positive blocks");
  /* PetscTryMethod is a no-op for PC types that do not implement this, so it is safe to call on any PC */
  CHKERRQ(PetscTryMethod(pc,"PCBJacobiSetTotalBlocks_C",(PC,PetscInt,const PetscInt[]),(pc,blocks,lens)));
  PetscFunctionReturn(0);
}
405 
406 /*@C
407    PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
408    Jacobi preconditioner.
409 
410    Not Collective
411 
412    Input Parameter:
413 .  pc - the preconditioner context
414 
415    Output parameters:
416 +  blocks - the number of blocks
417 -  lens - integer array containing the size of each block
418 
419    Level: intermediate
420 
421 .seealso: PCSetUseAmat(), PCBJacobiGetLocalBlocks()
422 @*/
PetscErrorCode  PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID,1);
  PetscValidIntPointer(blocks,2);
  /* PetscUseMethod errors if the PC type does not implement PCBJacobiGetTotalBlocks_C */
  CHKERRQ(PetscUseMethod(pc,"PCBJacobiGetTotalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens)));
  PetscFunctionReturn(0);
}
431 
432 /*@
433    PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
434    Jacobi preconditioner.
435 
436    Not Collective
437 
438    Input Parameters:
439 +  pc - the preconditioner context
440 .  blocks - the number of blocks
441 -  lens - [optional] integer array containing size of each block
442 
443    Options Database Key:
444 .  -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
445 
446    Note:
447    Currently only a limited number of blocking configurations are supported.
448 
449    Level: intermediate
450 
451 .seealso: PCSetUseAmat(), PCBJacobiSetTotalBlocks()
452 @*/
453 PetscErrorCode  PCBJacobiSetLocalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
454 {
455   PetscFunctionBegin;
456   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
457   PetscCheckFalse(blocks < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must have nonegative blocks");
458   CHKERRQ(PetscTryMethod(pc,"PCBJacobiSetLocalBlocks_C",(PC,PetscInt,const PetscInt []),(pc,blocks,lens)));
459   PetscFunctionReturn(0);
460 }
461 
462 /*@C
463    PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
464    Jacobi preconditioner.
465 
466    Not Collective
467 
468    Input Parameters:
469 +  pc - the preconditioner context
470 .  blocks - the number of blocks
471 -  lens - [optional] integer array containing size of each block
472 
473    Note:
474    Currently only a limited number of blocking configurations are supported.
475 
476    Level: intermediate
477 
478 .seealso: PCSetUseAmat(), PCBJacobiGetTotalBlocks()
479 @*/
PetscErrorCode  PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID,1);
  PetscValidIntPointer(blocks,2);
  /* PetscUseMethod errors if the PC type does not implement PCBJacobiGetLocalBlocks_C */
  CHKERRQ(PetscUseMethod(pc,"PCBJacobiGetLocalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens)));
  PetscFunctionReturn(0);
}
488 
489 /* -----------------------------------------------------------------------------------*/
490 
491 /*MC
492    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
493            its own KSP object.
494 
495    Options Database Keys:
496 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
497 -  -pc_bjacobi_blocks <n> - use n total blocks
498 
499    Notes:
500      Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
501 
502      To set options on the solvers for each block, append -sub_ to all the KSP and PC
503         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
504 
505      To set the options on the solvers separately for each block, call PCBJacobiGetSubKSP()
506          and set the options directly on the resulting KSP object (you can access its PC
507          with KSPGetPC())
508 
509      For GPU-based vectors (CUDA, ViennaCL) it is recommended to use exactly one block per MPI process for best
510          performance.  Different block partitioning may lead to additional data transfers
511          between host and GPU that lead to degraded performance.
512 
513      The options prefix for each block is sub_, for example -sub_pc_type lu.
514 
515      When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
516 
517      See PCJACOBI for point Jacobi preconditioning, PCVPBJACOBI for variable size point block Jacobi and PCPBJACOBI for large blocks
518 
519    Level: beginner
520 
521 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
522            PCASM, PCSetUseAmat(), PCGetUseAmat(), PCBJacobiGetSubKSP(), PCBJacobiSetTotalBlocks(),
523            PCBJacobiSetLocalBlocks(), PCSetModifySubMatrices(), PCJACOBI, PCVPBJACOBI, PCPBJACOBI
524 M*/
525 
/*
   PCCreate_BJacobi - Constructor for the PCBJACOBI type: allocates the context,
   installs the function table, and registers the PCBJacobi* method hooks.
   The actual block layout and sub-KSPs are created later, in PCSetUp_BJacobi().
*/
PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
{
  PetscMPIInt    rank;
  PC_BJacobi     *jac;

  PetscFunctionBegin;
  CHKERRQ(PetscNewLog(pc,&jac));
  CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank));

  /* apply/applytranspose etc. are filled in by the layout-specific setup routines */
  pc->ops->apply           = NULL;
  pc->ops->matapply        = NULL;
  pc->ops->applytranspose  = NULL;
  pc->ops->setup           = PCSetUp_BJacobi;
  pc->ops->destroy         = PCDestroy_BJacobi;
  pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
  pc->ops->view            = PCView_BJacobi;
  pc->ops->applyrichardson = NULL;

  /* -1 means "not yet chosen"; the counts are resolved in PCSetUp_BJacobi() */
  pc->data               = (void*)jac;
  jac->n                 = -1;
  jac->n_local           = -1;
  jac->first_local       = rank;
  jac->ksp               = NULL;
  jac->g_lens            = NULL;
  jac->l_lens            = NULL;
  jac->psubcomm          = NULL;

  CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetSubKSP_C",PCBJacobiGetSubKSP_BJacobi));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetTotalBlocks_C",PCBJacobiSetTotalBlocks_BJacobi));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetTotalBlocks_C",PCBJacobiGetTotalBlocks_BJacobi));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetLocalBlocks_C",PCBJacobiSetLocalBlocks_BJacobi));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetLocalBlocks_C",PCBJacobiGetLocalBlocks_BJacobi));
  PetscFunctionReturn(0);
}
560 
561 /* --------------------------------------------------------------------------------------------*/
562 /*
563         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
564 */
565 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
566 {
567   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
568   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
569 
570   PetscFunctionBegin;
571   CHKERRQ(KSPReset(jac->ksp[0]));
572   CHKERRQ(VecDestroy(&bjac->x));
573   CHKERRQ(VecDestroy(&bjac->y));
574   PetscFunctionReturn(0);
575 }
576 
/*
   Destroy for the single-block case: reset releases the work vectors, then the
   sub-KSP, the length arrays, and both context structures are freed.
*/
static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
{
  PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;

  PetscFunctionBegin;
  CHKERRQ(PCReset_BJacobi_Singleblock(pc));
  CHKERRQ(KSPDestroy(&jac->ksp[0]));
  CHKERRQ(PetscFree(jac->ksp));
  CHKERRQ(PetscFree(jac->l_lens));
  CHKERRQ(PetscFree(jac->g_lens));
  CHKERRQ(PetscFree(bjac));
  CHKERRQ(PetscFree(pc->data));
  PetscFunctionReturn(0);
}
592 
593 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
594 {
595   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
596   KSP                subksp = jac->ksp[0];
597   KSPConvergedReason reason;
598 
599   PetscFunctionBegin;
600   CHKERRQ(KSPSetUp(subksp));
601   CHKERRQ(KSPGetConvergedReason(subksp,&reason));
602   if (reason == KSP_DIVERGED_PC_FAILED) {
603     pc->failedreason = PC_SUBPC_ERROR;
604   }
605   PetscFunctionReturn(0);
606 }
607 
/* Applies the single-block preconditioner: solves the local block with the sub-KSP */
static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc,Vec x,Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;

  PetscFunctionBegin;
  /* view the local portions of x and y as the sequential work vectors (no copy) */
  CHKERRQ(VecGetLocalVectorRead(x, bjac->x));
  CHKERRQ(VecGetLocalVector(y, bjac->y));
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
  CHKERRQ(KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner));
  CHKERRQ(KSPSolve(jac->ksp[0],bjac->x,bjac->y));
  CHKERRQ(KSPCheckSolve(jac->ksp[0],pc,bjac->y));
  CHKERRQ(VecRestoreLocalVectorRead(x, bjac->x));
  CHKERRQ(VecRestoreLocalVector(y, bjac->y));
  PetscFunctionReturn(0);
}
626 
/* Applies the single-block preconditioner to many right-hand sides at once via KSPMatSolve() */
static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc,Mat X,Mat Y)
{
  PC_BJacobi     *jac  = (PC_BJacobi*)pc->data;
  Mat            sX,sY;

  PetscFunctionBegin;
  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
     matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
     of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
  CHKERRQ(KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner));
  /* solve on the local (sequential) parts of the dense right-hand-side/solution matrices */
  CHKERRQ(MatDenseGetLocalMatrix(X,&sX));
  CHKERRQ(MatDenseGetLocalMatrix(Y,&sY));
  CHKERRQ(KSPMatSolve(jac->ksp[0],sX,sY));
  PetscFunctionReturn(0);
}
642 
/* Applies the symmetric-left portion of the sub-PC directly, bypassing the sub-KSP */
static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc,Vec x,Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;
  PC                     subpc;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  CHKERRQ(VecGetArrayRead(x,&x_array));
  CHKERRQ(VecGetArray(y,&y_array));
  CHKERRQ(VecPlaceArray(bjac->x,x_array));
  CHKERRQ(VecPlaceArray(bjac->y,y_array));
  /* apply the symmetric left portion of the inner PC operator */
  /* note this by-passes the inner KSP and its options completely */
  CHKERRQ(KSPGetPC(jac->ksp[0],&subpc));
  CHKERRQ(PCApplySymmetricLeft(subpc,bjac->x,bjac->y));
  /* detach the placed arrays before handing them back to x and y */
  CHKERRQ(VecResetArray(bjac->x));
  CHKERRQ(VecResetArray(bjac->y));
  CHKERRQ(VecRestoreArrayRead(x,&x_array));
  CHKERRQ(VecRestoreArray(y,&y_array));
  PetscFunctionReturn(0);
}
672 
673 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc,Vec x,Vec y)
674 {
675   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
676   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
677   PetscScalar            *y_array;
678   const PetscScalar      *x_array;
679   PC                     subpc;
680 
681   PetscFunctionBegin;
682   /*
683       The VecPlaceArray() is to avoid having to copy the
684     y vector into the bjac->x vector. The reason for
685     the bjac->x vector is that we need a sequential vector
686     for the sequential solve.
687   */
688   CHKERRQ(VecGetArrayRead(x,&x_array));
689   CHKERRQ(VecGetArray(y,&y_array));
690   CHKERRQ(VecPlaceArray(bjac->x,x_array));
691   CHKERRQ(VecPlaceArray(bjac->y,y_array));
692 
693   /* apply the symmetric right portion of the inner PC operator */
694   /* note this by-passes the inner KSP and its options completely */
695 
696   CHKERRQ(KSPGetPC(jac->ksp[0],&subpc));
697   CHKERRQ(PCApplySymmetricRight(subpc,bjac->x,bjac->y));
698 
699   CHKERRQ(VecRestoreArrayRead(x,&x_array));
700   CHKERRQ(VecRestoreArray(y,&y_array));
701   PetscFunctionReturn(0);
702 }
703 
/* Applies the transpose of the single-block preconditioner via KSPSolveTranspose() */
static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc,Vec x,Vec y)
{
  PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
  PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
  PetscScalar            *y_array;
  const PetscScalar      *x_array;

  PetscFunctionBegin;
  /*
      The VecPlaceArray() is to avoid having to copy the
    y vector into the bjac->x vector. The reason for
    the bjac->x vector is that we need a sequential vector
    for the sequential solve.
  */
  CHKERRQ(VecGetArrayRead(x,&x_array));
  CHKERRQ(VecGetArray(y,&y_array));
  CHKERRQ(VecPlaceArray(bjac->x,x_array));
  CHKERRQ(VecPlaceArray(bjac->y,y_array));
  CHKERRQ(KSPSolveTranspose(jac->ksp[0],bjac->x,bjac->y));
  CHKERRQ(KSPCheckSolve(jac->ksp[0],pc,bjac->y));
  /* detach the placed arrays before handing them back to x and y */
  CHKERRQ(VecResetArray(bjac->x));
  CHKERRQ(VecResetArray(bjac->y));
  CHKERRQ(VecRestoreArrayRead(x,&x_array));
  CHKERRQ(VecRestoreArray(y,&y_array));
  PetscFunctionReturn(0);
}
730 
/*
   PCSetUp_BJacobi_Singleblock - Sets up block Jacobi for the common case of
   exactly one block per MPI process.  Creates (on first setup) the sequential
   sub-KSP with options prefix "<pc_prefix>sub_", the function table entries
   specific to the single-block implementation, and the zero-length work
   vectors whose arrays are supplied later via VecPlaceArray() in the apply
   routines.  On subsequent calls only the operators are refreshed.
*/
static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc,Mat mat,Mat pmat)
{
  PC_BJacobi             *jac = (PC_BJacobi*)pc->data;
  PetscInt               m;
  KSP                    ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool              wasSetup = PETSC_TRUE; /* becomes FALSE only when the sub-KSP is created here */
  VecType                vectype;
  const char             *prefix;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    if (!jac->ksp) {
      wasSetup = PETSC_FALSE;

      CHKERRQ(KSPCreate(PETSC_COMM_SELF,&ksp));
      CHKERRQ(KSPSetErrorIfNotConverged(ksp,pc->erroriffailure));
      CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1));
      CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp));
      CHKERRQ(KSPSetType(ksp,KSPPREONLY));
      CHKERRQ(PCGetOptionsPrefix(pc,&prefix));
      CHKERRQ(KSPSetOptionsPrefix(ksp,prefix));
      CHKERRQ(KSPAppendOptionsPrefix(ksp,"sub_"));

      /* install the single-block implementations of the PC operations */
      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      CHKERRQ(PetscMalloc1(1,&jac->ksp));
      jac->ksp[0] = ksp;

      CHKERRQ(PetscNewLog(pc,&bjac));
      jac->data = (void*)bjac;
    } else {
      /* sub-KSP already exists (e.g. via PCBJacobiGetSubKSP() before setup) */
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock*)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    CHKERRQ(MatGetSize(pmat,&m,&m));
    CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->x));
    CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->y));
    /* match the vector type to the matrix so GPU matrices get GPU vectors */
    CHKERRQ(MatGetVecType(pmat,&vectype));
    CHKERRQ(VecSetType(bjac->x,vectype));
    CHKERRQ(VecSetType(bjac->y,vectype));
    CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->x));
    CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->y));
  } else {
    /* repeated setup: everything exists, only the operators change below */
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock*)jac->data;
  }
  /* the sub-matrices inherit the sub-KSP prefix so their options are findable */
  CHKERRQ(KSPGetOptionsPrefix(ksp,&prefix));
  if (pc->useAmat) {
    CHKERRQ(KSPSetOperators(ksp,mat,pmat));
    CHKERRQ(MatSetOptionsPrefix(mat,prefix));
  } else {
    CHKERRQ(KSPSetOperators(ksp,pmat,pmat));
  }
  CHKERRQ(MatSetOptionsPrefix(pmat,prefix));
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    CHKERRQ(KSPSetFromOptions(ksp));
  }
  PetscFunctionReturn(0);
}
807 
808 /* ---------------------------------------------------------------------------------------------*/
809 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
810 {
811   PC_BJacobi            *jac  = (PC_BJacobi*)pc->data;
812   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
813   PetscInt              i;
814 
815   PetscFunctionBegin;
816   if (bjac && bjac->pmat) {
817     CHKERRQ(MatDestroyMatrices(jac->n_local,&bjac->pmat));
818     if (pc->useAmat) {
819       CHKERRQ(MatDestroyMatrices(jac->n_local,&bjac->mat));
820     }
821   }
822 
823   for (i=0; i<jac->n_local; i++) {
824     CHKERRQ(KSPReset(jac->ksp[i]));
825     if (bjac && bjac->x) {
826       CHKERRQ(VecDestroy(&bjac->x[i]));
827       CHKERRQ(VecDestroy(&bjac->y[i]));
828       CHKERRQ(ISDestroy(&bjac->is[i]));
829     }
830   }
831   CHKERRQ(PetscFree(jac->l_lens));
832   CHKERRQ(PetscFree(jac->g_lens));
833   PetscFunctionReturn(0);
834 }
835 
836 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
837 {
838   PC_BJacobi            *jac  = (PC_BJacobi*)pc->data;
839   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
840   PetscInt              i;
841 
842   PetscFunctionBegin;
843   CHKERRQ(PCReset_BJacobi_Multiblock(pc));
844   if (bjac) {
845     CHKERRQ(PetscFree2(bjac->x,bjac->y));
846     CHKERRQ(PetscFree(bjac->starts));
847     CHKERRQ(PetscFree(bjac->is));
848   }
849   CHKERRQ(PetscFree(jac->data));
850   for (i=0; i<jac->n_local; i++) {
851     CHKERRQ(KSPDestroy(&jac->ksp[i]));
852   }
853   CHKERRQ(PetscFree(jac->ksp));
854   CHKERRQ(PetscFree(pc->data));
855   PetscFunctionReturn(0);
856 }
857 
858 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
859 {
860   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
861   PetscInt           i,n_local = jac->n_local;
862   KSPConvergedReason reason;
863 
864   PetscFunctionBegin;
865   for (i=0; i<n_local; i++) {
866     CHKERRQ(KSPSetUp(jac->ksp[i]));
867     CHKERRQ(KSPGetConvergedReason(jac->ksp[i],&reason));
868     if (reason == KSP_DIVERGED_PC_FAILED) {
869       pc->failedreason = PC_SUBPC_ERROR;
870     }
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 /*
876       Preconditioner for block Jacobi
877 */
878 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc,Vec x,Vec y)
879 {
880   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
881   PetscInt              i,n_local = jac->n_local;
882   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
883   PetscScalar           *yin;
884   const PetscScalar     *xin;
885 
886   PetscFunctionBegin;
887   CHKERRQ(VecGetArrayRead(x,&xin));
888   CHKERRQ(VecGetArray(y,&yin));
889   for (i=0; i<n_local; i++) {
890     /*
891        To avoid copying the subvector from x into a workspace we instead
892        make the workspace vector array point to the subpart of the array of
893        the global vector.
894     */
895     CHKERRQ(VecPlaceArray(bjac->x[i],xin+bjac->starts[i]));
896     CHKERRQ(VecPlaceArray(bjac->y[i],yin+bjac->starts[i]));
897 
898     CHKERRQ(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
899     CHKERRQ(KSPSolve(jac->ksp[i],bjac->x[i],bjac->y[i]));
900     CHKERRQ(KSPCheckSolve(jac->ksp[i],pc,bjac->y[i]));
901     CHKERRQ(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
902 
903     CHKERRQ(VecResetArray(bjac->x[i]));
904     CHKERRQ(VecResetArray(bjac->y[i]));
905   }
906   CHKERRQ(VecRestoreArrayRead(x,&xin));
907   CHKERRQ(VecRestoreArray(y,&yin));
908   PetscFunctionReturn(0);
909 }
910 
911 /*
912       Preconditioner for block Jacobi
913 */
914 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc,Vec x,Vec y)
915 {
916   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
917   PetscInt              i,n_local = jac->n_local;
918   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
919   PetscScalar           *yin;
920   const PetscScalar     *xin;
921 
922   PetscFunctionBegin;
923   CHKERRQ(VecGetArrayRead(x,&xin));
924   CHKERRQ(VecGetArray(y,&yin));
925   for (i=0; i<n_local; i++) {
926     /*
927        To avoid copying the subvector from x into a workspace we instead
928        make the workspace vector array point to the subpart of the array of
929        the global vector.
930     */
931     CHKERRQ(VecPlaceArray(bjac->x[i],xin+bjac->starts[i]));
932     CHKERRQ(VecPlaceArray(bjac->y[i],yin+bjac->starts[i]));
933 
934     CHKERRQ(PetscLogEventBegin(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
935     CHKERRQ(KSPSolveTranspose(jac->ksp[i],bjac->x[i],bjac->y[i]));
936     CHKERRQ(KSPCheckSolve(jac->ksp[i],pc,bjac->y[i]));
937     CHKERRQ(PetscLogEventEnd(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0));
938 
939     CHKERRQ(VecResetArray(bjac->x[i]));
940     CHKERRQ(VecResetArray(bjac->y[i]));
941   }
942   CHKERRQ(VecRestoreArrayRead(x,&xin));
943   CHKERRQ(VecRestoreArray(y,&yin));
944   PetscFunctionReturn(0);
945 }
946 
947 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc,Mat mat,Mat pmat)
948 {
949   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
950   PetscInt              m,n_local,N,M,start,i;
951   const char            *prefix;
952   KSP                   ksp;
953   Vec                   x,y;
954   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
955   PC                    subpc;
956   IS                    is;
957   MatReuse              scall;
958   VecType               vectype;
959 
960   PetscFunctionBegin;
961   CHKERRQ(MatGetLocalSize(pc->pmat,&M,&N));
962 
963   n_local = jac->n_local;
964 
965   if (pc->useAmat) {
966     PetscBool same;
967     CHKERRQ(PetscObjectTypeCompare((PetscObject)mat,((PetscObject)pmat)->type_name,&same));
968     PetscCheckFalse(!same,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Matrices not of same type");
969   }
970 
971   if (!pc->setupcalled) {
972     scall = MAT_INITIAL_MATRIX;
973 
974     if (!jac->ksp) {
975       pc->ops->reset         = PCReset_BJacobi_Multiblock;
976       pc->ops->destroy       = PCDestroy_BJacobi_Multiblock;
977       pc->ops->apply         = PCApply_BJacobi_Multiblock;
978       pc->ops->matapply      = NULL;
979       pc->ops->applytranspose= PCApplyTranspose_BJacobi_Multiblock;
980       pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiblock;
981 
982       CHKERRQ(PetscNewLog(pc,&bjac));
983       CHKERRQ(PetscMalloc1(n_local,&jac->ksp));
984       CHKERRQ(PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(KSP))));
985       CHKERRQ(PetscMalloc2(n_local,&bjac->x,n_local,&bjac->y));
986       CHKERRQ(PetscMalloc1(n_local,&bjac->starts));
987       CHKERRQ(PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(PetscScalar))));
988 
989       jac->data = (void*)bjac;
990       CHKERRQ(PetscMalloc1(n_local,&bjac->is));
991       CHKERRQ(PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(IS))));
992 
993       for (i=0; i<n_local; i++) {
994         CHKERRQ(KSPCreate(PETSC_COMM_SELF,&ksp));
995         CHKERRQ(KSPSetErrorIfNotConverged(ksp,pc->erroriffailure));
996         CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1));
997         CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp));
998         CHKERRQ(KSPSetType(ksp,KSPPREONLY));
999         CHKERRQ(KSPGetPC(ksp,&subpc));
1000         CHKERRQ(PCGetOptionsPrefix(pc,&prefix));
1001         CHKERRQ(KSPSetOptionsPrefix(ksp,prefix));
1002         CHKERRQ(KSPAppendOptionsPrefix(ksp,"sub_"));
1003 
1004         jac->ksp[i] = ksp;
1005       }
1006     } else {
1007       bjac = (PC_BJacobi_Multiblock*)jac->data;
1008     }
1009 
1010     start = 0;
1011     CHKERRQ(MatGetVecType(pmat,&vectype));
1012     for (i=0; i<n_local; i++) {
1013       m = jac->l_lens[i];
1014       /*
1015       The reason we need to generate these vectors is to serve
1016       as the right-hand side and solution vector for the solve on the
1017       block. We do not need to allocate space for the vectors since
1018       that is provided via VecPlaceArray() just before the call to
1019       KSPSolve() on the block.
1020 
1021       */
1022       CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,m,&x));
1023       CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&y));
1024       CHKERRQ(VecSetType(x,vectype));
1025       CHKERRQ(VecSetType(y,vectype));
1026       CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)x));
1027       CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)y));
1028 
1029       bjac->x[i]      = x;
1030       bjac->y[i]      = y;
1031       bjac->starts[i] = start;
1032 
1033       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,m,start,1,&is));
1034       bjac->is[i] = is;
1035       CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)is));
1036 
1037       start += m;
1038     }
1039   } else {
1040     bjac = (PC_BJacobi_Multiblock*)jac->data;
1041     /*
1042        Destroy the blocks from the previous iteration
1043     */
1044     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1045       CHKERRQ(MatDestroyMatrices(n_local,&bjac->pmat));
1046       if (pc->useAmat) {
1047         CHKERRQ(MatDestroyMatrices(n_local,&bjac->mat));
1048       }
1049       scall = MAT_INITIAL_MATRIX;
1050     } else scall = MAT_REUSE_MATRIX;
1051   }
1052 
1053   CHKERRQ(MatCreateSubMatrices(pmat,n_local,bjac->is,bjac->is,scall,&bjac->pmat));
1054   if (pc->useAmat) {
1055     CHKERRQ(MatCreateSubMatrices(mat,n_local,bjac->is,bjac->is,scall,&bjac->mat));
1056   }
1057   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1058      different boundary conditions for the submatrices than for the global problem) */
1059   CHKERRQ(PCModifySubMatrices(pc,n_local,bjac->is,bjac->is,bjac->pmat,pc->modifysubmatricesP));
1060 
1061   for (i=0; i<n_local; i++) {
1062     CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->pmat[i]));
1063     CHKERRQ(KSPGetOptionsPrefix(jac->ksp[i],&prefix));
1064     if (pc->useAmat) {
1065       CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->mat[i]));
1066       CHKERRQ(KSPSetOperators(jac->ksp[i],bjac->mat[i],bjac->pmat[i]));
1067       CHKERRQ(MatSetOptionsPrefix(bjac->mat[i],prefix));
1068     } else {
1069       CHKERRQ(KSPSetOperators(jac->ksp[i],bjac->pmat[i],bjac->pmat[i]));
1070     }
1071     CHKERRQ(MatSetOptionsPrefix(bjac->pmat[i],prefix));
1072     if (pc->setfromoptionscalled) {
1073       CHKERRQ(KSPSetFromOptions(jac->ksp[i]));
1074     }
1075   }
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 /* ---------------------------------------------------------------------------------------------*/
1080 /*
1081       These are for a single block with multiple processes
1082 */
1083 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1084 {
1085   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
1086   KSP                subksp = jac->ksp[0];
1087   KSPConvergedReason reason;
1088 
1089   PetscFunctionBegin;
1090   CHKERRQ(KSPSetUp(subksp));
1091   CHKERRQ(KSPGetConvergedReason(subksp,&reason));
1092   if (reason == KSP_DIVERGED_PC_FAILED) {
1093     pc->failedreason = PC_SUBPC_ERROR;
1094   }
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1099 {
1100   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1101   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1102 
1103   PetscFunctionBegin;
1104   CHKERRQ(VecDestroy(&mpjac->ysub));
1105   CHKERRQ(VecDestroy(&mpjac->xsub));
1106   CHKERRQ(MatDestroy(&mpjac->submats));
1107   if (jac->ksp) CHKERRQ(KSPReset(jac->ksp[0]));
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1112 {
1113   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1114   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1115 
1116   PetscFunctionBegin;
1117   CHKERRQ(PCReset_BJacobi_Multiproc(pc));
1118   CHKERRQ(KSPDestroy(&jac->ksp[0]));
1119   CHKERRQ(PetscFree(jac->ksp));
1120   CHKERRQ(PetscSubcommDestroy(&mpjac->psubcomm));
1121 
1122   CHKERRQ(PetscFree(mpjac));
1123   CHKERRQ(PetscFree(pc->data));
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc,Vec x,Vec y)
1128 {
1129   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1130   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1131   PetscScalar          *yarray;
1132   const PetscScalar    *xarray;
1133   KSPConvergedReason   reason;
1134 
1135   PetscFunctionBegin;
1136   /* place x's and y's local arrays into xsub and ysub */
1137   CHKERRQ(VecGetArrayRead(x,&xarray));
1138   CHKERRQ(VecGetArray(y,&yarray));
1139   CHKERRQ(VecPlaceArray(mpjac->xsub,xarray));
1140   CHKERRQ(VecPlaceArray(mpjac->ysub,yarray));
1141 
1142   /* apply preconditioner on each matrix block */
1143   CHKERRQ(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0));
1144   CHKERRQ(KSPSolve(jac->ksp[0],mpjac->xsub,mpjac->ysub));
1145   CHKERRQ(KSPCheckSolve(jac->ksp[0],pc,mpjac->ysub));
1146   CHKERRQ(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0));
1147   CHKERRQ(KSPGetConvergedReason(jac->ksp[0],&reason));
1148   if (reason == KSP_DIVERGED_PC_FAILED) {
1149     pc->failedreason = PC_SUBPC_ERROR;
1150   }
1151 
1152   CHKERRQ(VecResetArray(mpjac->xsub));
1153   CHKERRQ(VecResetArray(mpjac->ysub));
1154   CHKERRQ(VecRestoreArrayRead(x,&xarray));
1155   CHKERRQ(VecRestoreArray(y,&yarray));
1156   PetscFunctionReturn(0);
1157 }
1158 
/*
   PCMatApply_BJacobi_Multiproc - Applies the single-parallel-block
   preconditioner to all columns of the dense matrix X at once, writing the
   result into Y.  Wraps the raw arrays of X and Y in temporary dense
   matrices on the sub-KSP's communicator (no copy) and calls KSPMatSolve().
*/
static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc,Mat X,Mat Y)
{
  PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
  KSPConvergedReason   reason;
  Mat                  sX,sY;     /* no-copy views of X and Y on the sub-communicator */
  const PetscScalar    *x;
  PetscScalar          *y;
  PetscInt             m,N,lda,ldb;

  PetscFunctionBegin;
  /* apply preconditioner on each matrix block */
  CHKERRQ(MatGetLocalSize(X,&m,NULL));
  CHKERRQ(MatGetSize(X,NULL,&N));
  /* preserve the leading dimensions so the views index the arrays exactly
     as the originals do (X/Y may be padded) */
  CHKERRQ(MatDenseGetLDA(X,&lda));
  CHKERRQ(MatDenseGetLDA(Y,&ldb));
  CHKERRQ(MatDenseGetArrayRead(X,&x));
  CHKERRQ(MatDenseGetArrayWrite(Y,&y));
  /* cast away const: sX is only read by KSPMatSolve() below */
  CHKERRQ(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,(PetscScalar*)x,&sX));
  CHKERRQ(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,y,&sY));
  CHKERRQ(MatDenseSetLDA(sX,lda));
  CHKERRQ(MatDenseSetLDA(sY,ldb));
  CHKERRQ(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0));
  CHKERRQ(KSPMatSolve(jac->ksp[0],sX,sY));
  CHKERRQ(KSPCheckSolve(jac->ksp[0],pc,NULL));
  CHKERRQ(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0));
  /* destroy the views before restoring the arrays they alias */
  CHKERRQ(MatDestroy(&sY));
  CHKERRQ(MatDestroy(&sX));
  CHKERRQ(MatDenseRestoreArrayWrite(Y,&y));
  CHKERRQ(MatDenseRestoreArrayRead(X,&x));
  CHKERRQ(KSPGetConvergedReason(jac->ksp[0],&reason));
  if (reason == KSP_DIVERGED_PC_FAILED) {
    pc->failedreason = PC_SUBPC_ERROR;
  }
  PetscFunctionReturn(0);
}
1194 
/*
   PCSetUp_BJacobi_Multiproc - Sets up block Jacobi for the case of a single
   block shared by several MPI processes (jac->n < comm size).  On first
   setup it builds (or adopts) a PetscSubcomm with jac->n contiguous pieces,
   extracts the matrix block living on this process's subcommunicator, and
   creates one parallel sub-KSP (prefix "<pc_prefix>sub_") over it, plus
   array-less work vectors filled via VecPlaceArray() in the apply routines.
   On later setups only the matrix block and operators are refreshed.
*/
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
  PetscInt             m,n;
  MPI_Comm             comm,subcomm=0;
  const char           *prefix;
  PetscBool            wasSetup = PETSC_TRUE; /* becomes FALSE only on first setup */
  VecType              vectype;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)pc,&comm));
  PetscCheckFalse(jac->n_local > 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    wasSetup  = PETSC_FALSE;
    CHKERRQ(PetscNewLog(pc,&mpjac));
    jac->data = (void*)mpjac;

    /* initialize datastructure mpjac */
    if (!jac->psubcomm) {
      /* Create default contiguous subcommunicatiors if user does not provide them */
      CHKERRQ(PetscSubcommCreate(comm,&jac->psubcomm));
      CHKERRQ(PetscSubcommSetNumber(jac->psubcomm,jac->n));
      CHKERRQ(PetscSubcommSetType(jac->psubcomm,PETSC_SUBCOMM_CONTIGUOUS));
      CHKERRQ(PetscLogObjectMemory((PetscObject)pc,sizeof(PetscSubcomm)));
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* Get matrix blocks of pmat */
    CHKERRQ(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats));

    /* create a new PC that processors in each subcomm have copy of */
    CHKERRQ(PetscMalloc1(1,&jac->ksp));
    CHKERRQ(KSPCreate(subcomm,&jac->ksp[0]));
    CHKERRQ(KSPSetErrorIfNotConverged(jac->ksp[0],pc->erroriffailure));
    CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0],(PetscObject)pc,1));
    CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)jac->ksp[0]));
    CHKERRQ(KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats));
    CHKERRQ(KSPGetPC(jac->ksp[0],&mpjac->pc));

    /* the sub-KSP gets prefix "<pc_prefix>sub_"; the block matrix inherits it */
    CHKERRQ(PCGetOptionsPrefix(pc,&prefix));
    CHKERRQ(KSPSetOptionsPrefix(jac->ksp[0],prefix));
    CHKERRQ(KSPAppendOptionsPrefix(jac->ksp[0],"sub_"));
    CHKERRQ(KSPGetOptionsPrefix(jac->ksp[0],&prefix));
    CHKERRQ(MatSetOptionsPrefix(mpjac->submats,prefix));

    /* create dummy vectors xsub and ysub; their arrays are placed at apply time */
    CHKERRQ(MatGetLocalSize(mpjac->submats,&m,&n));
    CHKERRQ(VecCreateMPIWithArray(subcomm,1,n,PETSC_DECIDE,NULL,&mpjac->xsub));
    CHKERRQ(VecCreateMPIWithArray(subcomm,1,m,PETSC_DECIDE,NULL,&mpjac->ysub));
    /* match the vector type to the matrix so GPU matrices get GPU vectors */
    CHKERRQ(MatGetVecType(mpjac->submats,&vectype));
    CHKERRQ(VecSetType(mpjac->xsub,vectype));
    CHKERRQ(VecSetType(mpjac->ysub,vectype));
    CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->xsub));
    CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->ysub));

    /* install the multiproc implementations of the PC operations */
    pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
    pc->ops->reset         = PCReset_BJacobi_Multiproc;
    pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply         = PCApply_BJacobi_Multiproc;
    pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) CHKERRQ(MatDestroy(&mpjac->submats));
      CHKERRQ(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats));
    } else {
      CHKERRQ(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_REUSE_MATRIX,&mpjac->submats));
    }
    CHKERRQ(KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats));
  }

  if (!wasSetup && pc->setfromoptionscalled) {
    CHKERRQ(KSPSetFromOptions(jac->ksp[0]));
  }
  PetscFunctionReturn(0);
}
1275