xref: /petsc/src/ksp/pc/impls/pbjacobi/pbjacobi.c (revision dc1da017a8339fb6aa6e09b86af2a5ecc718e906)
1dba47a55SKris Buschelman 
24b9ad928SBarry Smith /*
34b9ad928SBarry Smith    Include files needed for the PBJacobi preconditioner:
44b9ad928SBarry Smith      pcimpl.h - private include file intended for use by all preconditioners
54b9ad928SBarry Smith */
64b9ad928SBarry Smith 
7af0996ceSBarry Smith #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
84b9ad928SBarry Smith 
94b9ad928SBarry Smith /*
104b9ad928SBarry Smith    Private context (data structure) for the PBJacobi preconditioner.
114b9ad928SBarry Smith */
124b9ad928SBarry Smith typedef struct {
13713ccfa9SJed Brown   const MatScalar *diag;
14c1ac3661SBarry Smith   PetscInt         bs, mbs;
154b9ad928SBarry Smith } PC_PBJacobi;
164b9ad928SBarry Smith 
171f713683SJunchao Zhang static PetscErrorCode PCApply_PBJacobi(PC pc, Vec x, Vec y)
18d71ae5a4SJacob Faibussowitsch {
19bbead8a2SBarry Smith   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
201f713683SJunchao Zhang   PetscInt           i, ib, jb;
211f713683SJunchao Zhang   const PetscInt     m    = jac->mbs;
221f713683SJunchao Zhang   const PetscInt     bs   = jac->bs;
23bbead8a2SBarry Smith   const MatScalar   *diag = jac->diag;
241f713683SJunchao Zhang   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
25bbead8a2SBarry Smith   const PetscScalar *xx;
26bbead8a2SBarry Smith 
27bbead8a2SBarry Smith   PetscFunctionBegin;
289566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xx));
299566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yy));
301f713683SJunchao Zhang   switch (bs) {
311f713683SJunchao Zhang   case 1:
322fa5cd67SKarl Rupp     for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
331f713683SJunchao Zhang     break;
341f713683SJunchao Zhang   case 2:
354b9ad928SBarry Smith     for (i = 0; i < m; i++) {
369371c9d4SSatish Balay       x0            = xx[2 * i];
379371c9d4SSatish Balay       x1            = xx[2 * i + 1];
384b9ad928SBarry Smith       yy[2 * i]     = diag[0] * x0 + diag[2] * x1;
394b9ad928SBarry Smith       yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1;
404b9ad928SBarry Smith       diag += 4;
414b9ad928SBarry Smith     }
421f713683SJunchao Zhang     break;
431f713683SJunchao Zhang   case 3:
444b9ad928SBarry Smith     for (i = 0; i < m; i++) {
459371c9d4SSatish Balay       x0 = xx[3 * i];
469371c9d4SSatish Balay       x1 = xx[3 * i + 1];
479371c9d4SSatish Balay       x2 = xx[3 * i + 2];
482fa5cd67SKarl Rupp 
494b9ad928SBarry Smith       yy[3 * i]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
504b9ad928SBarry Smith       yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
514b9ad928SBarry Smith       yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
524b9ad928SBarry Smith       diag += 9;
534b9ad928SBarry Smith     }
541f713683SJunchao Zhang     break;
551f713683SJunchao Zhang   case 4:
564b9ad928SBarry Smith     for (i = 0; i < m; i++) {
579371c9d4SSatish Balay       x0 = xx[4 * i];
589371c9d4SSatish Balay       x1 = xx[4 * i + 1];
599371c9d4SSatish Balay       x2 = xx[4 * i + 2];
609371c9d4SSatish Balay       x3 = xx[4 * i + 3];
612fa5cd67SKarl Rupp 
624b9ad928SBarry Smith       yy[4 * i]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
634b9ad928SBarry Smith       yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
644b9ad928SBarry Smith       yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
654b9ad928SBarry Smith       yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
664b9ad928SBarry Smith       diag += 16;
674b9ad928SBarry Smith     }
681f713683SJunchao Zhang     break;
691f713683SJunchao Zhang   case 5:
704b9ad928SBarry Smith     for (i = 0; i < m; i++) {
719371c9d4SSatish Balay       x0 = xx[5 * i];
729371c9d4SSatish Balay       x1 = xx[5 * i + 1];
739371c9d4SSatish Balay       x2 = xx[5 * i + 2];
749371c9d4SSatish Balay       x3 = xx[5 * i + 3];
759371c9d4SSatish Balay       x4 = xx[5 * i + 4];
762fa5cd67SKarl Rupp 
774b9ad928SBarry Smith       yy[5 * i]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
784b9ad928SBarry Smith       yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
794b9ad928SBarry Smith       yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
804b9ad928SBarry Smith       yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
814b9ad928SBarry Smith       yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
824b9ad928SBarry Smith       diag += 25;
834b9ad928SBarry Smith     }
841f713683SJunchao Zhang     break;
851f713683SJunchao Zhang   case 6:
860e1b4bd6SMark F. Adams     for (i = 0; i < m; i++) {
879371c9d4SSatish Balay       x0 = xx[6 * i];
889371c9d4SSatish Balay       x1 = xx[6 * i + 1];
899371c9d4SSatish Balay       x2 = xx[6 * i + 2];
909371c9d4SSatish Balay       x3 = xx[6 * i + 3];
919371c9d4SSatish Balay       x4 = xx[6 * i + 4];
929371c9d4SSatish Balay       x5 = xx[6 * i + 5];
932fa5cd67SKarl Rupp 
940e1b4bd6SMark F. Adams       yy[6 * i]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
950e1b4bd6SMark F. Adams       yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
960e1b4bd6SMark F. Adams       yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
970e1b4bd6SMark F. Adams       yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
980e1b4bd6SMark F. Adams       yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
990e1b4bd6SMark F. Adams       yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
1000e1b4bd6SMark F. Adams       diag += 36;
1010e1b4bd6SMark F. Adams     }
1021f713683SJunchao Zhang     break;
1031f713683SJunchao Zhang   case 7:
1040a4ca348SMatthew G Knepley     for (i = 0; i < m; i++) {
1059371c9d4SSatish Balay       x0 = xx[7 * i];
1069371c9d4SSatish Balay       x1 = xx[7 * i + 1];
1079371c9d4SSatish Balay       x2 = xx[7 * i + 2];
1089371c9d4SSatish Balay       x3 = xx[7 * i + 3];
1099371c9d4SSatish Balay       x4 = xx[7 * i + 4];
1109371c9d4SSatish Balay       x5 = xx[7 * i + 5];
1119371c9d4SSatish Balay       x6 = xx[7 * i + 6];
1122fa5cd67SKarl Rupp 
1130a4ca348SMatthew G Knepley       yy[7 * i]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
1140a4ca348SMatthew G Knepley       yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
1150a4ca348SMatthew G Knepley       yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
1160a4ca348SMatthew G Knepley       yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
1170a4ca348SMatthew G Knepley       yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
1180a4ca348SMatthew G Knepley       yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
1190a4ca348SMatthew G Knepley       yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
1200a4ca348SMatthew G Knepley       diag += 49;
1210a4ca348SMatthew G Knepley     }
1221f713683SJunchao Zhang     break;
1231f713683SJunchao Zhang   default:
124f79daf70SMark Lohry     for (i = 0; i < m; i++) {
125f79daf70SMark Lohry       for (ib = 0; ib < bs; ib++) {
126f79daf70SMark Lohry         PetscScalar rowsum = 0;
127ad540459SPierre Jolivet         for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb];
128f79daf70SMark Lohry         yy[bs * i + ib] = rowsum;
129f79daf70SMark Lohry       }
130f79daf70SMark Lohry       diag += bs * bs;
131f79daf70SMark Lohry     }
1321f713683SJunchao Zhang   }
1339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xx));
1349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yy));
1359566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */
136f79daf70SMark Lohry   PetscFunctionReturn(0);
137f79daf70SMark Lohry }
138faff7d23SJed Brown 
139*dc1da017SJunchao Zhang static PetscErrorCode PCApplyTranspose_PBJacobi(PC pc, Vec x, Vec y)
140d71ae5a4SJacob Faibussowitsch {
1418cd12025SJed Brown   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
142*dc1da017SJunchao Zhang   PetscInt           i, ib, jb;
143*dc1da017SJunchao Zhang   const PetscInt     m    = jac->mbs;
144*dc1da017SJunchao Zhang   const PetscInt     bs   = jac->bs;
1458cd12025SJed Brown   const MatScalar   *diag = jac->diag;
146*dc1da017SJunchao Zhang   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
1478cd12025SJed Brown   const PetscScalar *xx;
1488cd12025SJed Brown 
1498cd12025SJed Brown   PetscFunctionBegin;
1509566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xx));
1519566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yy));
152*dc1da017SJunchao Zhang   switch (bs) {
153*dc1da017SJunchao Zhang   case 1:
154*dc1da017SJunchao Zhang     for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
155*dc1da017SJunchao Zhang     break;
156*dc1da017SJunchao Zhang   case 2:
1578cd12025SJed Brown     for (i = 0; i < m; i++) {
158*dc1da017SJunchao Zhang       x0            = xx[2 * i];
159*dc1da017SJunchao Zhang       x1            = xx[2 * i + 1];
160*dc1da017SJunchao Zhang       yy[2 * i]     = diag[0] * x0 + diag[1] * x1;
161*dc1da017SJunchao Zhang       yy[2 * i + 1] = diag[2] * x0 + diag[3] * x1;
162*dc1da017SJunchao Zhang       diag += 4;
163*dc1da017SJunchao Zhang     }
164*dc1da017SJunchao Zhang     break;
165*dc1da017SJunchao Zhang   case 3:
166*dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
167*dc1da017SJunchao Zhang       x0 = xx[3 * i];
168*dc1da017SJunchao Zhang       x1 = xx[3 * i + 1];
169*dc1da017SJunchao Zhang       x2 = xx[3 * i + 2];
170*dc1da017SJunchao Zhang 
171*dc1da017SJunchao Zhang       yy[3 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2;
172*dc1da017SJunchao Zhang       yy[3 * i + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2;
173*dc1da017SJunchao Zhang       yy[3 * i + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2;
174*dc1da017SJunchao Zhang       diag += 9;
175*dc1da017SJunchao Zhang     }
176*dc1da017SJunchao Zhang     break;
177*dc1da017SJunchao Zhang   case 4:
178*dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
179*dc1da017SJunchao Zhang       x0 = xx[4 * i];
180*dc1da017SJunchao Zhang       x1 = xx[4 * i + 1];
181*dc1da017SJunchao Zhang       x2 = xx[4 * i + 2];
182*dc1da017SJunchao Zhang       x3 = xx[4 * i + 3];
183*dc1da017SJunchao Zhang 
184*dc1da017SJunchao Zhang       yy[4 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3;
185*dc1da017SJunchao Zhang       yy[4 * i + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3;
186*dc1da017SJunchao Zhang       yy[4 * i + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3;
187*dc1da017SJunchao Zhang       yy[4 * i + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3;
188*dc1da017SJunchao Zhang       diag += 16;
189*dc1da017SJunchao Zhang     }
190*dc1da017SJunchao Zhang     break;
191*dc1da017SJunchao Zhang   case 5:
192*dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
193*dc1da017SJunchao Zhang       x0 = xx[5 * i];
194*dc1da017SJunchao Zhang       x1 = xx[5 * i + 1];
195*dc1da017SJunchao Zhang       x2 = xx[5 * i + 2];
196*dc1da017SJunchao Zhang       x3 = xx[5 * i + 3];
197*dc1da017SJunchao Zhang       x4 = xx[5 * i + 4];
198*dc1da017SJunchao Zhang 
199*dc1da017SJunchao Zhang       yy[5 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4;
200*dc1da017SJunchao Zhang       yy[5 * i + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4;
201*dc1da017SJunchao Zhang       yy[5 * i + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4;
202*dc1da017SJunchao Zhang       yy[5 * i + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4;
203*dc1da017SJunchao Zhang       yy[5 * i + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4;
204*dc1da017SJunchao Zhang       diag += 25;
205*dc1da017SJunchao Zhang     }
206*dc1da017SJunchao Zhang     break;
207*dc1da017SJunchao Zhang   case 6:
208*dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
209*dc1da017SJunchao Zhang       x0 = xx[6 * i];
210*dc1da017SJunchao Zhang       x1 = xx[6 * i + 1];
211*dc1da017SJunchao Zhang       x2 = xx[6 * i + 2];
212*dc1da017SJunchao Zhang       x3 = xx[6 * i + 3];
213*dc1da017SJunchao Zhang       x4 = xx[6 * i + 4];
214*dc1da017SJunchao Zhang       x5 = xx[6 * i + 5];
215*dc1da017SJunchao Zhang 
216*dc1da017SJunchao Zhang       yy[6 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5;
217*dc1da017SJunchao Zhang       yy[6 * i + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5;
218*dc1da017SJunchao Zhang       yy[6 * i + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5;
219*dc1da017SJunchao Zhang       yy[6 * i + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5;
220*dc1da017SJunchao Zhang       yy[6 * i + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5;
221*dc1da017SJunchao Zhang       yy[6 * i + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5;
222*dc1da017SJunchao Zhang       diag += 36;
223*dc1da017SJunchao Zhang     }
224*dc1da017SJunchao Zhang     break;
225*dc1da017SJunchao Zhang   case 7:
226*dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
227*dc1da017SJunchao Zhang       x0 = xx[7 * i];
228*dc1da017SJunchao Zhang       x1 = xx[7 * i + 1];
229*dc1da017SJunchao Zhang       x2 = xx[7 * i + 2];
230*dc1da017SJunchao Zhang       x3 = xx[7 * i + 3];
231*dc1da017SJunchao Zhang       x4 = xx[7 * i + 4];
232*dc1da017SJunchao Zhang       x5 = xx[7 * i + 5];
233*dc1da017SJunchao Zhang       x6 = xx[7 * i + 6];
234*dc1da017SJunchao Zhang 
235*dc1da017SJunchao Zhang       yy[7 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6;
236*dc1da017SJunchao Zhang       yy[7 * i + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6;
237*dc1da017SJunchao Zhang       yy[7 * i + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6;
238*dc1da017SJunchao Zhang       yy[7 * i + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6;
239*dc1da017SJunchao Zhang       yy[7 * i + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6;
240*dc1da017SJunchao Zhang       yy[7 * i + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6;
241*dc1da017SJunchao Zhang       yy[7 * i + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6;
242*dc1da017SJunchao Zhang       diag += 49;
243*dc1da017SJunchao Zhang     }
244*dc1da017SJunchao Zhang     break;
245*dc1da017SJunchao Zhang   default:
246*dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
247*dc1da017SJunchao Zhang       for (ib = 0; ib < bs; ib++) {
248*dc1da017SJunchao Zhang         PetscScalar rowsum = 0;
249*dc1da017SJunchao Zhang         for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[bs * i + jb];
250*dc1da017SJunchao Zhang         yy[bs * i + ib] = rowsum;
2518cd12025SJed Brown       }
2528cd12025SJed Brown       diag += bs * bs;
2538cd12025SJed Brown     }
254*dc1da017SJunchao Zhang   }
2559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xx));
2569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yy));
257*dc1da017SJunchao Zhang   PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m));
2588cd12025SJed Brown   PetscFunctionReturn(0);
2598cd12025SJed Brown }
2608cd12025SJed Brown 
261d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_PBJacobi(PC pc)
262d71ae5a4SJacob Faibussowitsch {
2634b9ad928SBarry Smith   PC_PBJacobi   *jac = (PC_PBJacobi *)pc->data;
2644b9ad928SBarry Smith   Mat            A   = pc->pmat;
26500e125f8SBarry Smith   MatFactorError err;
266539c167fSBarry Smith   PetscInt       nlocal;
2674b9ad928SBarry Smith 
2684b9ad928SBarry Smith   PetscFunctionBegin;
2699566063dSJacob Faibussowitsch   PetscCall(MatInvertBlockDiagonal(A, &jac->diag));
2709566063dSJacob Faibussowitsch   PetscCall(MatFactorGetError(A, &err));
27100e125f8SBarry Smith   if (err) pc->failedreason = (PCFailedReason)err;
272d7806518SHong Zhang 
2739566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &jac->bs));
2749566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &nlocal, NULL));
275539c167fSBarry Smith   jac->mbs                = nlocal / jac->bs;
2761f713683SJunchao Zhang   pc->ops->apply          = PCApply_PBJacobi;
277*dc1da017SJunchao Zhang   pc->ops->applytranspose = PCApplyTranspose_PBJacobi;
2784b9ad928SBarry Smith   PetscFunctionReturn(0);
2794b9ad928SBarry Smith }
280f1580f4eSBarry Smith 
281d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCDestroy_PBJacobi(PC pc)
282d71ae5a4SJacob Faibussowitsch {
2834b9ad928SBarry Smith   PetscFunctionBegin;
2844b9ad928SBarry Smith   /*
2854b9ad928SBarry Smith       Free the private data structure that was hanging off the PC
2864b9ad928SBarry Smith   */
2879566063dSJacob Faibussowitsch   PetscCall(PetscFree(pc->data));
2884b9ad928SBarry Smith   PetscFunctionReturn(0);
2894b9ad928SBarry Smith }
290fa939db7SJed Brown 
291d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer)
292d71ae5a4SJacob Faibussowitsch {
293fa939db7SJed Brown   PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
294fa939db7SJed Brown   PetscBool    iascii;
295fa939db7SJed Brown 
296fa939db7SJed Brown   PetscFunctionBegin;
2979566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
29848a46eb9SPierre Jolivet   if (iascii) PetscCall(PetscViewerASCIIPrintf(viewer, "  point-block size %" PetscInt_FMT "\n", jac->bs));
299fa939db7SJed Brown   PetscFunctionReturn(0);
300fa939db7SJed Brown }
301fa939db7SJed Brown 
30237a17b4dSBarry Smith /*MC
303422a814eSBarry Smith      PCPBJACOBI - Point block Jacobi preconditioner
304422a814eSBarry Smith 
30595452b02SPatrick Sanan    Notes:
306f1580f4eSBarry Smith     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks
307422a814eSBarry Smith 
308f1580f4eSBarry Smith    This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix
309422a814eSBarry Smith 
310422a814eSBarry Smith    Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
311422a814eSBarry Smith    is detected a PETSc error is generated.
312422a814eSBarry Smith 
31395452b02SPatrick Sanan    Developer Notes:
314f1580f4eSBarry Smith      This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
315422a814eSBarry Smith      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
316f1580f4eSBarry Smith      terminating `KSP` with a `KSP_DIVERGED_NANORIF` allowing
317422a814eSBarry Smith      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
318422a814eSBarry Smith 
31990dfcc32SBarry Smith      Perhaps should provide an option that allows generation of a valid preconditioner
320f1580f4eSBarry Smith      even if a block is singular as the `PCJACOBI` does.
32137a17b4dSBarry Smith 
32237a17b4dSBarry Smith    Level: beginner
32337a17b4dSBarry Smith 
324db781477SPatrick Sanan .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI`
32537a17b4dSBarry Smith M*/
32637a17b4dSBarry Smith 
327d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
328d71ae5a4SJacob Faibussowitsch {
3294b9ad928SBarry Smith   PC_PBJacobi *jac;
3304b9ad928SBarry Smith 
3314b9ad928SBarry Smith   PetscFunctionBegin;
3324b9ad928SBarry Smith   /*
3334b9ad928SBarry Smith      Creates the private data structure for this preconditioner and
3344b9ad928SBarry Smith      attach it to the PC object.
3354b9ad928SBarry Smith   */
3364dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&jac));
3374b9ad928SBarry Smith   pc->data = (void *)jac;
3384b9ad928SBarry Smith 
3394b9ad928SBarry Smith   /*
3404b9ad928SBarry Smith      Initialize the pointers to vectors to ZERO; these will be used to store
3414b9ad928SBarry Smith      diagonal entries of the matrix for fast preconditioner application.
3424b9ad928SBarry Smith   */
3430a545947SLisandro Dalcin   jac->diag = NULL;
3444b9ad928SBarry Smith 
3454b9ad928SBarry Smith   /*
3464b9ad928SBarry Smith       Set the pointers for the functions that are provided above.
3474b9ad928SBarry Smith       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
3484b9ad928SBarry Smith       are called, they will automatically call these functions.  Note we
3494b9ad928SBarry Smith       choose not to provide a couple of these functions since they are
3504b9ad928SBarry Smith       not needed.
3514b9ad928SBarry Smith   */
3520a545947SLisandro Dalcin   pc->ops->apply               = NULL; /*set depending on the block size */
3530a545947SLisandro Dalcin   pc->ops->applytranspose      = NULL;
3544b9ad928SBarry Smith   pc->ops->setup               = PCSetUp_PBJacobi;
3554b9ad928SBarry Smith   pc->ops->destroy             = PCDestroy_PBJacobi;
3560a545947SLisandro Dalcin   pc->ops->setfromoptions      = NULL;
357fa939db7SJed Brown   pc->ops->view                = PCView_PBJacobi;
3580a545947SLisandro Dalcin   pc->ops->applyrichardson     = NULL;
3590a545947SLisandro Dalcin   pc->ops->applysymmetricleft  = NULL;
3600a545947SLisandro Dalcin   pc->ops->applysymmetricright = NULL;
3614b9ad928SBarry Smith   PetscFunctionReturn(0);
3624b9ad928SBarry Smith }
363