xref: /petsc/src/ksp/pc/impls/pbjacobi/pbjacobi.c (revision bd89dbf26d8a5efecb980364933175da61864cd7)
112facf1bSJunchao Zhang #include <../src/ksp/pc/impls/pbjacobi/pbjacobi.h>
242ce410bSJunchao Zhang #include <petsc/private/matimpl.h>
34b9ad928SBarry Smith 
PCApply_PBJacobi(PC pc,Vec x,Vec y)41f713683SJunchao Zhang static PetscErrorCode PCApply_PBJacobi(PC pc, Vec x, Vec y)
5d71ae5a4SJacob Faibussowitsch {
6bbead8a2SBarry Smith   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
71f713683SJunchao Zhang   PetscInt           i, ib, jb;
81f713683SJunchao Zhang   const PetscInt     m    = jac->mbs;
91f713683SJunchao Zhang   const PetscInt     bs   = jac->bs;
10bbead8a2SBarry Smith   const MatScalar   *diag = jac->diag;
111f713683SJunchao Zhang   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
12bbead8a2SBarry Smith   const PetscScalar *xx;
13bbead8a2SBarry Smith 
14bbead8a2SBarry Smith   PetscFunctionBegin;
159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xx));
169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yy));
171f713683SJunchao Zhang   switch (bs) {
181f713683SJunchao Zhang   case 1:
192fa5cd67SKarl Rupp     for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
201f713683SJunchao Zhang     break;
211f713683SJunchao Zhang   case 2:
224b9ad928SBarry Smith     for (i = 0; i < m; i++) {
239371c9d4SSatish Balay       x0            = xx[2 * i];
249371c9d4SSatish Balay       x1            = xx[2 * i + 1];
254b9ad928SBarry Smith       yy[2 * i]     = diag[0] * x0 + diag[2] * x1;
264b9ad928SBarry Smith       yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1;
274b9ad928SBarry Smith       diag += 4;
284b9ad928SBarry Smith     }
291f713683SJunchao Zhang     break;
301f713683SJunchao Zhang   case 3:
314b9ad928SBarry Smith     for (i = 0; i < m; i++) {
329371c9d4SSatish Balay       x0 = xx[3 * i];
339371c9d4SSatish Balay       x1 = xx[3 * i + 1];
349371c9d4SSatish Balay       x2 = xx[3 * i + 2];
352fa5cd67SKarl Rupp 
364b9ad928SBarry Smith       yy[3 * i]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
374b9ad928SBarry Smith       yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
384b9ad928SBarry Smith       yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
394b9ad928SBarry Smith       diag += 9;
404b9ad928SBarry Smith     }
411f713683SJunchao Zhang     break;
421f713683SJunchao Zhang   case 4:
434b9ad928SBarry Smith     for (i = 0; i < m; i++) {
449371c9d4SSatish Balay       x0 = xx[4 * i];
459371c9d4SSatish Balay       x1 = xx[4 * i + 1];
469371c9d4SSatish Balay       x2 = xx[4 * i + 2];
479371c9d4SSatish Balay       x3 = xx[4 * i + 3];
482fa5cd67SKarl Rupp 
494b9ad928SBarry Smith       yy[4 * i]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
504b9ad928SBarry Smith       yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
514b9ad928SBarry Smith       yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
524b9ad928SBarry Smith       yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
534b9ad928SBarry Smith       diag += 16;
544b9ad928SBarry Smith     }
551f713683SJunchao Zhang     break;
561f713683SJunchao Zhang   case 5:
574b9ad928SBarry Smith     for (i = 0; i < m; i++) {
589371c9d4SSatish Balay       x0 = xx[5 * i];
599371c9d4SSatish Balay       x1 = xx[5 * i + 1];
609371c9d4SSatish Balay       x2 = xx[5 * i + 2];
619371c9d4SSatish Balay       x3 = xx[5 * i + 3];
629371c9d4SSatish Balay       x4 = xx[5 * i + 4];
632fa5cd67SKarl Rupp 
644b9ad928SBarry Smith       yy[5 * i]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
654b9ad928SBarry Smith       yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
664b9ad928SBarry Smith       yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
674b9ad928SBarry Smith       yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
684b9ad928SBarry Smith       yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
694b9ad928SBarry Smith       diag += 25;
704b9ad928SBarry Smith     }
711f713683SJunchao Zhang     break;
721f713683SJunchao Zhang   case 6:
730e1b4bd6SMark F. Adams     for (i = 0; i < m; i++) {
749371c9d4SSatish Balay       x0 = xx[6 * i];
759371c9d4SSatish Balay       x1 = xx[6 * i + 1];
769371c9d4SSatish Balay       x2 = xx[6 * i + 2];
779371c9d4SSatish Balay       x3 = xx[6 * i + 3];
789371c9d4SSatish Balay       x4 = xx[6 * i + 4];
799371c9d4SSatish Balay       x5 = xx[6 * i + 5];
802fa5cd67SKarl Rupp 
810e1b4bd6SMark F. Adams       yy[6 * i]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
820e1b4bd6SMark F. Adams       yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
830e1b4bd6SMark F. Adams       yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
840e1b4bd6SMark F. Adams       yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
850e1b4bd6SMark F. Adams       yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
860e1b4bd6SMark F. Adams       yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
870e1b4bd6SMark F. Adams       diag += 36;
880e1b4bd6SMark F. Adams     }
891f713683SJunchao Zhang     break;
901f713683SJunchao Zhang   case 7:
910a4ca348SMatthew G Knepley     for (i = 0; i < m; i++) {
929371c9d4SSatish Balay       x0 = xx[7 * i];
939371c9d4SSatish Balay       x1 = xx[7 * i + 1];
949371c9d4SSatish Balay       x2 = xx[7 * i + 2];
959371c9d4SSatish Balay       x3 = xx[7 * i + 3];
969371c9d4SSatish Balay       x4 = xx[7 * i + 4];
979371c9d4SSatish Balay       x5 = xx[7 * i + 5];
989371c9d4SSatish Balay       x6 = xx[7 * i + 6];
992fa5cd67SKarl Rupp 
1000a4ca348SMatthew G Knepley       yy[7 * i]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
1010a4ca348SMatthew G Knepley       yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
1020a4ca348SMatthew G Knepley       yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
1030a4ca348SMatthew G Knepley       yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
1040a4ca348SMatthew G Knepley       yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
1050a4ca348SMatthew G Knepley       yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
1060a4ca348SMatthew G Knepley       yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
1070a4ca348SMatthew G Knepley       diag += 49;
1080a4ca348SMatthew G Knepley     }
1091f713683SJunchao Zhang     break;
1101f713683SJunchao Zhang   default:
111f79daf70SMark Lohry     for (i = 0; i < m; i++) {
112f79daf70SMark Lohry       for (ib = 0; ib < bs; ib++) {
113f79daf70SMark Lohry         PetscScalar rowsum = 0;
114ad540459SPierre Jolivet         for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb];
115f79daf70SMark Lohry         yy[bs * i + ib] = rowsum;
116f79daf70SMark Lohry       }
117f79daf70SMark Lohry       diag += bs * bs;
118f79daf70SMark Lohry     }
1191f713683SJunchao Zhang   }
1209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xx));
1219566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yy));
1229566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */
1233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
124f79daf70SMark Lohry }
125faff7d23SJed Brown 
PCApplyTranspose_PBJacobi(PC pc,Vec x,Vec y)126dc1da017SJunchao Zhang static PetscErrorCode PCApplyTranspose_PBJacobi(PC pc, Vec x, Vec y)
127d71ae5a4SJacob Faibussowitsch {
1288cd12025SJed Brown   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
129dc1da017SJunchao Zhang   PetscInt           i, ib, jb;
130dc1da017SJunchao Zhang   const PetscInt     m    = jac->mbs;
131dc1da017SJunchao Zhang   const PetscInt     bs   = jac->bs;
1328cd12025SJed Brown   const MatScalar   *diag = jac->diag;
133dc1da017SJunchao Zhang   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
1348cd12025SJed Brown   const PetscScalar *xx;
1358cd12025SJed Brown 
1368cd12025SJed Brown   PetscFunctionBegin;
1379566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xx));
1389566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yy));
139dc1da017SJunchao Zhang   switch (bs) {
140dc1da017SJunchao Zhang   case 1:
141dc1da017SJunchao Zhang     for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
142dc1da017SJunchao Zhang     break;
143dc1da017SJunchao Zhang   case 2:
1448cd12025SJed Brown     for (i = 0; i < m; i++) {
145dc1da017SJunchao Zhang       x0            = xx[2 * i];
146dc1da017SJunchao Zhang       x1            = xx[2 * i + 1];
147dc1da017SJunchao Zhang       yy[2 * i]     = diag[0] * x0 + diag[1] * x1;
148dc1da017SJunchao Zhang       yy[2 * i + 1] = diag[2] * x0 + diag[3] * x1;
149dc1da017SJunchao Zhang       diag += 4;
150dc1da017SJunchao Zhang     }
151dc1da017SJunchao Zhang     break;
152dc1da017SJunchao Zhang   case 3:
153dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
154dc1da017SJunchao Zhang       x0 = xx[3 * i];
155dc1da017SJunchao Zhang       x1 = xx[3 * i + 1];
156dc1da017SJunchao Zhang       x2 = xx[3 * i + 2];
157dc1da017SJunchao Zhang 
158dc1da017SJunchao Zhang       yy[3 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2;
159dc1da017SJunchao Zhang       yy[3 * i + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2;
160dc1da017SJunchao Zhang       yy[3 * i + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2;
161dc1da017SJunchao Zhang       diag += 9;
162dc1da017SJunchao Zhang     }
163dc1da017SJunchao Zhang     break;
164dc1da017SJunchao Zhang   case 4:
165dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
166dc1da017SJunchao Zhang       x0 = xx[4 * i];
167dc1da017SJunchao Zhang       x1 = xx[4 * i + 1];
168dc1da017SJunchao Zhang       x2 = xx[4 * i + 2];
169dc1da017SJunchao Zhang       x3 = xx[4 * i + 3];
170dc1da017SJunchao Zhang 
171dc1da017SJunchao Zhang       yy[4 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3;
172dc1da017SJunchao Zhang       yy[4 * i + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3;
173dc1da017SJunchao Zhang       yy[4 * i + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3;
174dc1da017SJunchao Zhang       yy[4 * i + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3;
175dc1da017SJunchao Zhang       diag += 16;
176dc1da017SJunchao Zhang     }
177dc1da017SJunchao Zhang     break;
178dc1da017SJunchao Zhang   case 5:
179dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
180dc1da017SJunchao Zhang       x0 = xx[5 * i];
181dc1da017SJunchao Zhang       x1 = xx[5 * i + 1];
182dc1da017SJunchao Zhang       x2 = xx[5 * i + 2];
183dc1da017SJunchao Zhang       x3 = xx[5 * i + 3];
184dc1da017SJunchao Zhang       x4 = xx[5 * i + 4];
185dc1da017SJunchao Zhang 
186dc1da017SJunchao Zhang       yy[5 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4;
187dc1da017SJunchao Zhang       yy[5 * i + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4;
188dc1da017SJunchao Zhang       yy[5 * i + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4;
189dc1da017SJunchao Zhang       yy[5 * i + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4;
190dc1da017SJunchao Zhang       yy[5 * i + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4;
191dc1da017SJunchao Zhang       diag += 25;
192dc1da017SJunchao Zhang     }
193dc1da017SJunchao Zhang     break;
194dc1da017SJunchao Zhang   case 6:
195dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
196dc1da017SJunchao Zhang       x0 = xx[6 * i];
197dc1da017SJunchao Zhang       x1 = xx[6 * i + 1];
198dc1da017SJunchao Zhang       x2 = xx[6 * i + 2];
199dc1da017SJunchao Zhang       x3 = xx[6 * i + 3];
200dc1da017SJunchao Zhang       x4 = xx[6 * i + 4];
201dc1da017SJunchao Zhang       x5 = xx[6 * i + 5];
202dc1da017SJunchao Zhang 
203dc1da017SJunchao Zhang       yy[6 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5;
204dc1da017SJunchao Zhang       yy[6 * i + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5;
205dc1da017SJunchao Zhang       yy[6 * i + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5;
206dc1da017SJunchao Zhang       yy[6 * i + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5;
207dc1da017SJunchao Zhang       yy[6 * i + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5;
208dc1da017SJunchao Zhang       yy[6 * i + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5;
209dc1da017SJunchao Zhang       diag += 36;
210dc1da017SJunchao Zhang     }
211dc1da017SJunchao Zhang     break;
212dc1da017SJunchao Zhang   case 7:
213dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
214dc1da017SJunchao Zhang       x0 = xx[7 * i];
215dc1da017SJunchao Zhang       x1 = xx[7 * i + 1];
216dc1da017SJunchao Zhang       x2 = xx[7 * i + 2];
217dc1da017SJunchao Zhang       x3 = xx[7 * i + 3];
218dc1da017SJunchao Zhang       x4 = xx[7 * i + 4];
219dc1da017SJunchao Zhang       x5 = xx[7 * i + 5];
220dc1da017SJunchao Zhang       x6 = xx[7 * i + 6];
221dc1da017SJunchao Zhang 
222dc1da017SJunchao Zhang       yy[7 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6;
223dc1da017SJunchao Zhang       yy[7 * i + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6;
224dc1da017SJunchao Zhang       yy[7 * i + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6;
225dc1da017SJunchao Zhang       yy[7 * i + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6;
226dc1da017SJunchao Zhang       yy[7 * i + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6;
227dc1da017SJunchao Zhang       yy[7 * i + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6;
228dc1da017SJunchao Zhang       yy[7 * i + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6;
229dc1da017SJunchao Zhang       diag += 49;
230dc1da017SJunchao Zhang     }
231dc1da017SJunchao Zhang     break;
232dc1da017SJunchao Zhang   default:
233dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
234dc1da017SJunchao Zhang       for (ib = 0; ib < bs; ib++) {
235dc1da017SJunchao Zhang         PetscScalar rowsum = 0;
236dc1da017SJunchao Zhang         for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[bs * i + jb];
237dc1da017SJunchao Zhang         yy[bs * i + ib] = rowsum;
2388cd12025SJed Brown       }
2398cd12025SJed Brown       diag += bs * bs;
2408cd12025SJed Brown     }
241dc1da017SJunchao Zhang   }
2429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xx));
2439566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yy));
244dc1da017SJunchao Zhang   PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m));
2453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2468cd12025SJed Brown }
2478cd12025SJed Brown 
PCSetUp_PBJacobi_Host(PC pc,Mat diagPB)24842ce410bSJunchao Zhang PETSC_INTERN PetscErrorCode PCSetUp_PBJacobi_Host(PC pc, Mat diagPB)
249d71ae5a4SJacob Faibussowitsch {
2504b9ad928SBarry Smith   PC_PBJacobi   *jac = (PC_PBJacobi *)pc->data;
25142ce410bSJunchao Zhang   Mat            A   = diagPB ? diagPB : pc->pmat;
25200e125f8SBarry Smith   MatFactorError err;
253539c167fSBarry Smith   PetscInt       nlocal;
2544b9ad928SBarry Smith 
2554b9ad928SBarry Smith   PetscFunctionBegin;
2569566063dSJacob Faibussowitsch   PetscCall(MatInvertBlockDiagonal(A, &jac->diag));
2579566063dSJacob Faibussowitsch   PetscCall(MatFactorGetError(A, &err));
25800e125f8SBarry Smith   if (err) pc->failedreason = (PCFailedReason)err;
259d7806518SHong Zhang 
2609566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &jac->bs));
2619566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &nlocal, NULL));
262539c167fSBarry Smith   jac->mbs = nlocal / jac->bs;
2633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2644b9ad928SBarry Smith }
265f1580f4eSBarry Smith 
PCSetUp_PBJacobi(PC pc)26612facf1bSJunchao Zhang static PetscErrorCode PCSetUp_PBJacobi(PC pc)
26712facf1bSJunchao Zhang {
26842ce410bSJunchao Zhang   PetscBool flg;
26942ce410bSJunchao Zhang   Mat       diagPB = NULL;
27042ce410bSJunchao Zhang 
27112facf1bSJunchao Zhang   PetscFunctionBegin;
27212facf1bSJunchao Zhang   /* In PCCreate_PBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
27342ce410bSJunchao Zhang 
27442ce410bSJunchao Zhang   // pmat (e.g., MatCEED from libCEED) might have its own method to provide a matrix (diagPB)
27542ce410bSJunchao Zhang   // made of the diagonal blocks. So we check both pmat and diagVPB.
27642ce410bSJunchao Zhang   PetscCall(MatHasOperation(pc->pmat, MATOP_GET_BLOCK_DIAGONAL, &flg));
27742ce410bSJunchao Zhang   if (flg) PetscUseTypeMethod(pc->pmat, getblockdiagonal, &diagPB); // diagPB's reference count is increased upon return
27842ce410bSJunchao Zhang 
27912facf1bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
28012facf1bSJunchao Zhang   PetscBool isCuda;
28112facf1bSJunchao Zhang   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
28242ce410bSJunchao Zhang   if (!isCuda && diagPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagPB, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
28312facf1bSJunchao Zhang #endif
28412facf1bSJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS)
28512facf1bSJunchao Zhang   PetscBool isKok;
28612facf1bSJunchao Zhang   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
28742ce410bSJunchao Zhang   if (!isKok && diagPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagPB, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
28812facf1bSJunchao Zhang #endif
28912facf1bSJunchao Zhang 
29012facf1bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
29142ce410bSJunchao Zhang   if (isCuda) PetscCall(PCSetUp_PBJacobi_CUDA(pc, diagPB));
29212facf1bSJunchao Zhang   else
29312facf1bSJunchao Zhang #endif
29412facf1bSJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS)
29512facf1bSJunchao Zhang     if (isKok)
29642ce410bSJunchao Zhang     PetscCall(PCSetUp_PBJacobi_Kokkos(pc, diagPB));
29712facf1bSJunchao Zhang   else
29812facf1bSJunchao Zhang #endif
29912facf1bSJunchao Zhang   {
30042ce410bSJunchao Zhang     PetscCall(PCSetUp_PBJacobi_Host(pc, diagPB));
30112facf1bSJunchao Zhang   }
30242ce410bSJunchao Zhang   PetscCall(MatDestroy(&diagPB)); // since we don't need it anymore, we don't stash it in PC_PBJacobi
3033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
30412facf1bSJunchao Zhang }
30512facf1bSJunchao Zhang 
PCDestroy_PBJacobi(PC pc)30612facf1bSJunchao Zhang PetscErrorCode PCDestroy_PBJacobi(PC pc)
307d71ae5a4SJacob Faibussowitsch {
30842ce410bSJunchao Zhang   PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
30942ce410bSJunchao Zhang 
3104b9ad928SBarry Smith   PetscFunctionBegin;
3114b9ad928SBarry Smith   /*
3124b9ad928SBarry Smith       Free the private data structure that was hanging off the PC
3134b9ad928SBarry Smith   */
31442ce410bSJunchao Zhang   // PetscCall(PetscFree(jac->diag)); // the memory is owned by e.g., a->ibdiag in Mat_SeqAIJ, so don't free it here.
31542ce410bSJunchao Zhang   PetscCall(MatDestroy(&jac->diagPB));
3169566063dSJacob Faibussowitsch   PetscCall(PetscFree(pc->data));
3173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3184b9ad928SBarry Smith }
319fa939db7SJed Brown 
PCView_PBJacobi(PC pc,PetscViewer viewer)320d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer)
321d71ae5a4SJacob Faibussowitsch {
322fa939db7SJed Brown   PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
3239f196a02SMartin Diehl   PetscBool    isascii;
324fa939db7SJed Brown 
325fa939db7SJed Brown   PetscFunctionBegin;
3269f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3279f196a02SMartin Diehl   if (isascii) PetscCall(PetscViewerASCIIPrintf(viewer, "  point-block size %" PetscInt_FMT "\n", jac->bs));
3283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
329fa939db7SJed Brown }
330fa939db7SJed Brown 
33137a17b4dSBarry Smith /*MC
332422a814eSBarry Smith      PCPBJACOBI - Point block Jacobi preconditioner
333422a814eSBarry Smith 
33495452b02SPatrick Sanan    Notes:
335f1580f4eSBarry Smith     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks
336422a814eSBarry Smith 
337f1580f4eSBarry Smith    This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix
338422a814eSBarry Smith 
339422a814eSBarry Smith    Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
340422a814eSBarry Smith    is detected a PETSc error is generated.
341422a814eSBarry Smith 
34295452b02SPatrick Sanan    Developer Notes:
343f1580f4eSBarry Smith      This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
344*76c63389SBarry Smith      the factorization to continue even after a zero pivot is found resulting in a NaN and hence
34548773899SPierre Jolivet      terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing
346422a814eSBarry Smith      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
347422a814eSBarry Smith 
34890dfcc32SBarry Smith      Perhaps should provide an option that allows generation of a valid preconditioner
349f1580f4eSBarry Smith      even if a block is singular as the `PCJACOBI` does.
35037a17b4dSBarry Smith 
35137a17b4dSBarry Smith    Level: beginner
35237a17b4dSBarry Smith 
353562efe2eSBarry Smith .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI`
35437a17b4dSBarry Smith M*/
35537a17b4dSBarry Smith 
PCCreate_PBJacobi(PC pc)356d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
357d71ae5a4SJacob Faibussowitsch {
3584b9ad928SBarry Smith   PC_PBJacobi *jac;
3594b9ad928SBarry Smith 
3604b9ad928SBarry Smith   PetscFunctionBegin;
3614b9ad928SBarry Smith   /*
3624b9ad928SBarry Smith      Creates the private data structure for this preconditioner and
3634b9ad928SBarry Smith      attach it to the PC object.
3644b9ad928SBarry Smith   */
3654dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&jac));
3664b9ad928SBarry Smith   pc->data = (void *)jac;
3674b9ad928SBarry Smith 
3684b9ad928SBarry Smith   /*
3694b9ad928SBarry Smith      Initialize the pointers to vectors to ZERO; these will be used to store
3704b9ad928SBarry Smith      diagonal entries of the matrix for fast preconditioner application.
3714b9ad928SBarry Smith   */
3720a545947SLisandro Dalcin   jac->diag = NULL;
3734b9ad928SBarry Smith 
3744b9ad928SBarry Smith   /*
3754b9ad928SBarry Smith       Set the pointers for the functions that are provided above.
3764b9ad928SBarry Smith       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
3774b9ad928SBarry Smith       are called, they will automatically call these functions.  Note we
3784b9ad928SBarry Smith       choose not to provide a couple of these functions since they are
3794b9ad928SBarry Smith       not needed.
3804b9ad928SBarry Smith   */
38112facf1bSJunchao Zhang   pc->ops->apply               = PCApply_PBJacobi;
38212facf1bSJunchao Zhang   pc->ops->applytranspose      = PCApplyTranspose_PBJacobi;
3834b9ad928SBarry Smith   pc->ops->setup               = PCSetUp_PBJacobi;
3844b9ad928SBarry Smith   pc->ops->destroy             = PCDestroy_PBJacobi;
3850a545947SLisandro Dalcin   pc->ops->setfromoptions      = NULL;
386fa939db7SJed Brown   pc->ops->view                = PCView_PBJacobi;
3870a545947SLisandro Dalcin   pc->ops->applyrichardson     = NULL;
3880a545947SLisandro Dalcin   pc->ops->applysymmetricleft  = NULL;
3890a545947SLisandro Dalcin   pc->ops->applysymmetricright = NULL;
3903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3914b9ad928SBarry Smith }
392