xref: /petsc/src/ksp/pc/impls/pbjacobi/pbjacobi.c (revision 562efe2ef922487c6beae96ba39e19afd4eefbe6)
112facf1bSJunchao Zhang #include <../src/ksp/pc/impls/pbjacobi/pbjacobi.h>
24b9ad928SBarry Smith 
31f713683SJunchao Zhang static PetscErrorCode PCApply_PBJacobi(PC pc, Vec x, Vec y)
4d71ae5a4SJacob Faibussowitsch {
5bbead8a2SBarry Smith   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
61f713683SJunchao Zhang   PetscInt           i, ib, jb;
71f713683SJunchao Zhang   const PetscInt     m    = jac->mbs;
81f713683SJunchao Zhang   const PetscInt     bs   = jac->bs;
9bbead8a2SBarry Smith   const MatScalar   *diag = jac->diag;
101f713683SJunchao Zhang   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
11bbead8a2SBarry Smith   const PetscScalar *xx;
12bbead8a2SBarry Smith 
13bbead8a2SBarry Smith   PetscFunctionBegin;
149566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xx));
159566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yy));
161f713683SJunchao Zhang   switch (bs) {
171f713683SJunchao Zhang   case 1:
182fa5cd67SKarl Rupp     for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
191f713683SJunchao Zhang     break;
201f713683SJunchao Zhang   case 2:
214b9ad928SBarry Smith     for (i = 0; i < m; i++) {
229371c9d4SSatish Balay       x0            = xx[2 * i];
239371c9d4SSatish Balay       x1            = xx[2 * i + 1];
244b9ad928SBarry Smith       yy[2 * i]     = diag[0] * x0 + diag[2] * x1;
254b9ad928SBarry Smith       yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1;
264b9ad928SBarry Smith       diag += 4;
274b9ad928SBarry Smith     }
281f713683SJunchao Zhang     break;
291f713683SJunchao Zhang   case 3:
304b9ad928SBarry Smith     for (i = 0; i < m; i++) {
319371c9d4SSatish Balay       x0 = xx[3 * i];
329371c9d4SSatish Balay       x1 = xx[3 * i + 1];
339371c9d4SSatish Balay       x2 = xx[3 * i + 2];
342fa5cd67SKarl Rupp 
354b9ad928SBarry Smith       yy[3 * i]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
364b9ad928SBarry Smith       yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
374b9ad928SBarry Smith       yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
384b9ad928SBarry Smith       diag += 9;
394b9ad928SBarry Smith     }
401f713683SJunchao Zhang     break;
411f713683SJunchao Zhang   case 4:
424b9ad928SBarry Smith     for (i = 0; i < m; i++) {
439371c9d4SSatish Balay       x0 = xx[4 * i];
449371c9d4SSatish Balay       x1 = xx[4 * i + 1];
459371c9d4SSatish Balay       x2 = xx[4 * i + 2];
469371c9d4SSatish Balay       x3 = xx[4 * i + 3];
472fa5cd67SKarl Rupp 
484b9ad928SBarry Smith       yy[4 * i]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
494b9ad928SBarry Smith       yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
504b9ad928SBarry Smith       yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
514b9ad928SBarry Smith       yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
524b9ad928SBarry Smith       diag += 16;
534b9ad928SBarry Smith     }
541f713683SJunchao Zhang     break;
551f713683SJunchao Zhang   case 5:
564b9ad928SBarry Smith     for (i = 0; i < m; i++) {
579371c9d4SSatish Balay       x0 = xx[5 * i];
589371c9d4SSatish Balay       x1 = xx[5 * i + 1];
599371c9d4SSatish Balay       x2 = xx[5 * i + 2];
609371c9d4SSatish Balay       x3 = xx[5 * i + 3];
619371c9d4SSatish Balay       x4 = xx[5 * i + 4];
622fa5cd67SKarl Rupp 
634b9ad928SBarry Smith       yy[5 * i]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
644b9ad928SBarry Smith       yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
654b9ad928SBarry Smith       yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
664b9ad928SBarry Smith       yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
674b9ad928SBarry Smith       yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
684b9ad928SBarry Smith       diag += 25;
694b9ad928SBarry Smith     }
701f713683SJunchao Zhang     break;
711f713683SJunchao Zhang   case 6:
720e1b4bd6SMark F. Adams     for (i = 0; i < m; i++) {
739371c9d4SSatish Balay       x0 = xx[6 * i];
749371c9d4SSatish Balay       x1 = xx[6 * i + 1];
759371c9d4SSatish Balay       x2 = xx[6 * i + 2];
769371c9d4SSatish Balay       x3 = xx[6 * i + 3];
779371c9d4SSatish Balay       x4 = xx[6 * i + 4];
789371c9d4SSatish Balay       x5 = xx[6 * i + 5];
792fa5cd67SKarl Rupp 
800e1b4bd6SMark F. Adams       yy[6 * i]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
810e1b4bd6SMark F. Adams       yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
820e1b4bd6SMark F. Adams       yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
830e1b4bd6SMark F. Adams       yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
840e1b4bd6SMark F. Adams       yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
850e1b4bd6SMark F. Adams       yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
860e1b4bd6SMark F. Adams       diag += 36;
870e1b4bd6SMark F. Adams     }
881f713683SJunchao Zhang     break;
891f713683SJunchao Zhang   case 7:
900a4ca348SMatthew G Knepley     for (i = 0; i < m; i++) {
919371c9d4SSatish Balay       x0 = xx[7 * i];
929371c9d4SSatish Balay       x1 = xx[7 * i + 1];
939371c9d4SSatish Balay       x2 = xx[7 * i + 2];
949371c9d4SSatish Balay       x3 = xx[7 * i + 3];
959371c9d4SSatish Balay       x4 = xx[7 * i + 4];
969371c9d4SSatish Balay       x5 = xx[7 * i + 5];
979371c9d4SSatish Balay       x6 = xx[7 * i + 6];
982fa5cd67SKarl Rupp 
990a4ca348SMatthew G Knepley       yy[7 * i]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
1000a4ca348SMatthew G Knepley       yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
1010a4ca348SMatthew G Knepley       yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
1020a4ca348SMatthew G Knepley       yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
1030a4ca348SMatthew G Knepley       yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
1040a4ca348SMatthew G Knepley       yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
1050a4ca348SMatthew G Knepley       yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
1060a4ca348SMatthew G Knepley       diag += 49;
1070a4ca348SMatthew G Knepley     }
1081f713683SJunchao Zhang     break;
1091f713683SJunchao Zhang   default:
110f79daf70SMark Lohry     for (i = 0; i < m; i++) {
111f79daf70SMark Lohry       for (ib = 0; ib < bs; ib++) {
112f79daf70SMark Lohry         PetscScalar rowsum = 0;
113ad540459SPierre Jolivet         for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb];
114f79daf70SMark Lohry         yy[bs * i + ib] = rowsum;
115f79daf70SMark Lohry       }
116f79daf70SMark Lohry       diag += bs * bs;
117f79daf70SMark Lohry     }
1181f713683SJunchao Zhang   }
1199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xx));
1209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yy));
1219566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */
1223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
123f79daf70SMark Lohry }
124faff7d23SJed Brown 
125dc1da017SJunchao Zhang static PetscErrorCode PCApplyTranspose_PBJacobi(PC pc, Vec x, Vec y)
126d71ae5a4SJacob Faibussowitsch {
1278cd12025SJed Brown   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
128dc1da017SJunchao Zhang   PetscInt           i, ib, jb;
129dc1da017SJunchao Zhang   const PetscInt     m    = jac->mbs;
130dc1da017SJunchao Zhang   const PetscInt     bs   = jac->bs;
1318cd12025SJed Brown   const MatScalar   *diag = jac->diag;
132dc1da017SJunchao Zhang   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
1338cd12025SJed Brown   const PetscScalar *xx;
1348cd12025SJed Brown 
1358cd12025SJed Brown   PetscFunctionBegin;
1369566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xx));
1379566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yy));
138dc1da017SJunchao Zhang   switch (bs) {
139dc1da017SJunchao Zhang   case 1:
140dc1da017SJunchao Zhang     for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
141dc1da017SJunchao Zhang     break;
142dc1da017SJunchao Zhang   case 2:
1438cd12025SJed Brown     for (i = 0; i < m; i++) {
144dc1da017SJunchao Zhang       x0            = xx[2 * i];
145dc1da017SJunchao Zhang       x1            = xx[2 * i + 1];
146dc1da017SJunchao Zhang       yy[2 * i]     = diag[0] * x0 + diag[1] * x1;
147dc1da017SJunchao Zhang       yy[2 * i + 1] = diag[2] * x0 + diag[3] * x1;
148dc1da017SJunchao Zhang       diag += 4;
149dc1da017SJunchao Zhang     }
150dc1da017SJunchao Zhang     break;
151dc1da017SJunchao Zhang   case 3:
152dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
153dc1da017SJunchao Zhang       x0 = xx[3 * i];
154dc1da017SJunchao Zhang       x1 = xx[3 * i + 1];
155dc1da017SJunchao Zhang       x2 = xx[3 * i + 2];
156dc1da017SJunchao Zhang 
157dc1da017SJunchao Zhang       yy[3 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2;
158dc1da017SJunchao Zhang       yy[3 * i + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2;
159dc1da017SJunchao Zhang       yy[3 * i + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2;
160dc1da017SJunchao Zhang       diag += 9;
161dc1da017SJunchao Zhang     }
162dc1da017SJunchao Zhang     break;
163dc1da017SJunchao Zhang   case 4:
164dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
165dc1da017SJunchao Zhang       x0 = xx[4 * i];
166dc1da017SJunchao Zhang       x1 = xx[4 * i + 1];
167dc1da017SJunchao Zhang       x2 = xx[4 * i + 2];
168dc1da017SJunchao Zhang       x3 = xx[4 * i + 3];
169dc1da017SJunchao Zhang 
170dc1da017SJunchao Zhang       yy[4 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3;
171dc1da017SJunchao Zhang       yy[4 * i + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3;
172dc1da017SJunchao Zhang       yy[4 * i + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3;
173dc1da017SJunchao Zhang       yy[4 * i + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3;
174dc1da017SJunchao Zhang       diag += 16;
175dc1da017SJunchao Zhang     }
176dc1da017SJunchao Zhang     break;
177dc1da017SJunchao Zhang   case 5:
178dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
179dc1da017SJunchao Zhang       x0 = xx[5 * i];
180dc1da017SJunchao Zhang       x1 = xx[5 * i + 1];
181dc1da017SJunchao Zhang       x2 = xx[5 * i + 2];
182dc1da017SJunchao Zhang       x3 = xx[5 * i + 3];
183dc1da017SJunchao Zhang       x4 = xx[5 * i + 4];
184dc1da017SJunchao Zhang 
185dc1da017SJunchao Zhang       yy[5 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4;
186dc1da017SJunchao Zhang       yy[5 * i + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4;
187dc1da017SJunchao Zhang       yy[5 * i + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4;
188dc1da017SJunchao Zhang       yy[5 * i + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4;
189dc1da017SJunchao Zhang       yy[5 * i + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4;
190dc1da017SJunchao Zhang       diag += 25;
191dc1da017SJunchao Zhang     }
192dc1da017SJunchao Zhang     break;
193dc1da017SJunchao Zhang   case 6:
194dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
195dc1da017SJunchao Zhang       x0 = xx[6 * i];
196dc1da017SJunchao Zhang       x1 = xx[6 * i + 1];
197dc1da017SJunchao Zhang       x2 = xx[6 * i + 2];
198dc1da017SJunchao Zhang       x3 = xx[6 * i + 3];
199dc1da017SJunchao Zhang       x4 = xx[6 * i + 4];
200dc1da017SJunchao Zhang       x5 = xx[6 * i + 5];
201dc1da017SJunchao Zhang 
202dc1da017SJunchao Zhang       yy[6 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5;
203dc1da017SJunchao Zhang       yy[6 * i + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5;
204dc1da017SJunchao Zhang       yy[6 * i + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5;
205dc1da017SJunchao Zhang       yy[6 * i + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5;
206dc1da017SJunchao Zhang       yy[6 * i + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5;
207dc1da017SJunchao Zhang       yy[6 * i + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5;
208dc1da017SJunchao Zhang       diag += 36;
209dc1da017SJunchao Zhang     }
210dc1da017SJunchao Zhang     break;
211dc1da017SJunchao Zhang   case 7:
212dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
213dc1da017SJunchao Zhang       x0 = xx[7 * i];
214dc1da017SJunchao Zhang       x1 = xx[7 * i + 1];
215dc1da017SJunchao Zhang       x2 = xx[7 * i + 2];
216dc1da017SJunchao Zhang       x3 = xx[7 * i + 3];
217dc1da017SJunchao Zhang       x4 = xx[7 * i + 4];
218dc1da017SJunchao Zhang       x5 = xx[7 * i + 5];
219dc1da017SJunchao Zhang       x6 = xx[7 * i + 6];
220dc1da017SJunchao Zhang 
221dc1da017SJunchao Zhang       yy[7 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6;
222dc1da017SJunchao Zhang       yy[7 * i + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6;
223dc1da017SJunchao Zhang       yy[7 * i + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6;
224dc1da017SJunchao Zhang       yy[7 * i + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6;
225dc1da017SJunchao Zhang       yy[7 * i + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6;
226dc1da017SJunchao Zhang       yy[7 * i + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6;
227dc1da017SJunchao Zhang       yy[7 * i + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6;
228dc1da017SJunchao Zhang       diag += 49;
229dc1da017SJunchao Zhang     }
230dc1da017SJunchao Zhang     break;
231dc1da017SJunchao Zhang   default:
232dc1da017SJunchao Zhang     for (i = 0; i < m; i++) {
233dc1da017SJunchao Zhang       for (ib = 0; ib < bs; ib++) {
234dc1da017SJunchao Zhang         PetscScalar rowsum = 0;
235dc1da017SJunchao Zhang         for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[bs * i + jb];
236dc1da017SJunchao Zhang         yy[bs * i + ib] = rowsum;
2378cd12025SJed Brown       }
2388cd12025SJed Brown       diag += bs * bs;
2398cd12025SJed Brown     }
240dc1da017SJunchao Zhang   }
2419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xx));
2429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yy));
243dc1da017SJunchao Zhang   PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m));
2443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2458cd12025SJed Brown }
2468cd12025SJed Brown 
24712facf1bSJunchao Zhang PETSC_INTERN PetscErrorCode PCSetUp_PBJacobi_Host(PC pc)
248d71ae5a4SJacob Faibussowitsch {
2494b9ad928SBarry Smith   PC_PBJacobi   *jac = (PC_PBJacobi *)pc->data;
2504b9ad928SBarry Smith   Mat            A   = pc->pmat;
25100e125f8SBarry Smith   MatFactorError err;
252539c167fSBarry Smith   PetscInt       nlocal;
2534b9ad928SBarry Smith 
2544b9ad928SBarry Smith   PetscFunctionBegin;
2559566063dSJacob Faibussowitsch   PetscCall(MatInvertBlockDiagonal(A, &jac->diag));
2569566063dSJacob Faibussowitsch   PetscCall(MatFactorGetError(A, &err));
25700e125f8SBarry Smith   if (err) pc->failedreason = (PCFailedReason)err;
258d7806518SHong Zhang 
2599566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &jac->bs));
2609566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &nlocal, NULL));
261539c167fSBarry Smith   jac->mbs = nlocal / jac->bs;
2623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2634b9ad928SBarry Smith }
264f1580f4eSBarry Smith 
26512facf1bSJunchao Zhang static PetscErrorCode PCSetUp_PBJacobi(PC pc)
26612facf1bSJunchao Zhang {
26712facf1bSJunchao Zhang   PetscFunctionBegin;
26812facf1bSJunchao Zhang   /* In PCCreate_PBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
26912facf1bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
27012facf1bSJunchao Zhang   PetscBool isCuda;
27112facf1bSJunchao Zhang   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
27212facf1bSJunchao Zhang #endif
27312facf1bSJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS)
27412facf1bSJunchao Zhang   PetscBool isKok;
27512facf1bSJunchao Zhang   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
27612facf1bSJunchao Zhang #endif
27712facf1bSJunchao Zhang 
27812facf1bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
27912facf1bSJunchao Zhang   if (isCuda) PetscCall(PCSetUp_PBJacobi_CUDA(pc));
28012facf1bSJunchao Zhang   else
28112facf1bSJunchao Zhang #endif
28212facf1bSJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS)
28312facf1bSJunchao Zhang     if (isKok)
28412facf1bSJunchao Zhang     PetscCall(PCSetUp_PBJacobi_Kokkos(pc));
28512facf1bSJunchao Zhang   else
28612facf1bSJunchao Zhang #endif
28712facf1bSJunchao Zhang   {
28812facf1bSJunchao Zhang     PetscCall(PCSetUp_PBJacobi_Host(pc));
28912facf1bSJunchao Zhang   }
2903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29112facf1bSJunchao Zhang }
29212facf1bSJunchao Zhang 
29312facf1bSJunchao Zhang PetscErrorCode PCDestroy_PBJacobi(PC pc)
294d71ae5a4SJacob Faibussowitsch {
2954b9ad928SBarry Smith   PetscFunctionBegin;
2964b9ad928SBarry Smith   /*
2974b9ad928SBarry Smith       Free the private data structure that was hanging off the PC
2984b9ad928SBarry Smith   */
2999566063dSJacob Faibussowitsch   PetscCall(PetscFree(pc->data));
3003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3014b9ad928SBarry Smith }
302fa939db7SJed Brown 
303d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer)
304d71ae5a4SJacob Faibussowitsch {
305fa939db7SJed Brown   PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
306fa939db7SJed Brown   PetscBool    iascii;
307fa939db7SJed Brown 
308fa939db7SJed Brown   PetscFunctionBegin;
3099566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
31048a46eb9SPierre Jolivet   if (iascii) PetscCall(PetscViewerASCIIPrintf(viewer, "  point-block size %" PetscInt_FMT "\n", jac->bs));
3113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
312fa939db7SJed Brown }
313fa939db7SJed Brown 
31437a17b4dSBarry Smith /*MC
315422a814eSBarry Smith      PCPBJACOBI - Point block Jacobi preconditioner
316422a814eSBarry Smith 
31795452b02SPatrick Sanan    Notes:
318f1580f4eSBarry Smith     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks
319422a814eSBarry Smith 
320f1580f4eSBarry Smith    This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix
321422a814eSBarry Smith 
322422a814eSBarry Smith    Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
323422a814eSBarry Smith    is detected a PETSc error is generated.
324422a814eSBarry Smith 
32595452b02SPatrick Sanan    Developer Notes:
326f1580f4eSBarry Smith      This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
327422a814eSBarry Smith      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
32848773899SPierre Jolivet      terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing
329422a814eSBarry Smith      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
330422a814eSBarry Smith 
33190dfcc32SBarry Smith      Perhaps should provide an option that allows generation of a valid preconditioner
332f1580f4eSBarry Smith      even if a block is singular as the `PCJACOBI` does.
33337a17b4dSBarry Smith 
33437a17b4dSBarry Smith    Level: beginner
33537a17b4dSBarry Smith 
336*562efe2eSBarry Smith .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI`
33737a17b4dSBarry Smith M*/
33837a17b4dSBarry Smith 
339d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
340d71ae5a4SJacob Faibussowitsch {
3414b9ad928SBarry Smith   PC_PBJacobi *jac;
3424b9ad928SBarry Smith 
3434b9ad928SBarry Smith   PetscFunctionBegin;
3444b9ad928SBarry Smith   /*
3454b9ad928SBarry Smith      Creates the private data structure for this preconditioner and
3464b9ad928SBarry Smith      attach it to the PC object.
3474b9ad928SBarry Smith   */
3484dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&jac));
3494b9ad928SBarry Smith   pc->data = (void *)jac;
3504b9ad928SBarry Smith 
3514b9ad928SBarry Smith   /*
3524b9ad928SBarry Smith      Initialize the pointers to vectors to ZERO; these will be used to store
3534b9ad928SBarry Smith      diagonal entries of the matrix for fast preconditioner application.
3544b9ad928SBarry Smith   */
3550a545947SLisandro Dalcin   jac->diag = NULL;
3564b9ad928SBarry Smith 
3574b9ad928SBarry Smith   /*
3584b9ad928SBarry Smith       Set the pointers for the functions that are provided above.
3594b9ad928SBarry Smith       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
3604b9ad928SBarry Smith       are called, they will automatically call these functions.  Note we
3614b9ad928SBarry Smith       choose not to provide a couple of these functions since they are
3624b9ad928SBarry Smith       not needed.
3634b9ad928SBarry Smith   */
36412facf1bSJunchao Zhang   pc->ops->apply               = PCApply_PBJacobi;
36512facf1bSJunchao Zhang   pc->ops->applytranspose      = PCApplyTranspose_PBJacobi;
3664b9ad928SBarry Smith   pc->ops->setup               = PCSetUp_PBJacobi;
3674b9ad928SBarry Smith   pc->ops->destroy             = PCDestroy_PBJacobi;
3680a545947SLisandro Dalcin   pc->ops->setfromoptions      = NULL;
369fa939db7SJed Brown   pc->ops->view                = PCView_PBJacobi;
3700a545947SLisandro Dalcin   pc->ops->applyrichardson     = NULL;
3710a545947SLisandro Dalcin   pc->ops->applysymmetricleft  = NULL;
3720a545947SLisandro Dalcin   pc->ops->applysymmetricright = NULL;
3733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3744b9ad928SBarry Smith }
375