xref: /petsc/src/ksp/pc/impls/pbjacobi/pbjacobi.c (revision dc1da017a8339fb6aa6e09b86af2a5ecc718e906)
1 
2 /*
3    Include files needed for the PBJacobi preconditioner:
4      pcimpl.h - private include file intended for use by all preconditioners
5 */
6 
7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
8 
9 /*
10    Private context (data structure) for the PBJacobi preconditioner.
11 */
12 typedef struct {
13   const MatScalar *diag;
14   PetscInt         bs, mbs;
15 } PC_PBJacobi;
16 
17 static PetscErrorCode PCApply_PBJacobi(PC pc, Vec x, Vec y)
18 {
19   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
20   PetscInt           i, ib, jb;
21   const PetscInt     m    = jac->mbs;
22   const PetscInt     bs   = jac->bs;
23   const MatScalar   *diag = jac->diag;
24   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
25   const PetscScalar *xx;
26 
27   PetscFunctionBegin;
28   PetscCall(VecGetArrayRead(x, &xx));
29   PetscCall(VecGetArray(y, &yy));
30   switch (bs) {
31   case 1:
32     for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
33     break;
34   case 2:
35     for (i = 0; i < m; i++) {
36       x0            = xx[2 * i];
37       x1            = xx[2 * i + 1];
38       yy[2 * i]     = diag[0] * x0 + diag[2] * x1;
39       yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1;
40       diag += 4;
41     }
42     break;
43   case 3:
44     for (i = 0; i < m; i++) {
45       x0 = xx[3 * i];
46       x1 = xx[3 * i + 1];
47       x2 = xx[3 * i + 2];
48 
49       yy[3 * i]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
50       yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
51       yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
52       diag += 9;
53     }
54     break;
55   case 4:
56     for (i = 0; i < m; i++) {
57       x0 = xx[4 * i];
58       x1 = xx[4 * i + 1];
59       x2 = xx[4 * i + 2];
60       x3 = xx[4 * i + 3];
61 
62       yy[4 * i]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
63       yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
64       yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
65       yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
66       diag += 16;
67     }
68     break;
69   case 5:
70     for (i = 0; i < m; i++) {
71       x0 = xx[5 * i];
72       x1 = xx[5 * i + 1];
73       x2 = xx[5 * i + 2];
74       x3 = xx[5 * i + 3];
75       x4 = xx[5 * i + 4];
76 
77       yy[5 * i]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
78       yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
79       yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
80       yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
81       yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
82       diag += 25;
83     }
84     break;
85   case 6:
86     for (i = 0; i < m; i++) {
87       x0 = xx[6 * i];
88       x1 = xx[6 * i + 1];
89       x2 = xx[6 * i + 2];
90       x3 = xx[6 * i + 3];
91       x4 = xx[6 * i + 4];
92       x5 = xx[6 * i + 5];
93 
94       yy[6 * i]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
95       yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
96       yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
97       yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
98       yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
99       yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
100       diag += 36;
101     }
102     break;
103   case 7:
104     for (i = 0; i < m; i++) {
105       x0 = xx[7 * i];
106       x1 = xx[7 * i + 1];
107       x2 = xx[7 * i + 2];
108       x3 = xx[7 * i + 3];
109       x4 = xx[7 * i + 4];
110       x5 = xx[7 * i + 5];
111       x6 = xx[7 * i + 6];
112 
113       yy[7 * i]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
114       yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
115       yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
116       yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
117       yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
118       yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
119       yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
120       diag += 49;
121     }
122     break;
123   default:
124     for (i = 0; i < m; i++) {
125       for (ib = 0; ib < bs; ib++) {
126         PetscScalar rowsum = 0;
127         for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb];
128         yy[bs * i + ib] = rowsum;
129       }
130       diag += bs * bs;
131     }
132   }
133   PetscCall(VecRestoreArrayRead(x, &xx));
134   PetscCall(VecRestoreArray(y, &yy));
135   PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */
136   PetscFunctionReturn(0);
137 }
138 
139 static PetscErrorCode PCApplyTranspose_PBJacobi(PC pc, Vec x, Vec y)
140 {
141   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
142   PetscInt           i, ib, jb;
143   const PetscInt     m    = jac->mbs;
144   const PetscInt     bs   = jac->bs;
145   const MatScalar   *diag = jac->diag;
146   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
147   const PetscScalar *xx;
148 
149   PetscFunctionBegin;
150   PetscCall(VecGetArrayRead(x, &xx));
151   PetscCall(VecGetArray(y, &yy));
152   switch (bs) {
153   case 1:
154     for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
155     break;
156   case 2:
157     for (i = 0; i < m; i++) {
158       x0            = xx[2 * i];
159       x1            = xx[2 * i + 1];
160       yy[2 * i]     = diag[0] * x0 + diag[1] * x1;
161       yy[2 * i + 1] = diag[2] * x0 + diag[3] * x1;
162       diag += 4;
163     }
164     break;
165   case 3:
166     for (i = 0; i < m; i++) {
167       x0 = xx[3 * i];
168       x1 = xx[3 * i + 1];
169       x2 = xx[3 * i + 2];
170 
171       yy[3 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2;
172       yy[3 * i + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2;
173       yy[3 * i + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2;
174       diag += 9;
175     }
176     break;
177   case 4:
178     for (i = 0; i < m; i++) {
179       x0 = xx[4 * i];
180       x1 = xx[4 * i + 1];
181       x2 = xx[4 * i + 2];
182       x3 = xx[4 * i + 3];
183 
184       yy[4 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3;
185       yy[4 * i + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3;
186       yy[4 * i + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3;
187       yy[4 * i + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3;
188       diag += 16;
189     }
190     break;
191   case 5:
192     for (i = 0; i < m; i++) {
193       x0 = xx[5 * i];
194       x1 = xx[5 * i + 1];
195       x2 = xx[5 * i + 2];
196       x3 = xx[5 * i + 3];
197       x4 = xx[5 * i + 4];
198 
199       yy[5 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4;
200       yy[5 * i + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4;
201       yy[5 * i + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4;
202       yy[5 * i + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4;
203       yy[5 * i + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4;
204       diag += 25;
205     }
206     break;
207   case 6:
208     for (i = 0; i < m; i++) {
209       x0 = xx[6 * i];
210       x1 = xx[6 * i + 1];
211       x2 = xx[6 * i + 2];
212       x3 = xx[6 * i + 3];
213       x4 = xx[6 * i + 4];
214       x5 = xx[6 * i + 5];
215 
216       yy[6 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5;
217       yy[6 * i + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5;
218       yy[6 * i + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5;
219       yy[6 * i + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5;
220       yy[6 * i + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5;
221       yy[6 * i + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5;
222       diag += 36;
223     }
224     break;
225   case 7:
226     for (i = 0; i < m; i++) {
227       x0 = xx[7 * i];
228       x1 = xx[7 * i + 1];
229       x2 = xx[7 * i + 2];
230       x3 = xx[7 * i + 3];
231       x4 = xx[7 * i + 4];
232       x5 = xx[7 * i + 5];
233       x6 = xx[7 * i + 6];
234 
235       yy[7 * i]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6;
236       yy[7 * i + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6;
237       yy[7 * i + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6;
238       yy[7 * i + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6;
239       yy[7 * i + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6;
240       yy[7 * i + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6;
241       yy[7 * i + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6;
242       diag += 49;
243     }
244     break;
245   default:
246     for (i = 0; i < m; i++) {
247       for (ib = 0; ib < bs; ib++) {
248         PetscScalar rowsum = 0;
249         for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[bs * i + jb];
250         yy[bs * i + ib] = rowsum;
251       }
252       diag += bs * bs;
253     }
254   }
255   PetscCall(VecRestoreArrayRead(x, &xx));
256   PetscCall(VecRestoreArray(y, &yy));
257   PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m));
258   PetscFunctionReturn(0);
259 }
260 
261 static PetscErrorCode PCSetUp_PBJacobi(PC pc)
262 {
263   PC_PBJacobi   *jac = (PC_PBJacobi *)pc->data;
264   Mat            A   = pc->pmat;
265   MatFactorError err;
266   PetscInt       nlocal;
267 
268   PetscFunctionBegin;
269   PetscCall(MatInvertBlockDiagonal(A, &jac->diag));
270   PetscCall(MatFactorGetError(A, &err));
271   if (err) pc->failedreason = (PCFailedReason)err;
272 
273   PetscCall(MatGetBlockSize(A, &jac->bs));
274   PetscCall(MatGetLocalSize(A, &nlocal, NULL));
275   jac->mbs                = nlocal / jac->bs;
276   pc->ops->apply          = PCApply_PBJacobi;
277   pc->ops->applytranspose = PCApplyTranspose_PBJacobi;
278   PetscFunctionReturn(0);
279 }
280 
281 static PetscErrorCode PCDestroy_PBJacobi(PC pc)
282 {
283   PetscFunctionBegin;
284   /*
285       Free the private data structure that was hanging off the PC
286   */
287   PetscCall(PetscFree(pc->data));
288   PetscFunctionReturn(0);
289 }
290 
291 static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer)
292 {
293   PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
294   PetscBool    iascii;
295 
296   PetscFunctionBegin;
297   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
298   if (iascii) PetscCall(PetscViewerASCIIPrintf(viewer, "  point-block size %" PetscInt_FMT "\n", jac->bs));
299   PetscFunctionReturn(0);
300 }
301 
302 /*MC
303      PCPBJACOBI - Point block Jacobi preconditioner
304 
305    Notes:
306     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks
307 
308    This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix
309 
310    Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
311    is detected a PETSc error is generated.
312 
313    Developer Notes:
314      This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
315      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
316      terminating `KSP` with a `KSP_DIVERGED_NANORIF` allowing
317      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
318 
319      Perhaps should provide an option that allows generation of a valid preconditioner
320      even if a block is singular as the `PCJACOBI` does.
321 
322    Level: beginner
323 
324 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI`
325 M*/
326 
327 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
328 {
329   PC_PBJacobi *jac;
330 
331   PetscFunctionBegin;
332   /*
333      Creates the private data structure for this preconditioner and
334      attach it to the PC object.
335   */
336   PetscCall(PetscNew(&jac));
337   pc->data = (void *)jac;
338 
339   /*
340      Initialize the pointers to vectors to ZERO; these will be used to store
341      diagonal entries of the matrix for fast preconditioner application.
342   */
343   jac->diag = NULL;
344 
345   /*
346       Set the pointers for the functions that are provided above.
347       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
348       are called, they will automatically call these functions.  Note we
349       choose not to provide a couple of these functions since they are
350       not needed.
351   */
352   pc->ops->apply               = NULL; /*set depending on the block size */
353   pc->ops->applytranspose      = NULL;
354   pc->ops->setup               = PCSetUp_PBJacobi;
355   pc->ops->destroy             = PCDestroy_PBJacobi;
356   pc->ops->setfromoptions      = NULL;
357   pc->ops->view                = PCView_PBJacobi;
358   pc->ops->applyrichardson     = NULL;
359   pc->ops->applysymmetricleft  = NULL;
360   pc->ops->applysymmetricright = NULL;
361   PetscFunctionReturn(0);
362 }
363