xref: /petsc/src/ksp/pc/impls/pbjacobi/pbjacobi.c (revision 3fc23ce8277e00a65c4b1be6e2b4535b56ba7978)
1 
2 /*
3    Include files needed for the PBJacobi preconditioner:
4      pcimpl.h - private include file intended for use by all preconditioners
5 */
6 
7 #include <petsc/private/pcimpl.h>   /*I "petscpc.h" I*/
8 
9 /*
10    Private context (data structure) for the PBJacobi preconditioner.
11 */
12 typedef struct {
13   const MatScalar *diag;
14   PetscInt        bs,mbs;
15 } PC_PBJacobi;
16 
17 static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y)
18 {
19   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
20   PetscInt          i,m = jac->mbs;
21   const MatScalar   *diag = jac->diag;
22   const PetscScalar *xx;
23   PetscScalar       *yy;
24 
25   PetscFunctionBegin;
26   PetscCall(VecGetArrayRead(x,&xx));
27   PetscCall(VecGetArray(y,&yy));
28   for (i=0; i<m; i++) yy[i] = diag[i]*xx[i];
29   PetscCall(VecRestoreArrayRead(x,&xx));
30   PetscCall(VecRestoreArray(y,&yy));
31   PetscCall(PetscLogFlops(m));
32   PetscFunctionReturn(0);
33 }
34 
35 static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y)
36 {
37   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
38   PetscInt        i,m = jac->mbs;
39   const MatScalar *diag = jac->diag;
40   PetscScalar     x0,x1,*yy;
41   const PetscScalar *xx;
42 
43   PetscFunctionBegin;
44   PetscCall(VecGetArrayRead(x,&xx));
45   PetscCall(VecGetArray(y,&yy));
46   for (i=0; i<m; i++) {
47     x0        = xx[2*i]; x1 = xx[2*i+1];
48     yy[2*i]   = diag[0]*x0 + diag[2]*x1;
49     yy[2*i+1] = diag[1]*x0 + diag[3]*x1;
50     diag     += 4;
51   }
52   PetscCall(VecRestoreArrayRead(x,&xx));
53   PetscCall(VecRestoreArray(y,&yy));
54   PetscCall(PetscLogFlops(6.0*m));
55   PetscFunctionReturn(0);
56 }
57 static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y)
58 {
59   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
60   PetscInt        i,m = jac->mbs;
61   const MatScalar *diag = jac->diag;
62   PetscScalar     x0,x1,x2,*yy;
63   const PetscScalar *xx;
64 
65   PetscFunctionBegin;
66   PetscCall(VecGetArrayRead(x,&xx));
67   PetscCall(VecGetArray(y,&yy));
68   for (i=0; i<m; i++) {
69     x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2];
70 
71     yy[3*i]   = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
72     yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
73     yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
74     diag     += 9;
75   }
76   PetscCall(VecRestoreArrayRead(x,&xx));
77   PetscCall(VecRestoreArray(y,&yy));
78   PetscCall(PetscLogFlops(15.0*m));
79   PetscFunctionReturn(0);
80 }
81 static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y)
82 {
83   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
84   PetscInt        i,m = jac->mbs;
85   const MatScalar *diag = jac->diag;
86   PetscScalar     x0,x1,x2,x3,*yy;
87   const PetscScalar *xx;
88 
89   PetscFunctionBegin;
90   PetscCall(VecGetArrayRead(x,&xx));
91   PetscCall(VecGetArray(y,&yy));
92   for (i=0; i<m; i++) {
93     x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3];
94 
95     yy[4*i]   = diag[0]*x0 + diag[4]*x1 + diag[8]*x2  + diag[12]*x3;
96     yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2  + diag[13]*x3;
97     yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
98     yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
99     diag     += 16;
100   }
101   PetscCall(VecRestoreArrayRead(x,&xx));
102   PetscCall(VecRestoreArray(y,&yy));
103   PetscCall(PetscLogFlops(28.0*m));
104   PetscFunctionReturn(0);
105 }
106 static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y)
107 {
108   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
109   PetscInt        i,m = jac->mbs;
110   const MatScalar *diag = jac->diag;
111   PetscScalar     x0,x1,x2,x3,x4,*yy;
112   const PetscScalar *xx;
113 
114   PetscFunctionBegin;
115   PetscCall(VecGetArrayRead(x,&xx));
116   PetscCall(VecGetArray(y,&yy));
117   for (i=0; i<m; i++) {
118     x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4];
119 
120     yy[5*i]   = diag[0]*x0 + diag[5]*x1 + diag[10]*x2  + diag[15]*x3 + diag[20]*x4;
121     yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2  + diag[16]*x3 + diag[21]*x4;
122     yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
123     yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
124     yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
125     diag     += 25;
126   }
127   PetscCall(VecRestoreArrayRead(x,&xx));
128   PetscCall(VecRestoreArray(y,&yy));
129   PetscCall(PetscLogFlops(45.0*m));
130   PetscFunctionReturn(0);
131 }
132 static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y)
133 {
134   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
135   PetscInt        i,m = jac->mbs;
136   const MatScalar *diag = jac->diag;
137   PetscScalar     x0,x1,x2,x3,x4,x5,*yy;
138   const PetscScalar *xx;
139 
140   PetscFunctionBegin;
141   PetscCall(VecGetArrayRead(x,&xx));
142   PetscCall(VecGetArray(y,&yy));
143   for (i=0; i<m; i++) {
144     x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5];
145 
146     yy[6*i]   = diag[0]*x0 + diag[6]*x1  + diag[12]*x2  + diag[18]*x3 + diag[24]*x4 + diag[30]*x5;
147     yy[6*i+1] = diag[1]*x0 + diag[7]*x1  + diag[13]*x2  + diag[19]*x3 + diag[25]*x4 + diag[31]*x5;
148     yy[6*i+2] = diag[2]*x0 + diag[8]*x1  + diag[14]*x2  + diag[20]*x3 + diag[26]*x4 + diag[32]*x5;
149     yy[6*i+3] = diag[3]*x0 + diag[9]*x1  + diag[15]*x2  + diag[21]*x3 + diag[27]*x4 + diag[33]*x5;
150     yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2  + diag[22]*x3 + diag[28]*x4 + diag[34]*x5;
151     yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2  + diag[23]*x3 + diag[29]*x4 + diag[35]*x5;
152     diag     += 36;
153   }
154   PetscCall(VecRestoreArrayRead(x,&xx));
155   PetscCall(VecRestoreArray(y,&yy));
156   PetscCall(PetscLogFlops(66.0*m));
157   PetscFunctionReturn(0);
158 }
159 static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y)
160 {
161   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
162   PetscInt        i,m = jac->mbs;
163   const MatScalar *diag = jac->diag;
164   PetscScalar     x0,x1,x2,x3,x4,x5,x6,*yy;
165   const PetscScalar *xx;
166 
167   PetscFunctionBegin;
168   PetscCall(VecGetArrayRead(x,&xx));
169   PetscCall(VecGetArray(y,&yy));
170   for (i=0; i<m; i++) {
171     x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6];
172 
173     yy[7*i]   = diag[0]*x0 + diag[7]*x1  + diag[14]*x2  + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6;
174     yy[7*i+1] = diag[1]*x0 + diag[8]*x1  + diag[15]*x2  + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6;
175     yy[7*i+2] = diag[2]*x0 + diag[9]*x1  + diag[16]*x2  + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6;
176     yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2  + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6;
177     yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2  + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6;
178     yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2  + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6;
179     yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2  + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6;
180     diag     += 49;
181   }
182   PetscCall(VecRestoreArrayRead(x,&xx));
183   PetscCall(VecRestoreArray(y,&yy));
184   PetscCall(PetscLogFlops(91.0*m)); /* 2*bs2 - bs */
185   PetscFunctionReturn(0);
186 }
187 static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y)
188 {
189   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
190   PetscInt          i,ib,jb;
191   const PetscInt    m = jac->mbs;
192   const PetscInt    bs = jac->bs;
193   const MatScalar   *diag = jac->diag;
194   PetscScalar       *yy;
195   const PetscScalar *xx;
196 
197   PetscFunctionBegin;
198   PetscCall(VecGetArrayRead(x,&xx));
199   PetscCall(VecGetArray(y,&yy));
200   for (i=0; i<m; i++) {
201     for (ib=0; ib<bs; ib++) {
202       PetscScalar rowsum = 0;
203       for (jb=0; jb<bs; jb++) {
204         rowsum += diag[ib+jb*bs] * xx[bs*i+jb];
205       }
206       yy[bs*i+ib] = rowsum;
207     }
208     diag += bs*bs;
209   }
210   PetscCall(VecRestoreArrayRead(x,&xx));
211   PetscCall(VecRestoreArray(y,&yy));
212   PetscCall(PetscLogFlops((2.0*bs*bs-bs)*m)); /* 2*bs2 - bs */
213   PetscFunctionReturn(0);
214 }
215 
216 static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc,Vec x,Vec y)
217 {
218   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
219   PetscInt          i,j,k,m = jac->mbs,bs=jac->bs;
220   const MatScalar   *diag = jac->diag;
221   const PetscScalar *xx;
222   PetscScalar       *yy;
223 
224   PetscFunctionBegin;
225   PetscCall(VecGetArrayRead(x,&xx));
226   PetscCall(VecGetArray(y,&yy));
227   for (i=0; i<m; i++) {
228     for (j=0; j<bs; j++) yy[i*bs+j] = 0.;
229     for (j=0; j<bs; j++) {
230       for (k=0; k<bs; k++) {
231         yy[i*bs+k] += diag[k*bs+j]*xx[i*bs+j];
232       }
233     }
234     diag += bs*bs;
235   }
236   PetscCall(VecRestoreArrayRead(x,&xx));
237   PetscCall(VecRestoreArray(y,&yy));
238   PetscCall(PetscLogFlops(m*bs*(2*bs-1)));
239   PetscFunctionReturn(0);
240 }
241 
242 /* -------------------------------------------------------------------------- */
243 static PetscErrorCode PCSetUp_PBJacobi(PC pc)
244 {
245   PC_PBJacobi    *jac = (PC_PBJacobi*)pc->data;
246   Mat            A = pc->pmat;
247   MatFactorError err;
248   PetscInt       nlocal;
249 
250   PetscFunctionBegin;
251   PetscCall(MatInvertBlockDiagonal(A,&jac->diag));
252   PetscCall(MatFactorGetError(A,&err));
253   if (err) pc->failedreason = (PCFailedReason)err;
254 
255   PetscCall(MatGetBlockSize(A,&jac->bs));
256   PetscCall(MatGetLocalSize(A,&nlocal,NULL));
257   jac->mbs = nlocal/jac->bs;
258   switch (jac->bs) {
259   case 1:
260     pc->ops->apply = PCApply_PBJacobi_1;
261     break;
262   case 2:
263     pc->ops->apply = PCApply_PBJacobi_2;
264     break;
265   case 3:
266     pc->ops->apply = PCApply_PBJacobi_3;
267     break;
268   case 4:
269     pc->ops->apply = PCApply_PBJacobi_4;
270     break;
271   case 5:
272     pc->ops->apply = PCApply_PBJacobi_5;
273     break;
274   case 6:
275     pc->ops->apply = PCApply_PBJacobi_6;
276     break;
277   case 7:
278     pc->ops->apply = PCApply_PBJacobi_7;
279     break;
280   default:
281     pc->ops->apply = PCApply_PBJacobi_N;
282     break;
283   }
284   pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N;
285   PetscFunctionReturn(0);
286 }
287 /* -------------------------------------------------------------------------- */
288 static PetscErrorCode PCDestroy_PBJacobi(PC pc)
289 {
290   PetscFunctionBegin;
291   /*
292       Free the private data structure that was hanging off the PC
293   */
294   PetscCall(PetscFree(pc->data));
295   PetscFunctionReturn(0);
296 }
297 
298 static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer)
299 {
300   PC_PBJacobi    *jac = (PC_PBJacobi*)pc->data;
301   PetscBool      iascii;
302 
303   PetscFunctionBegin;
304   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
305   if (iascii) {
306     PetscCall(PetscViewerASCIIPrintf(viewer,"  point-block size %" PetscInt_FMT "\n",jac->bs));
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 /* -------------------------------------------------------------------------- */
312 /*MC
313      PCPBJACOBI - Point block Jacobi preconditioner
314 
315    Notes:
316      See PCJACOBI for point Jacobi preconditioning, PCVPBJACOBI for variable size point block Jacobi and PCBJACOBI for large blocks
317 
318      This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix
319 
320      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
321      is detected a PETSc error is generated.
322 
323    Developer Notes:
324      This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
325      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
326      terminating KSP with a KSP_DIVERGED_NANORIF allowing
327      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
328 
329      Perhaps should provide an option that allows generation of a valid preconditioner
330      even if a block is singular as the PCJACOBI does.
331 
332    Level: beginner
333 
334 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI, PCVPBJACOBI, PCBJACOBI
335 
336 M*/
337 
338 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
339 {
340   PC_PBJacobi    *jac;
341 
342   PetscFunctionBegin;
343   /*
344      Creates the private data structure for this preconditioner and
345      attach it to the PC object.
346   */
347   PetscCall(PetscNewLog(pc,&jac));
348   pc->data = (void*)jac;
349 
350   /*
351      Initialize the pointers to vectors to ZERO; these will be used to store
352      diagonal entries of the matrix for fast preconditioner application.
353   */
354   jac->diag = NULL;
355 
356   /*
357       Set the pointers for the functions that are provided above.
358       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
359       are called, they will automatically call these functions.  Note we
360       choose not to provide a couple of these functions since they are
361       not needed.
362   */
363   pc->ops->apply               = NULL; /*set depending on the block size */
364   pc->ops->applytranspose      = NULL;
365   pc->ops->setup               = PCSetUp_PBJacobi;
366   pc->ops->destroy             = PCDestroy_PBJacobi;
367   pc->ops->setfromoptions      = NULL;
368   pc->ops->view                = PCView_PBJacobi;
369   pc->ops->applyrichardson     = NULL;
370   pc->ops->applysymmetricleft  = NULL;
371   pc->ops->applysymmetricright = NULL;
372   PetscFunctionReturn(0);
373 }
374