xref: /petsc/src/ksp/pc/impls/vpbjacobi/vpbjacobi.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h>
2 
3 static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y) {
4   PC_VPBJacobi      *jac = (PC_VPBJacobi *)pc->data;
5   PetscInt           i, ncnt = 0;
6   const MatScalar   *diag = jac->diag;
7   PetscInt           ib, jb, bs;
8   const PetscScalar *xx;
9   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
10   PetscInt           nblocks;
11   const PetscInt    *bsizes;
12 
13   PetscFunctionBegin;
14   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
15   PetscCall(VecGetArrayRead(x, &xx));
16   PetscCall(VecGetArray(y, &yy));
17   for (i = 0; i < nblocks; i++) {
18     bs = bsizes[i];
19     switch (bs) {
20     case 1: yy[ncnt] = *diag * xx[ncnt]; break;
21     case 2:
22       x0           = xx[ncnt];
23       x1           = xx[ncnt + 1];
24       yy[ncnt]     = diag[0] * x0 + diag[2] * x1;
25       yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1;
26       break;
27     case 3:
28       x0           = xx[ncnt];
29       x1           = xx[ncnt + 1];
30       x2           = xx[ncnt + 2];
31       yy[ncnt]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
32       yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
33       yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
34       break;
35     case 4:
36       x0           = xx[ncnt];
37       x1           = xx[ncnt + 1];
38       x2           = xx[ncnt + 2];
39       x3           = xx[ncnt + 3];
40       yy[ncnt]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
41       yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
42       yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
43       yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
44       break;
45     case 5:
46       x0           = xx[ncnt];
47       x1           = xx[ncnt + 1];
48       x2           = xx[ncnt + 2];
49       x3           = xx[ncnt + 3];
50       x4           = xx[ncnt + 4];
51       yy[ncnt]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
52       yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
53       yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
54       yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
55       yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
56       break;
57     case 6:
58       x0           = xx[ncnt];
59       x1           = xx[ncnt + 1];
60       x2           = xx[ncnt + 2];
61       x3           = xx[ncnt + 3];
62       x4           = xx[ncnt + 4];
63       x5           = xx[ncnt + 5];
64       yy[ncnt]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
65       yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
66       yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
67       yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
68       yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
69       yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
70       break;
71     case 7:
72       x0           = xx[ncnt];
73       x1           = xx[ncnt + 1];
74       x2           = xx[ncnt + 2];
75       x3           = xx[ncnt + 3];
76       x4           = xx[ncnt + 4];
77       x5           = xx[ncnt + 5];
78       x6           = xx[ncnt + 6];
79       yy[ncnt]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
80       yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
81       yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
82       yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
83       yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
84       yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
85       yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
86       break;
87     default:
88       for (ib = 0; ib < bs; ib++) {
89         PetscScalar rowsum = 0;
90         for (jb = 0; jb < bs; jb++) { rowsum += diag[ib + jb * bs] * xx[ncnt + jb]; }
91         yy[ncnt + ib] = rowsum;
92       }
93     }
94     ncnt += bsizes[i];
95     diag += bsizes[i] * bsizes[i];
96   }
97   PetscCall(VecRestoreArrayRead(x, &xx));
98   PetscCall(VecRestoreArray(y, &yy));
99   PetscFunctionReturn(0);
100 }
101 
102 PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc) {
103   PC_VPBJacobi   *jac = (PC_VPBJacobi *)pc->data;
104   Mat             A   = pc->pmat;
105   MatFactorError  err;
106   PetscInt        i, nsize = 0, nlocal;
107   PetscInt        nblocks;
108   const PetscInt *bsizes;
109 
110   PetscFunctionBegin;
111   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
112   PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL));
113   PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI");
114   if (!jac->diag) {
115     for (i = 0; i < nblocks; i++) nsize += bsizes[i] * bsizes[i];
116     PetscCall(PetscMalloc1(nsize, &jac->diag));
117   }
118   PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag));
119   PetscCall(MatFactorGetError(A, &err));
120   if (err) pc->failedreason = (PCFailedReason)err;
121   pc->ops->apply = PCApply_VPBJacobi;
122   PetscFunctionReturn(0);
123 }
124 
125 static PetscErrorCode PCSetUp_VPBJacobi(PC pc) {
126   PetscFunctionBegin;
127   /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
128 #if defined(PETSC_HAVE_CUDA)
129   PetscBool isCuda;
130   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
131 #endif
132 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
133   PetscBool isKok;
134   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
135 #endif
136 
137 #if defined(PETSC_HAVE_CUDA)
138   if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc));
139   else
140 #endif
141 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
142     if (isKok)
143     PetscCall(PCSetUp_VPBJacobi_Kokkos(pc));
144   else
145 #endif
146   {
147     PetscCall(PCSetUp_VPBJacobi_Host(pc));
148   }
149   PetscFunctionReturn(0);
150 }
151 
152 PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc) {
153   PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
154 
155   PetscFunctionBegin;
156   /*
157       Free the private data structure that was hanging off the PC
158   */
159   PetscCall(PetscFree(jac->diag));
160   PetscCall(PetscFree(pc->data));
161   PetscFunctionReturn(0);
162 }
163 
164 /*MC
165      PCVPBJACOBI - Variable size point block Jacobi preconditioner
166 
167    Notes:
168      See PCJACOBI for point Jacobi preconditioning, PCPBJACOBI for fixed point block size, and PCBJACOBI for large size blocks
169 
170      This works for AIJ matrices
171 
172      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
173      is detected a PETSc error is generated.
174 
175      One must call MatSetVariableBlockSizes() to use this preconditioner
176 
177    Developer Notes:
178      This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
179      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
180      terminating KSP with a KSP_DIVERGED_NANORIF allowing
181      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
182 
183      Perhaps should provide an option that allows generation of a valid preconditioner
184      even if a block is singular as the PCJACOBI does.
185 
186    Level: beginner
187 
188 .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI`
189 
190 M*/
191 
192 PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) {
193   PC_VPBJacobi *jac;
194 
195   PetscFunctionBegin;
196   /*
197      Creates the private data structure for this preconditioner and
198      attach it to the PC object.
199   */
200   PetscCall(PetscNewLog(pc, &jac));
201   pc->data = (void *)jac;
202 
203   /*
204      Initialize the pointers to vectors to ZERO; these will be used to store
205      diagonal entries of the matrix for fast preconditioner application.
206   */
207   jac->diag = NULL;
208 
209   /*
210       Set the pointers for the functions that are provided above.
211       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
212       are called, they will automatically call these functions.  Note we
213       choose not to provide a couple of these functions since they are
214       not needed.
215   */
216   pc->ops->apply               = PCApply_VPBJacobi;
217   pc->ops->applytranspose      = NULL;
218   pc->ops->setup               = PCSetUp_VPBJacobi;
219   pc->ops->destroy             = PCDestroy_VPBJacobi;
220   pc->ops->setfromoptions      = NULL;
221   pc->ops->applyrichardson     = NULL;
222   pc->ops->applysymmetricleft  = NULL;
223   pc->ops->applysymmetricright = NULL;
224   PetscFunctionReturn(0);
225 }
226