xref: /petsc/src/ksp/pc/impls/vpbjacobi/vpbjacobi.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h>
2 
3 static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y) {
4   PC_VPBJacobi      *jac = (PC_VPBJacobi *)pc->data;
5   PetscInt           i, ncnt = 0;
6   const MatScalar   *diag = jac->diag;
7   PetscInt           ib, jb, bs;
8   const PetscScalar *xx;
9   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
10   PetscInt           nblocks;
11   const PetscInt    *bsizes;
12 
13   PetscFunctionBegin;
14   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
15   PetscCall(VecGetArrayRead(x, &xx));
16   PetscCall(VecGetArray(y, &yy));
17   for (i = 0; i < nblocks; i++) {
18     bs = bsizes[i];
19     switch (bs) {
20     case 1: yy[ncnt] = *diag * xx[ncnt]; break;
21     case 2:
22       x0           = xx[ncnt];
23       x1           = xx[ncnt + 1];
24       yy[ncnt]     = diag[0] * x0 + diag[2] * x1;
25       yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1;
26       break;
27     case 3:
28       x0           = xx[ncnt];
29       x1           = xx[ncnt + 1];
30       x2           = xx[ncnt + 2];
31       yy[ncnt]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
32       yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
33       yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
34       break;
35     case 4:
36       x0           = xx[ncnt];
37       x1           = xx[ncnt + 1];
38       x2           = xx[ncnt + 2];
39       x3           = xx[ncnt + 3];
40       yy[ncnt]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
41       yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
42       yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
43       yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
44       break;
45     case 5:
46       x0           = xx[ncnt];
47       x1           = xx[ncnt + 1];
48       x2           = xx[ncnt + 2];
49       x3           = xx[ncnt + 3];
50       x4           = xx[ncnt + 4];
51       yy[ncnt]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
52       yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
53       yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
54       yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
55       yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
56       break;
57     case 6:
58       x0           = xx[ncnt];
59       x1           = xx[ncnt + 1];
60       x2           = xx[ncnt + 2];
61       x3           = xx[ncnt + 3];
62       x4           = xx[ncnt + 4];
63       x5           = xx[ncnt + 5];
64       yy[ncnt]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
65       yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
66       yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
67       yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
68       yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
69       yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
70       break;
71     case 7:
72       x0           = xx[ncnt];
73       x1           = xx[ncnt + 1];
74       x2           = xx[ncnt + 2];
75       x3           = xx[ncnt + 3];
76       x4           = xx[ncnt + 4];
77       x5           = xx[ncnt + 5];
78       x6           = xx[ncnt + 6];
79       yy[ncnt]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
80       yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
81       yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
82       yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
83       yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
84       yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
85       yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
86       break;
87     default:
88       for (ib = 0; ib < bs; ib++) {
89         PetscScalar rowsum = 0;
90         for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[ncnt + jb];
91         yy[ncnt + ib] = rowsum;
92       }
93     }
94     ncnt += bsizes[i];
95     diag += bsizes[i] * bsizes[i];
96   }
97   PetscCall(VecRestoreArrayRead(x, &xx));
98   PetscCall(VecRestoreArray(y, &yy));
99   PetscFunctionReturn(0);
100 }
101 
102 PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc) {
103   PC_VPBJacobi   *jac = (PC_VPBJacobi *)pc->data;
104   Mat             A   = pc->pmat;
105   MatFactorError  err;
106   PetscInt        i, nsize = 0, nlocal;
107   PetscInt        nblocks;
108   const PetscInt *bsizes;
109 
110   PetscFunctionBegin;
111   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
112   PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL));
113   PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI");
114   if (!jac->diag) {
115     for (i = 0; i < nblocks; i++) nsize += bsizes[i] * bsizes[i];
116     PetscCall(PetscMalloc1(nsize, &jac->diag));
117   }
118   PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag));
119   PetscCall(MatFactorGetError(A, &err));
120   if (err) pc->failedreason = (PCFailedReason)err;
121   pc->ops->apply = PCApply_VPBJacobi;
122   PetscFunctionReturn(0);
123 }
124 
125 static PetscErrorCode PCSetUp_VPBJacobi(PC pc) {
126   PetscFunctionBegin;
127   /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
128 #if defined(PETSC_HAVE_CUDA)
129   PetscBool isCuda;
130   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
131 #endif
132 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
133   PetscBool isKok;
134   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
135 #endif
136 
137 #if defined(PETSC_HAVE_CUDA)
138   if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc));
139   else
140 #endif
141 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
142     if (isKok)
143     PetscCall(PCSetUp_VPBJacobi_Kokkos(pc));
144   else
145 #endif
146   {
147     PetscCall(PCSetUp_VPBJacobi_Host(pc));
148   }
149   PetscFunctionReturn(0);
150 }
151 
152 PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc) {
153   PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
154 
155   PetscFunctionBegin;
156   /*
157       Free the private data structure that was hanging off the PC
158   */
159   PetscCall(PetscFree(jac->diag));
160   PetscCall(PetscFree(pc->data));
161   PetscFunctionReturn(0);
162 }
163 
164 /*MC
165      PCVPBJACOBI - Variable size point block Jacobi preconditioner
166 
167    Level: beginner
168 
169    Notes:
170      See `PCJACOBI` for point Jacobi preconditioning, `PCPBJACOBI` for fixed point block size, and `PCBJACOBI` for large size blocks
171 
172      This works for `MATAIJ` matrices
173 
174      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
175      is detected a PETSc error is generated.
176 
177      One must call `MatSetVariableBlockSizes()` to use this preconditioner
178 
179    Developer Notes:
180      This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
181      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
182      terminating `KSP` with a `KSP_DIVERGED_NANORIF` allowing
183      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
184 
185      Perhaps should provide an option that allows generation of a valid preconditioner
186      even if a block is singular as the `PCJACOBI` does.
187 
188 .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI`
189 M*/
190 
191 PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) {
192   PC_VPBJacobi *jac;
193 
194   PetscFunctionBegin;
195   /*
196      Creates the private data structure for this preconditioner and
197      attach it to the PC object.
198   */
199   PetscCall(PetscNew(&jac));
200   pc->data = (void *)jac;
201 
202   /*
203      Initialize the pointers to vectors to ZERO; these will be used to store
204      diagonal entries of the matrix for fast preconditioner application.
205   */
206   jac->diag = NULL;
207 
208   /*
209       Set the pointers for the functions that are provided above.
210       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
211       are called, they will automatically call these functions.  Note we
212       choose not to provide a couple of these functions since they are
213       not needed.
214   */
215   pc->ops->apply               = PCApply_VPBJacobi;
216   pc->ops->applytranspose      = NULL;
217   pc->ops->setup               = PCSetUp_VPBJacobi;
218   pc->ops->destroy             = PCDestroy_VPBJacobi;
219   pc->ops->setfromoptions      = NULL;
220   pc->ops->applyrichardson     = NULL;
221   pc->ops->applysymmetricleft  = NULL;
222   pc->ops->applysymmetricright = NULL;
223   PetscFunctionReturn(0);
224 }
225