1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs, mbs; 15 } PC_PBJacobi; 16 17 static PetscErrorCode PCApply_PBJacobi_1(PC pc, Vec x, Vec y) { 18 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 19 PetscInt i, m = jac->mbs; 20 const MatScalar *diag = jac->diag; 21 const PetscScalar *xx; 22 PetscScalar *yy; 23 24 PetscFunctionBegin; 25 PetscCall(VecGetArrayRead(x, &xx)); 26 PetscCall(VecGetArray(y, &yy)); 27 for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i]; 28 PetscCall(VecRestoreArrayRead(x, &xx)); 29 PetscCall(VecRestoreArray(y, &yy)); 30 PetscCall(PetscLogFlops(m)); 31 PetscFunctionReturn(0); 32 } 33 34 static PetscErrorCode PCApply_PBJacobi_2(PC pc, Vec x, Vec y) { 35 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 36 PetscInt i, m = jac->mbs; 37 const MatScalar *diag = jac->diag; 38 PetscScalar x0, x1, *yy; 39 const PetscScalar *xx; 40 41 PetscFunctionBegin; 42 PetscCall(VecGetArrayRead(x, &xx)); 43 PetscCall(VecGetArray(y, &yy)); 44 for (i = 0; i < m; i++) { 45 x0 = xx[2 * i]; 46 x1 = xx[2 * i + 1]; 47 yy[2 * i] = diag[0] * x0 + diag[2] * x1; 48 yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1; 49 diag += 4; 50 } 51 PetscCall(VecRestoreArrayRead(x, &xx)); 52 PetscCall(VecRestoreArray(y, &yy)); 53 PetscCall(PetscLogFlops(6.0 * m)); 54 PetscFunctionReturn(0); 55 } 56 static PetscErrorCode PCApply_PBJacobi_3(PC pc, Vec x, Vec y) { 57 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 58 PetscInt i, m = jac->mbs; 59 const MatScalar *diag = jac->diag; 60 PetscScalar x0, x1, x2, *yy; 61 const PetscScalar *xx; 62 63 PetscFunctionBegin; 64 PetscCall(VecGetArrayRead(x, &xx)); 65 PetscCall(VecGetArray(y, &yy)); 66 for (i = 0; i < m; i++) { 67 x0 = xx[3 * i]; 68 x1 = xx[3 * i + 1]; 69 x2 = xx[3 * i + 2]; 70 71 yy[3 * i] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 72 yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 73 yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 74 diag += 9; 75 } 76 PetscCall(VecRestoreArrayRead(x, &xx)); 77 PetscCall(VecRestoreArray(y, &yy)); 78 PetscCall(PetscLogFlops(15.0 * m)); 79 PetscFunctionReturn(0); 80 } 81 static PetscErrorCode PCApply_PBJacobi_4(PC pc, Vec x, Vec y) { 82 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 83 PetscInt i, m = jac->mbs; 84 const MatScalar *diag = jac->diag; 85 PetscScalar x0, x1, x2, x3, *yy; 86 const PetscScalar *xx; 87 88 PetscFunctionBegin; 89 PetscCall(VecGetArrayRead(x, &xx)); 90 PetscCall(VecGetArray(y, &yy)); 91 for (i = 0; i < m; i++) { 92 x0 = xx[4 * i]; 93 x1 = xx[4 * i + 1]; 94 x2 = xx[4 * i + 2]; 95 x3 = xx[4 * i + 3]; 96 97 yy[4 * i] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 98 yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 99 yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 100 yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 101 diag += 16; 102 } 103 PetscCall(VecRestoreArrayRead(x, &xx)); 104 PetscCall(VecRestoreArray(y, &yy)); 105 PetscCall(PetscLogFlops(28.0 * m)); 106 PetscFunctionReturn(0); 107 } 108 static PetscErrorCode PCApply_PBJacobi_5(PC pc, Vec x, Vec y) { 109 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 110 PetscInt i, m = jac->mbs; 111 const MatScalar *diag = jac->diag; 112 PetscScalar x0, x1, x2, x3, x4, *yy; 113 const PetscScalar *xx; 114 115 PetscFunctionBegin; 116 PetscCall(VecGetArrayRead(x, &xx)); 117 PetscCall(VecGetArray(y, &yy)); 118 for (i = 0; i < m; i++) { 119 x0 = xx[5 * i]; 120 x1 = xx[5 * i + 1]; 121 x2 = xx[5 * i + 2]; 122 x3 = xx[5 * i + 3]; 123 x4 = xx[5 * i + 4]; 124 125 yy[5 * i] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 126 yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 127 yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 128 yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 129 yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 130 diag += 25; 131 } 132 PetscCall(VecRestoreArrayRead(x, &xx)); 133 PetscCall(VecRestoreArray(y, &yy)); 134 PetscCall(PetscLogFlops(45.0 * m)); 135 PetscFunctionReturn(0); 136 } 137 static PetscErrorCode PCApply_PBJacobi_6(PC pc, Vec x, Vec y) { 138 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 139 PetscInt i, m = jac->mbs; 140 const MatScalar *diag = jac->diag; 141 PetscScalar x0, x1, x2, x3, x4, x5, *yy; 142 const PetscScalar *xx; 143 144 PetscFunctionBegin; 145 PetscCall(VecGetArrayRead(x, &xx)); 146 PetscCall(VecGetArray(y, &yy)); 147 for (i = 0; i < m; i++) { 148 x0 = xx[6 * i]; 149 x1 = xx[6 * i + 1]; 150 x2 = xx[6 * i + 2]; 151 x3 = xx[6 * i + 3]; 152 x4 = xx[6 * i + 4]; 153 x5 = xx[6 * i + 5]; 154 155 yy[6 * i] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 156 yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 157 yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 158 yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 159 yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 160 yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 161 diag += 36; 162 } 163 PetscCall(VecRestoreArrayRead(x, &xx)); 164 PetscCall(VecRestoreArray(y, &yy)); 165 PetscCall(PetscLogFlops(66.0 * m)); 166 PetscFunctionReturn(0); 167 } 168 static PetscErrorCode PCApply_PBJacobi_7(PC pc, Vec x, Vec y) { 169 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 170 PetscInt i, m = jac->mbs; 171 const MatScalar *diag = jac->diag; 172 PetscScalar x0, x1, x2, x3, x4, x5, x6, *yy; 173 const PetscScalar *xx; 174 175 PetscFunctionBegin; 176 PetscCall(VecGetArrayRead(x, &xx)); 177 PetscCall(VecGetArray(y, &yy)); 178 for (i = 0; i < m; i++) { 179 x0 = xx[7 * i]; 180 x1 = xx[7 * i + 1]; 181 x2 = xx[7 * i + 2]; 182 x3 = xx[7 * i + 3]; 183 x4 = xx[7 * i + 4]; 184 x5 = xx[7 * i + 5]; 185 x6 = xx[7 * i + 6]; 186 187 yy[7 * i] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 188 yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 189 yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 190 yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 191 yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 192 yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 193 yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 194 diag += 49; 195 } 196 PetscCall(VecRestoreArrayRead(x, &xx)); 197 PetscCall(VecRestoreArray(y, &yy)); 198 PetscCall(PetscLogFlops(91.0 * m)); /* 2*bs2 - bs */ 199 PetscFunctionReturn(0); 200 } 201 static PetscErrorCode PCApply_PBJacobi_N(PC pc, Vec x, Vec y) { 202 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 203 PetscInt i, ib, jb; 204 const PetscInt m = jac->mbs; 205 const PetscInt bs = jac->bs; 206 const MatScalar *diag = jac->diag; 207 PetscScalar *yy; 208 const PetscScalar *xx; 209 210 PetscFunctionBegin; 211 PetscCall(VecGetArrayRead(x, &xx)); 212 PetscCall(VecGetArray(y, &yy)); 213 for (i = 0; i < m; i++) { 214 for (ib = 0; ib < bs; ib++) { 215 PetscScalar rowsum = 0; 216 for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb]; 217 yy[bs * i + ib] = rowsum; 218 } 219 diag += bs * bs; 220 } 221 PetscCall(VecRestoreArrayRead(x, &xx)); 222 PetscCall(VecRestoreArray(y, &yy)); 223 PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */ 224 PetscFunctionReturn(0); 225 } 226 227 static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc, Vec x, Vec y) { 228 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 229 PetscInt i, j, k, m = jac->mbs, bs = jac->bs; 230 const MatScalar *diag = jac->diag; 231 const PetscScalar *xx; 232 PetscScalar *yy; 233 234 PetscFunctionBegin; 235 PetscCall(VecGetArrayRead(x, &xx)); 236 PetscCall(VecGetArray(y, &yy)); 237 for (i = 0; i < m; i++) { 238 for (j = 0; j < bs; j++) yy[i * bs + j] = 0.; 239 for (j = 0; j < bs; j++) { 240 for (k = 0; k < bs; k++) yy[i * bs + k] += diag[k * bs + j] * xx[i * bs + j]; 241 } 242 diag += bs * bs; 243 } 244 PetscCall(VecRestoreArrayRead(x, &xx)); 245 PetscCall(VecRestoreArray(y, &yy)); 246 PetscCall(PetscLogFlops(m * bs * (2 * bs - 1))); 247 PetscFunctionReturn(0); 248 } 249 250 static PetscErrorCode PCSetUp_PBJacobi(PC pc) { 251 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 252 Mat A = pc->pmat; 253 MatFactorError err; 254 PetscInt nlocal; 255 256 PetscFunctionBegin; 257 PetscCall(MatInvertBlockDiagonal(A, &jac->diag)); 258 PetscCall(MatFactorGetError(A, &err)); 259 if (err) pc->failedreason = (PCFailedReason)err; 260 261 PetscCall(MatGetBlockSize(A, &jac->bs)); 262 PetscCall(MatGetLocalSize(A, &nlocal, NULL)); 263 jac->mbs = nlocal / jac->bs; 264 switch (jac->bs) { 265 case 1: pc->ops->apply = PCApply_PBJacobi_1; break; 266 case 2: pc->ops->apply = PCApply_PBJacobi_2; break; 267 case 3: pc->ops->apply = PCApply_PBJacobi_3; break; 268 case 4: pc->ops->apply = PCApply_PBJacobi_4; break; 269 case 5: pc->ops->apply = PCApply_PBJacobi_5; break; 270 case 6: pc->ops->apply = PCApply_PBJacobi_6; break; 271 case 7: pc->ops->apply = PCApply_PBJacobi_7; break; 272 default: pc->ops->apply = PCApply_PBJacobi_N; break; 273 } 274 pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N; 275 PetscFunctionReturn(0); 276 } 277 278 static PetscErrorCode PCDestroy_PBJacobi(PC pc) { 279 PetscFunctionBegin; 280 /* 281 Free the private data structure that was hanging off the PC 282 */ 283 PetscCall(PetscFree(pc->data)); 284 PetscFunctionReturn(0); 285 } 286 287 static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer) { 288 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 289 PetscBool iascii; 290 291 PetscFunctionBegin; 292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 293 if (iascii) PetscCall(PetscViewerASCIIPrintf(viewer, " point-block size %" PetscInt_FMT "\n", jac->bs)); 294 PetscFunctionReturn(0); 295 } 296 297 /*MC 298 PCPBJACOBI - Point block Jacobi preconditioner 299 300 Notes: 301 See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks 302 303 This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix 304 305 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 306 is detected a PETSc error is generated. 307 308 Developer Notes: 309 This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow 310 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 311 terminating `KSP` with a `KSP_DIVERGED_NANORIF` allowing 312 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 313 314 Perhaps should provide an option that allows generation of a valid preconditioner 315 even if a block is singular as the `PCJACOBI` does. 316 317 Level: beginner 318 319 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI` 320 M*/ 321 322 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) { 323 PC_PBJacobi *jac; 324 325 PetscFunctionBegin; 326 /* 327 Creates the private data structure for this preconditioner and 328 attach it to the PC object. 329 */ 330 PetscCall(PetscNew(&jac)); 331 pc->data = (void *)jac; 332 333 /* 334 Initialize the pointers to vectors to ZERO; these will be used to store 335 diagonal entries of the matrix for fast preconditioner application. 336 */ 337 jac->diag = NULL; 338 339 /* 340 Set the pointers for the functions that are provided above. 341 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 342 are called, they will automatically call these functions. Note we 343 choose not to provide a couple of these functions since they are 344 not needed. 345 */ 346 pc->ops->apply = NULL; /*set depending on the block size */ 347 pc->ops->applytranspose = NULL; 348 pc->ops->setup = PCSetUp_PBJacobi; 349 pc->ops->destroy = PCDestroy_PBJacobi; 350 pc->ops->setfromoptions = NULL; 351 pc->ops->view = PCView_PBJacobi; 352 pc->ops->applyrichardson = NULL; 353 pc->ops->applysymmetricleft = NULL; 354 pc->ops->applysymmetricright = NULL; 355 PetscFunctionReturn(0); 356 } 357