1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs, mbs; 15 } PC_PBJacobi; 16 17 static PetscErrorCode PCApply_PBJacobi(PC pc, Vec x, Vec y) 18 { 19 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 20 PetscInt i, ib, jb; 21 const PetscInt m = jac->mbs; 22 const PetscInt bs = jac->bs; 23 const MatScalar *diag = jac->diag; 24 PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 25 const PetscScalar *xx; 26 27 PetscFunctionBegin; 28 PetscCall(VecGetArrayRead(x, &xx)); 29 PetscCall(VecGetArray(y, &yy)); 30 switch (bs) { 31 case 1: 32 for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i]; 33 break; 34 case 2: 35 for (i = 0; i < m; i++) { 36 x0 = xx[2 * i]; 37 x1 = xx[2 * i + 1]; 38 yy[2 * i] = diag[0] * x0 + diag[2] * x1; 39 yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1; 40 diag += 4; 41 } 42 break; 43 case 3: 44 for (i = 0; i < m; i++) { 45 x0 = xx[3 * i]; 46 x1 = xx[3 * i + 1]; 47 x2 = xx[3 * i + 2]; 48 49 yy[3 * i] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 50 yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 51 yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 52 diag += 9; 53 } 54 break; 55 case 4: 56 for (i = 0; i < m; i++) { 57 x0 = xx[4 * i]; 58 x1 = xx[4 * i + 1]; 59 x2 = xx[4 * i + 2]; 60 x3 = xx[4 * i + 3]; 61 62 yy[4 * i] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 63 yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 64 yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 65 yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 66 diag += 16; 67 } 68 break; 69 case 5: 70 for (i = 0; i < m; i++) { 71 x0 = xx[5 * i]; 72 x1 = xx[5 * i + 1]; 73 x2 = xx[5 * i + 2]; 74 x3 = xx[5 * i + 3]; 75 x4 = xx[5 * i + 4]; 76 77 yy[5 * i] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 78 yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 79 yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 80 yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 81 yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 82 diag += 25; 83 } 84 break; 85 case 6: 86 for (i = 0; i < m; i++) { 87 x0 = xx[6 * i]; 88 x1 = xx[6 * i + 1]; 89 x2 = xx[6 * i + 2]; 90 x3 = xx[6 * i + 3]; 91 x4 = xx[6 * i + 4]; 92 x5 = xx[6 * i + 5]; 93 94 yy[6 * i] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 95 yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 96 yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 97 yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 98 yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 99 yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 100 diag += 36; 101 } 102 break; 103 case 7: 104 for (i = 0; i < m; i++) { 105 x0 = xx[7 * i]; 106 x1 = xx[7 * i + 1]; 107 x2 = xx[7 * i + 2]; 108 x3 = xx[7 * i + 3]; 109 x4 = xx[7 * i + 4]; 110 x5 = xx[7 * i + 5]; 111 x6 = xx[7 * i + 6]; 112 113 yy[7 * i] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 114 yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 115 yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 116 yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 117 yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 118 yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 119 yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 120 diag += 49; 121 } 122 break; 123 default: 124 for (i = 0; i < m; i++) { 125 for (ib = 0; ib < bs; ib++) { 126 PetscScalar rowsum = 0; 127 for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb]; 128 yy[bs * i + ib] = rowsum; 129 } 130 diag += bs * bs; 131 } 132 } 133 PetscCall(VecRestoreArrayRead(x, &xx)); 134 PetscCall(VecRestoreArray(y, &yy)); 135 PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */ 136 PetscFunctionReturn(0); 137 } 138 139 static PetscErrorCode PCApplyTranspose_PBJacobi(PC pc, Vec x, Vec y) 140 { 141 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 142 PetscInt i, ib, jb; 143 const PetscInt m = jac->mbs; 144 const PetscInt bs = jac->bs; 145 const MatScalar *diag = jac->diag; 146 PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 147 const PetscScalar *xx; 148 149 PetscFunctionBegin; 150 PetscCall(VecGetArrayRead(x, &xx)); 151 PetscCall(VecGetArray(y, &yy)); 152 switch (bs) { 153 case 1: 154 for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i]; 155 break; 156 case 2: 157 for (i = 0; i < m; i++) { 158 x0 = xx[2 * i]; 159 x1 = xx[2 * i + 1]; 160 yy[2 * i] = diag[0] * x0 + diag[1] * x1; 161 yy[2 * i + 1] = diag[2] * x0 + diag[3] * x1; 162 diag += 4; 163 } 164 break; 165 case 3: 166 for (i = 0; i < m; i++) { 167 x0 = xx[3 * i]; 168 x1 = xx[3 * i + 1]; 169 x2 = xx[3 * i + 2]; 170 171 yy[3 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2; 172 yy[3 * i + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2; 173 yy[3 * i + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2; 174 diag += 9; 175 } 176 break; 177 case 4: 178 for (i = 0; i < m; i++) { 179 x0 = xx[4 * i]; 180 x1 = xx[4 * i + 1]; 181 x2 = xx[4 * i + 2]; 182 x3 = xx[4 * i + 3]; 183 184 yy[4 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3; 185 yy[4 * i + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3; 186 yy[4 * i + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3; 187 yy[4 * i + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3; 188 diag += 16; 189 } 190 break; 191 case 5: 192 for (i = 0; i < m; i++) { 193 x0 = xx[5 * i]; 194 x1 = xx[5 * i + 1]; 195 x2 = xx[5 * i + 2]; 196 x3 = xx[5 * i + 3]; 197 x4 = xx[5 * i + 4]; 198 199 yy[5 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4; 200 yy[5 * i + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4; 201 yy[5 * i + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4; 202 yy[5 * i + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4; 203 yy[5 * i + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4; 204 diag += 25; 205 } 206 break; 207 case 6: 208 for (i = 0; i < m; i++) { 209 x0 = xx[6 * i]; 210 x1 = xx[6 * i + 1]; 211 x2 = xx[6 * i + 2]; 212 x3 = xx[6 * i + 3]; 213 x4 = xx[6 * i + 4]; 214 x5 = xx[6 * i + 5]; 215 216 yy[6 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5; 217 yy[6 * i + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5; 218 yy[6 * i + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5; 219 yy[6 * i + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5; 220 yy[6 * i + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5; 221 yy[6 * i + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5; 222 diag += 36; 223 } 224 break; 225 case 7: 226 for (i = 0; i < m; i++) { 227 x0 = xx[7 * i]; 228 x1 = xx[7 * i + 1]; 229 x2 = xx[7 * i + 2]; 230 x3 = xx[7 * i + 3]; 231 x4 = xx[7 * i + 4]; 232 x5 = xx[7 * i + 5]; 233 x6 = xx[7 * i + 6]; 234 235 yy[7 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6; 236 yy[7 * i + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6; 237 yy[7 * i + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6; 238 yy[7 * i + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6; 239 yy[7 * i + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6; 240 yy[7 * i + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6; 241 yy[7 * i + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6; 242 diag += 49; 243 } 244 break; 245 default: 246 for (i = 0; i < m; i++) { 247 for (ib = 0; ib < bs; ib++) { 248 PetscScalar rowsum = 0; 249 for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[bs * i + jb]; 250 yy[bs * i + ib] = rowsum; 251 } 252 diag += bs * bs; 253 } 254 } 255 PetscCall(VecRestoreArrayRead(x, &xx)); 256 PetscCall(VecRestoreArray(y, &yy)); 257 PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); 258 PetscFunctionReturn(0); 259 } 260 261 static PetscErrorCode PCSetUp_PBJacobi(PC pc) 262 { 263 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 264 Mat A = pc->pmat; 265 MatFactorError err; 266 PetscInt nlocal; 267 268 PetscFunctionBegin; 269 PetscCall(MatInvertBlockDiagonal(A, &jac->diag)); 270 PetscCall(MatFactorGetError(A, &err)); 271 if (err) pc->failedreason = (PCFailedReason)err; 272 273 PetscCall(MatGetBlockSize(A, &jac->bs)); 274 PetscCall(MatGetLocalSize(A, &nlocal, NULL)); 275 jac->mbs = nlocal / jac->bs; 276 pc->ops->apply = PCApply_PBJacobi; 277 pc->ops->applytranspose = PCApplyTranspose_PBJacobi; 278 PetscFunctionReturn(0); 279 } 280 281 static PetscErrorCode PCDestroy_PBJacobi(PC pc) 282 { 283 PetscFunctionBegin; 284 /* 285 Free the private data structure that was hanging off the PC 286 */ 287 PetscCall(PetscFree(pc->data)); 288 PetscFunctionReturn(0); 289 } 290 291 static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer) 292 { 293 PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 294 PetscBool iascii; 295 296 PetscFunctionBegin; 297 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 298 if (iascii) PetscCall(PetscViewerASCIIPrintf(viewer, " point-block size %" PetscInt_FMT "\n", jac->bs)); 299 PetscFunctionReturn(0); 300 } 301 302 /*MC 303 PCPBJACOBI - Point block Jacobi preconditioner 304 305 Notes: 306 See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks 307 308 This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix 309 310 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 311 is detected a PETSc error is generated. 312 313 Developer Notes: 314 This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow 315 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 316 terminating `KSP` with a `KSP_DIVERGED_NANORIF` allowing 317 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 318 319 Perhaps should provide an option that allows generation of a valid preconditioner 320 even if a block is singular as the `PCJACOBI` does. 321 322 Level: beginner 323 324 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI` 325 M*/ 326 327 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 328 { 329 PC_PBJacobi *jac; 330 331 PetscFunctionBegin; 332 /* 333 Creates the private data structure for this preconditioner and 334 attach it to the PC object. 335 */ 336 PetscCall(PetscNew(&jac)); 337 pc->data = (void *)jac; 338 339 /* 340 Initialize the pointers to vectors to ZERO; these will be used to store 341 diagonal entries of the matrix for fast preconditioner application. 342 */ 343 jac->diag = NULL; 344 345 /* 346 Set the pointers for the functions that are provided above. 347 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 348 are called, they will automatically call these functions. Note we 349 choose not to provide a couple of these functions since they are 350 not needed. 351 */ 352 pc->ops->apply = NULL; /*set depending on the block size */ 353 pc->ops->applytranspose = NULL; 354 pc->ops->setup = PCSetUp_PBJacobi; 355 pc->ops->destroy = PCDestroy_PBJacobi; 356 pc->ops->setfromoptions = NULL; 357 pc->ops->view = PCView_PBJacobi; 358 pc->ops->applyrichardson = NULL; 359 pc->ops->applysymmetricleft = NULL; 360 pc->ops->applysymmetricright = NULL; 361 PetscFunctionReturn(0); 362 } 363