1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs,mbs; 15 } PC_PBJacobi; 16 17 static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y) 18 { 19 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 20 PetscInt i,m = jac->mbs; 21 const MatScalar *diag = jac->diag; 22 const PetscScalar *xx; 23 PetscScalar *yy; 24 25 PetscFunctionBegin; 26 PetscCall(VecGetArrayRead(x,&xx)); 27 PetscCall(VecGetArray(y,&yy)); 28 for (i=0; i<m; i++) yy[i] = diag[i]*xx[i]; 29 PetscCall(VecRestoreArrayRead(x,&xx)); 30 PetscCall(VecRestoreArray(y,&yy)); 31 PetscCall(PetscLogFlops(m)); 32 PetscFunctionReturn(0); 33 } 34 35 static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y) 36 { 37 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 38 PetscInt i,m = jac->mbs; 39 const MatScalar *diag = jac->diag; 40 PetscScalar x0,x1,*yy; 41 const PetscScalar *xx; 42 43 PetscFunctionBegin; 44 PetscCall(VecGetArrayRead(x,&xx)); 45 PetscCall(VecGetArray(y,&yy)); 46 for (i=0; i<m; i++) { 47 x0 = xx[2*i]; x1 = xx[2*i+1]; 48 yy[2*i] = diag[0]*x0 + diag[2]*x1; 49 yy[2*i+1] = diag[1]*x0 + diag[3]*x1; 50 diag += 4; 51 } 52 PetscCall(VecRestoreArrayRead(x,&xx)); 53 PetscCall(VecRestoreArray(y,&yy)); 54 PetscCall(PetscLogFlops(6.0*m)); 55 PetscFunctionReturn(0); 56 } 57 static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y) 58 { 59 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 60 PetscInt i,m = jac->mbs; 61 const MatScalar *diag = jac->diag; 62 PetscScalar x0,x1,x2,*yy; 63 const PetscScalar *xx; 64 65 PetscFunctionBegin; 66 PetscCall(VecGetArrayRead(x,&xx)); 67 PetscCall(VecGetArray(y,&yy)); 68 for (i=0; i<m; i++) { 69 x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2]; 70 71 yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 72 yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 73 yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 74 diag += 9; 75 } 76 PetscCall(VecRestoreArrayRead(x,&xx)); 77 PetscCall(VecRestoreArray(y,&yy)); 78 PetscCall(PetscLogFlops(15.0*m)); 79 PetscFunctionReturn(0); 80 } 81 static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y) 82 { 83 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 84 PetscInt i,m = jac->mbs; 85 const MatScalar *diag = jac->diag; 86 PetscScalar x0,x1,x2,x3,*yy; 87 const PetscScalar *xx; 88 89 PetscFunctionBegin; 90 PetscCall(VecGetArrayRead(x,&xx)); 91 PetscCall(VecGetArray(y,&yy)); 92 for (i=0; i<m; i++) { 93 x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3]; 94 95 yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 96 yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 97 yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 98 yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 99 diag += 16; 100 } 101 PetscCall(VecRestoreArrayRead(x,&xx)); 102 PetscCall(VecRestoreArray(y,&yy)); 103 PetscCall(PetscLogFlops(28.0*m)); 104 PetscFunctionReturn(0); 105 } 106 static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y) 107 { 108 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 109 PetscInt i,m = jac->mbs; 110 const MatScalar *diag = jac->diag; 111 PetscScalar x0,x1,x2,x3,x4,*yy; 112 const PetscScalar *xx; 113 114 PetscFunctionBegin; 115 PetscCall(VecGetArrayRead(x,&xx)); 116 PetscCall(VecGetArray(y,&yy)); 117 for (i=0; i<m; i++) { 118 x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4]; 119 120 yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 121 yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 122 yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 123 yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 124 yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 125 diag += 25; 126 } 127 PetscCall(VecRestoreArrayRead(x,&xx)); 128 PetscCall(VecRestoreArray(y,&yy)); 129 PetscCall(PetscLogFlops(45.0*m)); 130 PetscFunctionReturn(0); 131 } 132 static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y) 133 { 134 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 135 PetscInt i,m = jac->mbs; 136 const MatScalar *diag = jac->diag; 137 PetscScalar x0,x1,x2,x3,x4,x5,*yy; 138 const PetscScalar *xx; 139 140 PetscFunctionBegin; 141 PetscCall(VecGetArrayRead(x,&xx)); 142 PetscCall(VecGetArray(y,&yy)); 143 for (i=0; i<m; i++) { 144 x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5]; 145 146 yy[6*i] = diag[0]*x0 + diag[6]*x1 + diag[12]*x2 + diag[18]*x3 + diag[24]*x4 + diag[30]*x5; 147 yy[6*i+1] = diag[1]*x0 + diag[7]*x1 + diag[13]*x2 + diag[19]*x3 + diag[25]*x4 + diag[31]*x5; 148 yy[6*i+2] = diag[2]*x0 + diag[8]*x1 + diag[14]*x2 + diag[20]*x3 + diag[26]*x4 + diag[32]*x5; 149 yy[6*i+3] = diag[3]*x0 + diag[9]*x1 + diag[15]*x2 + diag[21]*x3 + diag[27]*x4 + diag[33]*x5; 150 yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2 + diag[22]*x3 + diag[28]*x4 + diag[34]*x5; 151 yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2 + diag[23]*x3 + diag[29]*x4 + diag[35]*x5; 152 diag += 36; 153 } 154 PetscCall(VecRestoreArrayRead(x,&xx)); 155 PetscCall(VecRestoreArray(y,&yy)); 156 PetscCall(PetscLogFlops(66.0*m)); 157 PetscFunctionReturn(0); 158 } 159 static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y) 160 { 161 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 162 PetscInt i,m = jac->mbs; 163 const MatScalar *diag = jac->diag; 164 PetscScalar x0,x1,x2,x3,x4,x5,x6,*yy; 165 const PetscScalar *xx; 166 167 PetscFunctionBegin; 168 PetscCall(VecGetArrayRead(x,&xx)); 169 PetscCall(VecGetArray(y,&yy)); 170 for (i=0; i<m; i++) { 171 x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6]; 172 173 yy[7*i] = diag[0]*x0 + diag[7]*x1 + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6; 174 yy[7*i+1] = diag[1]*x0 + diag[8]*x1 + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6; 175 yy[7*i+2] = diag[2]*x0 + diag[9]*x1 + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6; 176 yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6; 177 yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6; 178 yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6; 179 yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6; 180 diag += 49; 181 } 182 PetscCall(VecRestoreArrayRead(x,&xx)); 183 PetscCall(VecRestoreArray(y,&yy)); 184 PetscCall(PetscLogFlops(91.0*m)); /* 2*bs2 - bs */ 185 PetscFunctionReturn(0); 186 } 187 static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y) 188 { 189 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 190 PetscInt i,ib,jb; 191 const PetscInt m = jac->mbs; 192 const PetscInt bs = jac->bs; 193 const MatScalar *diag = jac->diag; 194 PetscScalar *yy; 195 const PetscScalar *xx; 196 197 PetscFunctionBegin; 198 PetscCall(VecGetArrayRead(x,&xx)); 199 PetscCall(VecGetArray(y,&yy)); 200 for (i=0; i<m; i++) { 201 for (ib=0; ib<bs; ib++) { 202 PetscScalar rowsum = 0; 203 for (jb=0; jb<bs; jb++) { 204 rowsum += diag[ib+jb*bs] * xx[bs*i+jb]; 205 } 206 yy[bs*i+ib] = rowsum; 207 } 208 diag += bs*bs; 209 } 210 PetscCall(VecRestoreArrayRead(x,&xx)); 211 PetscCall(VecRestoreArray(y,&yy)); 212 PetscCall(PetscLogFlops((2.0*bs*bs-bs)*m)); /* 2*bs2 - bs */ 213 PetscFunctionReturn(0); 214 } 215 216 static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc,Vec x,Vec y) 217 { 218 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 219 PetscInt i,j,k,m = jac->mbs,bs=jac->bs; 220 const MatScalar *diag = jac->diag; 221 const PetscScalar *xx; 222 PetscScalar *yy; 223 224 PetscFunctionBegin; 225 PetscCall(VecGetArrayRead(x,&xx)); 226 PetscCall(VecGetArray(y,&yy)); 227 for (i=0; i<m; i++) { 228 for (j=0; j<bs; j++) yy[i*bs+j] = 0.; 229 for (j=0; j<bs; j++) { 230 for (k=0; k<bs; k++) { 231 yy[i*bs+k] += diag[k*bs+j]*xx[i*bs+j]; 232 } 233 } 234 diag += bs*bs; 235 } 236 PetscCall(VecRestoreArrayRead(x,&xx)); 237 PetscCall(VecRestoreArray(y,&yy)); 238 PetscCall(PetscLogFlops(m*bs*(2*bs-1))); 239 PetscFunctionReturn(0); 240 } 241 242 /* -------------------------------------------------------------------------- */ 243 static PetscErrorCode PCSetUp_PBJacobi(PC pc) 244 { 245 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 246 Mat A = pc->pmat; 247 MatFactorError err; 248 PetscInt nlocal; 249 250 PetscFunctionBegin; 251 PetscCall(MatInvertBlockDiagonal(A,&jac->diag)); 252 PetscCall(MatFactorGetError(A,&err)); 253 if (err) pc->failedreason = (PCFailedReason)err; 254 255 PetscCall(MatGetBlockSize(A,&jac->bs)); 256 PetscCall(MatGetLocalSize(A,&nlocal,NULL)); 257 jac->mbs = nlocal/jac->bs; 258 switch (jac->bs) { 259 case 1: 260 pc->ops->apply = PCApply_PBJacobi_1; 261 break; 262 case 2: 263 pc->ops->apply = PCApply_PBJacobi_2; 264 break; 265 case 3: 266 pc->ops->apply = PCApply_PBJacobi_3; 267 break; 268 case 4: 269 pc->ops->apply = PCApply_PBJacobi_4; 270 break; 271 case 5: 272 pc->ops->apply = PCApply_PBJacobi_5; 273 break; 274 case 6: 275 pc->ops->apply = PCApply_PBJacobi_6; 276 break; 277 case 7: 278 pc->ops->apply = PCApply_PBJacobi_7; 279 break; 280 default: 281 pc->ops->apply = PCApply_PBJacobi_N; 282 break; 283 } 284 pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N; 285 PetscFunctionReturn(0); 286 } 287 /* -------------------------------------------------------------------------- */ 288 static PetscErrorCode PCDestroy_PBJacobi(PC pc) 289 { 290 PetscFunctionBegin; 291 /* 292 Free the private data structure that was hanging off the PC 293 */ 294 PetscCall(PetscFree(pc->data)); 295 PetscFunctionReturn(0); 296 } 297 298 static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer) 299 { 300 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 301 PetscBool iascii; 302 303 PetscFunctionBegin; 304 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 305 if (iascii) { 306 PetscCall(PetscViewerASCIIPrintf(viewer," point-block size %" PetscInt_FMT "\n",jac->bs)); 307 } 308 PetscFunctionReturn(0); 309 } 310 311 /* -------------------------------------------------------------------------- */ 312 /*MC 313 PCPBJACOBI - Point block Jacobi preconditioner 314 315 Notes: 316 See PCJACOBI for point Jacobi preconditioning, PCVPBJACOBI for variable size point block Jacobi and PCBJACOBI for large blocks 317 318 This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix 319 320 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 321 is detected a PETSc error is generated. 322 323 Developer Notes: 324 This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 325 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 326 terminating KSP with a KSP_DIVERGED_NANORIF allowing 327 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 328 329 Perhaps should provide an option that allows generation of a valid preconditioner 330 even if a block is singular as the PCJACOBI does. 331 332 Level: beginner 333 334 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI, PCVPBJACOBI, PCBJACOBI 335 336 M*/ 337 338 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 339 { 340 PC_PBJacobi *jac; 341 342 PetscFunctionBegin; 343 /* 344 Creates the private data structure for this preconditioner and 345 attach it to the PC object. 346 */ 347 PetscCall(PetscNewLog(pc,&jac)); 348 pc->data = (void*)jac; 349 350 /* 351 Initialize the pointers to vectors to ZERO; these will be used to store 352 diagonal entries of the matrix for fast preconditioner application. 353 */ 354 jac->diag = NULL; 355 356 /* 357 Set the pointers for the functions that are provided above. 358 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 359 are called, they will automatically call these functions. Note we 360 choose not to provide a couple of these functions since they are 361 not needed. 362 */ 363 pc->ops->apply = NULL; /*set depending on the block size */ 364 pc->ops->applytranspose = NULL; 365 pc->ops->setup = PCSetUp_PBJacobi; 366 pc->ops->destroy = PCDestroy_PBJacobi; 367 pc->ops->setfromoptions = NULL; 368 pc->ops->view = PCView_PBJacobi; 369 pc->ops->applyrichardson = NULL; 370 pc->ops->applysymmetricleft = NULL; 371 pc->ops->applysymmetricright = NULL; 372 PetscFunctionReturn(0); 373 } 374