1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs,mbs; 15 } PC_PBJacobi; 16 17 18 static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y) 19 { 20 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 21 PetscErrorCode ierr; 22 PetscInt i,m = jac->mbs; 23 const MatScalar *diag = jac->diag; 24 const PetscScalar *xx; 25 PetscScalar *yy; 26 27 PetscFunctionBegin; 28 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 29 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 30 for (i=0; i<m; i++) yy[i] = diag[i]*xx[i]; 31 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 32 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 33 ierr = PetscLogFlops(m);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 37 static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y) 38 { 39 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 40 PetscErrorCode ierr; 41 PetscInt i,m = jac->mbs; 42 const MatScalar *diag = jac->diag; 43 PetscScalar x0,x1,*yy; 44 const PetscScalar *xx; 45 46 PetscFunctionBegin; 47 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 49 for (i=0; i<m; i++) { 50 x0 = xx[2*i]; x1 = xx[2*i+1]; 51 yy[2*i] = diag[0]*x0 + diag[2]*x1; 52 yy[2*i+1] = diag[1]*x0 + diag[3]*x1; 53 diag += 4; 54 } 55 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 56 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 57 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y) 61 { 62 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 63 PetscErrorCode ierr; 64 PetscInt i,m = jac->mbs; 65 const MatScalar *diag = jac->diag; 66 PetscScalar x0,x1,x2,*yy; 67 const PetscScalar *xx; 68 69 PetscFunctionBegin; 70 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 71 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 72 for (i=0; i<m; i++) { 73 x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2]; 74 75 yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 76 yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 77 yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 78 diag += 9; 79 } 80 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 81 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 82 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y) 86 { 87 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 88 PetscErrorCode ierr; 89 PetscInt i,m = jac->mbs; 90 const MatScalar *diag = jac->diag; 91 PetscScalar x0,x1,x2,x3,*yy; 92 const PetscScalar *xx; 93 94 PetscFunctionBegin; 95 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 96 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 97 for (i=0; i<m; i++) { 98 x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3]; 99 100 yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 101 yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 102 yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 103 yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 104 diag += 16; 105 } 106 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 107 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 108 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y) 112 { 113 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 114 PetscErrorCode ierr; 115 PetscInt i,m = jac->mbs; 116 const MatScalar *diag = jac->diag; 117 PetscScalar x0,x1,x2,x3,x4,*yy; 118 const PetscScalar *xx; 119 120 PetscFunctionBegin; 121 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 122 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 123 for (i=0; i<m; i++) { 124 x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4]; 125 126 yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 127 yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 128 yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 129 yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 130 yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 131 diag += 25; 132 } 133 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 134 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 135 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 136 PetscFunctionReturn(0); 137 } 138 static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y) 139 { 140 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 141 PetscErrorCode ierr; 142 PetscInt i,m = jac->mbs; 143 const MatScalar *diag = jac->diag; 144 PetscScalar x0,x1,x2,x3,x4,x5,*yy; 145 const PetscScalar *xx; 146 147 PetscFunctionBegin; 148 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 149 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 150 for (i=0; i<m; i++) { 151 x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5]; 152 153 yy[6*i] = diag[0]*x0 + diag[6]*x1 + diag[12]*x2 + diag[18]*x3 + diag[24]*x4 + diag[30]*x5; 154 yy[6*i+1] = diag[1]*x0 + diag[7]*x1 + diag[13]*x2 + diag[19]*x3 + diag[25]*x4 + diag[31]*x5; 155 yy[6*i+2] = diag[2]*x0 + diag[8]*x1 + diag[14]*x2 + diag[20]*x3 + diag[26]*x4 + diag[32]*x5; 156 yy[6*i+3] = diag[3]*x0 + diag[9]*x1 + diag[15]*x2 + diag[21]*x3 + diag[27]*x4 + diag[33]*x5; 157 yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2 + diag[22]*x3 + diag[28]*x4 + diag[34]*x5; 158 yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2 + diag[23]*x3 + diag[29]*x4 + diag[35]*x5; 159 diag += 36; 160 } 161 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 162 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 163 ierr = PetscLogFlops(66.0*m);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y) 167 { 168 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 169 PetscErrorCode ierr; 170 PetscInt i,m = jac->mbs; 171 const MatScalar *diag = jac->diag; 172 PetscScalar x0,x1,x2,x3,x4,x5,x6,*yy; 173 const PetscScalar *xx; 174 175 PetscFunctionBegin; 176 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 177 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 178 for (i=0; i<m; i++) { 179 x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6]; 180 181 yy[7*i] = diag[0]*x0 + diag[7]*x1 + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6; 182 yy[7*i+1] = diag[1]*x0 + diag[8]*x1 + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6; 183 yy[7*i+2] = diag[2]*x0 + diag[9]*x1 + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6; 184 yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6; 185 yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6; 186 yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6; 187 yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6; 188 diag += 49; 189 } 190 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 191 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 192 ierr = PetscLogFlops(91.0*m);CHKERRQ(ierr); /* 2*bs2 - bs */ 193 PetscFunctionReturn(0); 194 } 195 static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y) 196 { 197 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 198 PetscErrorCode ierr; 199 PetscInt i,ib,jb; 200 const PetscInt m = jac->mbs; 201 const PetscInt bs = jac->bs; 202 const MatScalar *diag = jac->diag; 203 PetscScalar *yy; 204 const PetscScalar *xx; 205 206 PetscFunctionBegin; 207 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 208 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 209 for (i=0; i<m; i++) { 210 for (ib=0; ib<bs; ib++){ 211 PetscScalar rowsum = 0; 212 for (jb=0; jb<bs; jb++){ 213 rowsum += diag[ib+jb*bs] * xx[bs*i+jb]; 214 } 215 yy[bs*i+ib] = rowsum; 216 } 217 diag += bs*bs; 218 } 219 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 220 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 221 ierr = PetscLogFlops((2.0*bs*bs-bs)*m);CHKERRQ(ierr); /* 2*bs2 - bs */ 222 PetscFunctionReturn(0); 223 } 224 225 static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y) 226 { 227 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 228 PetscErrorCode ierr; 229 PetscInt i,j,k,m = jac->mbs,bs=jac->bs; 230 const MatScalar *diag = jac->diag; 231 const PetscScalar *xx; 232 PetscScalar *yy; 233 234 PetscFunctionBegin; 235 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 236 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 237 for (i=0; i<m; i++) { 238 for (j=0; j<bs; j++) yy[i*bs+j] = 0.; 239 for (j=0; j<bs; j++) { 240 for (k=0; k<bs; k++) { 241 yy[i*bs+k] += diag[k+bs*j]*xx[i*bs+j]; 242 } 243 } 244 diag += bs*bs; 245 } 246 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 247 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 248 ierr = PetscLogFlops(m*bs*(2*bs-1));CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc,Vec x,Vec y) 253 { 254 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 255 PetscErrorCode ierr; 256 PetscInt i,j,k,m = jac->mbs,bs=jac->bs; 257 const MatScalar *diag = jac->diag; 258 const PetscScalar *xx; 259 PetscScalar *yy; 260 261 PetscFunctionBegin; 262 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 263 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 264 for (i=0; i<m; i++) { 265 for (j=0; j<bs; j++) yy[i*bs+j] = 0.; 266 for (j=0; j<bs; j++) { 267 for (k=0; k<bs; k++) { 268 yy[i*bs+k] += diag[k*bs+j]*xx[i*bs+j]; 269 } 270 } 271 diag += bs*bs; 272 } 273 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 274 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 275 ierr = PetscLogFlops(m*bs*(2*bs-1));CHKERRQ(ierr); 276 PetscFunctionReturn(0); 277 } 278 279 /* -------------------------------------------------------------------------- */ 280 static PetscErrorCode PCSetUp_PBJacobi(PC pc) 281 { 282 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 283 PetscErrorCode ierr; 284 Mat A = pc->pmat; 285 MatFactorError err; 286 PetscInt nlocal; 287 288 PetscFunctionBegin; 289 ierr = MatInvertBlockDiagonal(A,&jac->diag);CHKERRQ(ierr); 290 ierr = MatFactorGetError(A,&err);CHKERRQ(ierr); 291 if (err) pc->failedreason = (PCFailedReason)err; 292 293 ierr = MatGetBlockSize(A,&jac->bs);CHKERRQ(ierr); 294 ierr = MatGetLocalSize(A,&nlocal,NULL);CHKERRQ(ierr); 295 jac->mbs = nlocal/jac->bs; 296 switch (jac->bs) { 297 case 1: 298 pc->ops->apply = PCApply_PBJacobi_1; 299 break; 300 case 2: 301 pc->ops->apply = PCApply_PBJacobi_2; 302 break; 303 case 3: 304 pc->ops->apply = PCApply_PBJacobi_3; 305 break; 306 case 4: 307 pc->ops->apply = PCApply_PBJacobi_4; 308 break; 309 case 5: 310 pc->ops->apply = PCApply_PBJacobi_5; 311 break; 312 case 6: 313 pc->ops->apply = PCApply_PBJacobi_6; 314 break; 315 case 7: 316 pc->ops->apply = PCApply_PBJacobi_7; 317 break; 318 default: 319 pc->ops->apply = PCApply_PBJacobi_N; 320 break; 321 } 322 pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N; 323 PetscFunctionReturn(0); 324 } 325 /* -------------------------------------------------------------------------- */ 326 static PetscErrorCode PCDestroy_PBJacobi(PC pc) 327 { 328 PetscErrorCode ierr; 329 330 PetscFunctionBegin; 331 /* 332 Free the private data structure that was hanging off the PC 333 */ 334 ierr = PetscFree(pc->data);CHKERRQ(ierr); 335 PetscFunctionReturn(0); 336 } 337 338 static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer) 339 { 340 PetscErrorCode ierr; 341 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 342 PetscBool iascii; 343 344 PetscFunctionBegin; 345 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 346 if (iascii) { 347 ierr = PetscViewerASCIIPrintf(viewer," point-block size %D\n",jac->bs);CHKERRQ(ierr); 348 } 349 PetscFunctionReturn(0); 350 } 351 352 /* -------------------------------------------------------------------------- */ 353 /*MC 354 PCPBJACOBI - Point block Jacobi preconditioner 355 356 357 Notes: 358 See PCJACOBI for point Jacobi preconditioning 359 360 This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix 361 362 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 363 is detected a PETSc error is generated. 364 365 Developer Notes: 366 This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 367 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 368 terminating KSP with a KSP_DIVERGED_NANORIF allowing 369 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 370 371 Developer Note: Perhaps should provide an option that allows generation of a valid preconditioner 372 even if a block is singular as the PCJACOBI does. 373 374 Level: beginner 375 376 377 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI 378 379 M*/ 380 381 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 382 { 383 PC_PBJacobi *jac; 384 PetscErrorCode ierr; 385 386 PetscFunctionBegin; 387 /* 388 Creates the private data structure for this preconditioner and 389 attach it to the PC object. 390 */ 391 ierr = PetscNewLog(pc,&jac);CHKERRQ(ierr); 392 pc->data = (void*)jac; 393 394 /* 395 Initialize the pointers to vectors to ZERO; these will be used to store 396 diagonal entries of the matrix for fast preconditioner application. 397 */ 398 jac->diag = 0; 399 400 /* 401 Set the pointers for the functions that are provided above. 402 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 403 are called, they will automatically call these functions. Note we 404 choose not to provide a couple of these functions since they are 405 not needed. 406 */ 407 pc->ops->apply = 0; /*set depending on the block size */ 408 pc->ops->applytranspose = 0; 409 pc->ops->setup = PCSetUp_PBJacobi; 410 pc->ops->destroy = PCDestroy_PBJacobi; 411 pc->ops->setfromoptions = 0; 412 pc->ops->view = PCView_PBJacobi; 413 pc->ops->applyrichardson = 0; 414 pc->ops->applysymmetricleft = 0; 415 pc->ops->applysymmetricright = 0; 416 PetscFunctionReturn(0); 417 } 418 419 420