1 2 /* 3 Include files needed for the PBJacobi preconditioner: 4 pcimpl.h - private include file intended for use by all preconditioners 5 */ 6 7 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 8 9 /* 10 Private context (data structure) for the PBJacobi preconditioner. 11 */ 12 typedef struct { 13 const MatScalar *diag; 14 PetscInt bs,mbs; 15 } PC_PBJacobi; 16 17 18 static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y) 19 { 20 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 21 PetscErrorCode ierr; 22 PetscInt i,m = jac->mbs; 23 const MatScalar *diag = jac->diag; 24 const PetscScalar *xx; 25 PetscScalar *yy; 26 27 PetscFunctionBegin; 28 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 29 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 30 for (i=0; i<m; i++) yy[i] = diag[i]*xx[i]; 31 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 32 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 33 ierr = PetscLogFlops(m);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 37 static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y) 38 { 39 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 40 PetscErrorCode ierr; 41 PetscInt i,m = jac->mbs; 42 const MatScalar *diag = jac->diag; 43 PetscScalar x0,x1,*yy; 44 const PetscScalar *xx; 45 46 PetscFunctionBegin; 47 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 49 for (i=0; i<m; i++) { 50 x0 = xx[2*i]; x1 = xx[2*i+1]; 51 yy[2*i] = diag[0]*x0 + diag[2]*x1; 52 yy[2*i+1] = diag[1]*x0 + diag[3]*x1; 53 diag += 4; 54 } 55 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 56 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 57 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y) 61 { 62 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 63 PetscErrorCode ierr; 64 PetscInt i,m = jac->mbs; 65 const MatScalar *diag = jac->diag; 66 PetscScalar x0,x1,x2,*yy; 67 const PetscScalar *xx; 68 69 PetscFunctionBegin; 70 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 71 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 72 for (i=0; i<m; i++) { 73 x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2]; 74 75 yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 76 yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 77 yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 78 diag += 9; 79 } 80 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 81 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 82 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y) 86 { 87 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 88 PetscErrorCode ierr; 89 PetscInt i,m = jac->mbs; 90 const MatScalar *diag = jac->diag; 91 PetscScalar x0,x1,x2,x3,*yy; 92 const PetscScalar *xx; 93 94 PetscFunctionBegin; 95 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 96 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 97 for (i=0; i<m; i++) { 98 x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3]; 99 100 yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 101 yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 102 yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 103 yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 104 diag += 16; 105 } 106 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 107 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 108 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y) 112 { 113 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 114 PetscErrorCode ierr; 115 PetscInt i,m = jac->mbs; 116 const MatScalar *diag = jac->diag; 117 PetscScalar x0,x1,x2,x3,x4,*yy; 118 const PetscScalar *xx; 119 120 PetscFunctionBegin; 121 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 122 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 123 for (i=0; i<m; i++) { 124 x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4]; 125 126 yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 127 yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 128 yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 129 yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 130 yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 131 diag += 25; 132 } 133 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 134 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 135 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 136 PetscFunctionReturn(0); 137 } 138 static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y) 139 { 140 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 141 PetscErrorCode ierr; 142 PetscInt i,m = jac->mbs; 143 const MatScalar *diag = jac->diag; 144 PetscScalar x0,x1,x2,x3,x4,x5,*yy; 145 const PetscScalar *xx; 146 147 PetscFunctionBegin; 148 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 149 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 150 for (i=0; i<m; i++) { 151 x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5]; 152 153 yy[6*i] = diag[0]*x0 + diag[6]*x1 + diag[12]*x2 + diag[18]*x3 + diag[24]*x4 + diag[30]*x5; 154 yy[6*i+1] = diag[1]*x0 + diag[7]*x1 + diag[13]*x2 + diag[19]*x3 + diag[25]*x4 + diag[31]*x5; 155 yy[6*i+2] = diag[2]*x0 + diag[8]*x1 + diag[14]*x2 + diag[20]*x3 + diag[26]*x4 + diag[32]*x5; 156 yy[6*i+3] = diag[3]*x0 + diag[9]*x1 + diag[15]*x2 + diag[21]*x3 + diag[27]*x4 + diag[33]*x5; 157 yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2 + diag[22]*x3 + diag[28]*x4 + diag[34]*x5; 158 yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2 + diag[23]*x3 + diag[29]*x4 + diag[35]*x5; 159 diag += 36; 160 } 161 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 162 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 163 ierr = PetscLogFlops(66.0*m);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y) 167 { 168 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 169 PetscErrorCode ierr; 170 PetscInt i,m = jac->mbs; 171 const MatScalar *diag = jac->diag; 172 PetscScalar x0,x1,x2,x3,x4,x5,x6,*yy; 173 const PetscScalar *xx; 174 175 PetscFunctionBegin; 176 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 177 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 178 for (i=0; i<m; i++) { 179 x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6]; 180 181 yy[7*i] = diag[0]*x0 + diag[7]*x1 + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6; 182 yy[7*i+1] = diag[1]*x0 + diag[8]*x1 + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6; 183 yy[7*i+2] = diag[2]*x0 + diag[9]*x1 + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6; 184 yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6; 185 yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6; 186 yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6; 187 yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6; 188 diag += 49; 189 } 190 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 191 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 192 ierr = PetscLogFlops(91.0*m);CHKERRQ(ierr); /* 2*bs2 - bs */ 193 PetscFunctionReturn(0); 194 } 195 static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y) 196 { 197 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 198 PetscErrorCode ierr; 199 PetscInt i,ib,jb; 200 const PetscInt m = jac->mbs; 201 const PetscInt bs = jac->bs; 202 const MatScalar *diag = jac->diag; 203 PetscScalar *yy; 204 const PetscScalar *xx; 205 206 PetscFunctionBegin; 207 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 208 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 209 for (i=0; i<m; i++) { 210 for (ib=0; ib<bs; ib++){ 211 PetscScalar rowsum = 0; 212 for (jb=0; jb<bs; jb++){ 213 rowsum += diag[ib+jb*bs] * xx[bs*i+jb]; 214 } 215 yy[bs*i+ib] = rowsum; 216 } 217 diag += bs*bs; 218 } 219 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 220 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 221 ierr = PetscLogFlops((2.0*bs*bs-bs)*m);CHKERRQ(ierr); /* 2*bs2 - bs */ 222 PetscFunctionReturn(0); 223 } 224 225 static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y) 226 { 227 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 228 PetscErrorCode ierr; 229 PetscInt i,j,k,m = jac->mbs,bs=jac->bs; 230 const MatScalar *diag = jac->diag; 231 const PetscScalar *xx; 232 PetscScalar *yy; 233 234 PetscFunctionBegin; 235 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 236 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 237 for (i=0; i<m; i++) { 238 for (j=0; j<bs; j++) yy[i*bs+j] = 0.; 239 for (j=0; j<bs; j++) { 240 for (k=0; k<bs; k++) { 241 yy[i*bs+k] += diag[k+bs*j]*xx[i*bs+j]; 242 } 243 } 244 diag += bs*bs; 245 } 246 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 247 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 248 ierr = PetscLogFlops(m*bs*(2*bs-1));CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 /* -------------------------------------------------------------------------- */ 253 static PetscErrorCode PCSetUp_PBJacobi(PC pc) 254 { 255 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 256 PetscErrorCode ierr; 257 Mat A = pc->pmat; 258 MatFactorError err; 259 PetscInt nlocal; 260 261 PetscFunctionBegin; 262 ierr = MatInvertBlockDiagonal(A,&jac->diag);CHKERRQ(ierr); 263 ierr = MatFactorGetError(A,&err);CHKERRQ(ierr); 264 if (err) pc->failedreason = (PCFailedReason)err; 265 266 ierr = MatGetBlockSize(A,&jac->bs);CHKERRQ(ierr); 267 ierr = MatGetLocalSize(A,&nlocal,NULL);CHKERRQ(ierr); 268 jac->mbs = nlocal/jac->bs; 269 switch (jac->bs) { 270 case 1: 271 pc->ops->apply = PCApply_PBJacobi_1; 272 break; 273 case 2: 274 pc->ops->apply = PCApply_PBJacobi_2; 275 break; 276 case 3: 277 pc->ops->apply = PCApply_PBJacobi_3; 278 break; 279 case 4: 280 pc->ops->apply = PCApply_PBJacobi_4; 281 break; 282 case 5: 283 pc->ops->apply = PCApply_PBJacobi_5; 284 break; 285 case 6: 286 pc->ops->apply = PCApply_PBJacobi_6; 287 break; 288 case 7: 289 pc->ops->apply = PCApply_PBJacobi_7; 290 break; 291 default: 292 pc->ops->apply = PCApply_PBJacobi_N; 293 break; 294 } 295 PetscFunctionReturn(0); 296 } 297 /* -------------------------------------------------------------------------- */ 298 static PetscErrorCode PCDestroy_PBJacobi(PC pc) 299 { 300 PetscErrorCode ierr; 301 302 PetscFunctionBegin; 303 /* 304 Free the private data structure that was hanging off the PC 305 */ 306 ierr = PetscFree(pc->data);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer) 311 { 312 PetscErrorCode ierr; 313 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 314 PetscBool iascii; 315 316 PetscFunctionBegin; 317 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 318 if (iascii) { 319 ierr = PetscViewerASCIIPrintf(viewer," point-block size %D\n",jac->bs);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 /* -------------------------------------------------------------------------- */ 325 /*MC 326 PCPBJACOBI - Point block Jacobi preconditioner 327 328 329 Notes: 330 See PCJACOBI for point Jacobi preconditioning 331 332 This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix 333 334 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 335 is detected a PETSc error is generated. 336 337 Developer Notes: 338 This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 339 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 340 terminating KSP with a KSP_DIVERGED_NANORIF allowing 341 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 342 343 Developer Note: Perhaps should provide an option that allows generation of a valid preconditioner 344 even if a block is singular as the PCJACOBI does. 345 346 Level: beginner 347 348 349 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI 350 351 M*/ 352 353 PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 354 { 355 PC_PBJacobi *jac; 356 PetscErrorCode ierr; 357 358 PetscFunctionBegin; 359 /* 360 Creates the private data structure for this preconditioner and 361 attach it to the PC object. 362 */ 363 ierr = PetscNewLog(pc,&jac);CHKERRQ(ierr); 364 pc->data = (void*)jac; 365 366 /* 367 Initialize the pointers to vectors to ZERO; these will be used to store 368 diagonal entries of the matrix for fast preconditioner application. 369 */ 370 jac->diag = 0; 371 372 /* 373 Set the pointers for the functions that are provided above. 374 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 375 are called, they will automatically call these functions. Note we 376 choose not to provide a couple of these functions since they are 377 not needed. 378 */ 379 pc->ops->apply = 0; /*set depending on the block size */ 380 pc->ops->applytranspose = 0; 381 pc->ops->setup = PCSetUp_PBJacobi; 382 pc->ops->destroy = PCDestroy_PBJacobi; 383 pc->ops->setfromoptions = 0; 384 pc->ops->view = PCView_PBJacobi; 385 pc->ops->applyrichardson = 0; 386 pc->ops->applysymmetricleft = 0; 387 pc->ops->applysymmetricright = 0; 388 PetscFunctionReturn(0); 389 } 390 391 392