1dba47a55SKris Buschelman 24b9ad928SBarry Smith /* 34b9ad928SBarry Smith Include files needed for the PBJacobi preconditioner: 44b9ad928SBarry Smith pcimpl.h - private include file intended for use by all preconditioners 54b9ad928SBarry Smith */ 64b9ad928SBarry Smith 7af0996ceSBarry Smith #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 84b9ad928SBarry Smith 94b9ad928SBarry Smith /* 104b9ad928SBarry Smith Private context (data structure) for the PBJacobi preconditioner. 114b9ad928SBarry Smith */ 124b9ad928SBarry Smith typedef struct { 13713ccfa9SJed Brown const MatScalar *diag; 14c1ac3661SBarry Smith PetscInt bs, mbs; 154b9ad928SBarry Smith } PC_PBJacobi; 164b9ad928SBarry Smith 17*1f713683SJunchao Zhang static PetscErrorCode PCApply_PBJacobi(PC pc, Vec x, Vec y) 18d71ae5a4SJacob Faibussowitsch { 19bbead8a2SBarry Smith PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 20*1f713683SJunchao Zhang PetscInt i, ib, jb; 21*1f713683SJunchao Zhang const PetscInt m = jac->mbs; 22*1f713683SJunchao Zhang const PetscInt bs = jac->bs; 23bbead8a2SBarry Smith const MatScalar *diag = jac->diag; 24*1f713683SJunchao Zhang PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 25bbead8a2SBarry Smith const PetscScalar *xx; 26bbead8a2SBarry Smith 27bbead8a2SBarry Smith PetscFunctionBegin; 289566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 299566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yy)); 30*1f713683SJunchao Zhang switch (bs) { 31*1f713683SJunchao Zhang case 1: 322fa5cd67SKarl Rupp for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i]; 33*1f713683SJunchao Zhang break; 34*1f713683SJunchao Zhang case 2: 354b9ad928SBarry Smith for (i = 0; i < m; i++) { 369371c9d4SSatish Balay x0 = xx[2 * i]; 379371c9d4SSatish Balay x1 = xx[2 * i + 1]; 384b9ad928SBarry Smith yy[2 * i] = diag[0] * x0 + diag[2] * x1; 394b9ad928SBarry Smith yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1; 404b9ad928SBarry Smith diag += 4; 414b9ad928SBarry Smith } 42*1f713683SJunchao Zhang break; 43*1f713683SJunchao Zhang case 3: 444b9ad928SBarry Smith for (i = 0; i < m; i++) { 459371c9d4SSatish Balay x0 = xx[3 * i]; 469371c9d4SSatish Balay x1 = xx[3 * i + 1]; 479371c9d4SSatish Balay x2 = xx[3 * i + 2]; 482fa5cd67SKarl Rupp 494b9ad928SBarry Smith yy[3 * i] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 504b9ad928SBarry Smith yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 514b9ad928SBarry Smith yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 524b9ad928SBarry Smith diag += 9; 534b9ad928SBarry Smith } 54*1f713683SJunchao Zhang break; 55*1f713683SJunchao Zhang case 4: 564b9ad928SBarry Smith for (i = 0; i < m; i++) { 579371c9d4SSatish Balay x0 = xx[4 * i]; 589371c9d4SSatish Balay x1 = xx[4 * i + 1]; 599371c9d4SSatish Balay x2 = xx[4 * i + 2]; 609371c9d4SSatish Balay x3 = xx[4 * i + 3]; 612fa5cd67SKarl Rupp 624b9ad928SBarry Smith yy[4 * i] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 634b9ad928SBarry Smith yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 644b9ad928SBarry Smith yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 654b9ad928SBarry Smith yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 664b9ad928SBarry Smith diag += 16; 674b9ad928SBarry Smith } 68*1f713683SJunchao Zhang break; 69*1f713683SJunchao Zhang case 5: 704b9ad928SBarry Smith for (i = 0; i < m; i++) { 719371c9d4SSatish Balay x0 = xx[5 * i]; 729371c9d4SSatish Balay x1 = xx[5 * i + 1]; 739371c9d4SSatish Balay x2 = xx[5 * i + 2]; 749371c9d4SSatish Balay x3 = xx[5 * i + 3]; 759371c9d4SSatish Balay x4 = xx[5 * i + 4]; 762fa5cd67SKarl Rupp 774b9ad928SBarry Smith yy[5 * i] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 784b9ad928SBarry Smith yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 794b9ad928SBarry Smith yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 804b9ad928SBarry Smith yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 814b9ad928SBarry Smith yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 824b9ad928SBarry Smith diag += 25; 834b9ad928SBarry Smith } 84*1f713683SJunchao Zhang break; 85*1f713683SJunchao Zhang case 6: 860e1b4bd6SMark F. Adams for (i = 0; i < m; i++) { 879371c9d4SSatish Balay x0 = xx[6 * i]; 889371c9d4SSatish Balay x1 = xx[6 * i + 1]; 899371c9d4SSatish Balay x2 = xx[6 * i + 2]; 909371c9d4SSatish Balay x3 = xx[6 * i + 3]; 919371c9d4SSatish Balay x4 = xx[6 * i + 4]; 929371c9d4SSatish Balay x5 = xx[6 * i + 5]; 932fa5cd67SKarl Rupp 940e1b4bd6SMark F. Adams yy[6 * i] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 950e1b4bd6SMark F. Adams yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 960e1b4bd6SMark F. Adams yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 970e1b4bd6SMark F. Adams yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 980e1b4bd6SMark F. Adams yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 990e1b4bd6SMark F. Adams yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 1000e1b4bd6SMark F. Adams diag += 36; 1010e1b4bd6SMark F. Adams } 102*1f713683SJunchao Zhang break; 103*1f713683SJunchao Zhang case 7: 1040a4ca348SMatthew G Knepley for (i = 0; i < m; i++) { 1059371c9d4SSatish Balay x0 = xx[7 * i]; 1069371c9d4SSatish Balay x1 = xx[7 * i + 1]; 1079371c9d4SSatish Balay x2 = xx[7 * i + 2]; 1089371c9d4SSatish Balay x3 = xx[7 * i + 3]; 1099371c9d4SSatish Balay x4 = xx[7 * i + 4]; 1109371c9d4SSatish Balay x5 = xx[7 * i + 5]; 1119371c9d4SSatish Balay x6 = xx[7 * i + 6]; 1122fa5cd67SKarl Rupp 1130a4ca348SMatthew G Knepley yy[7 * i] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 1140a4ca348SMatthew G Knepley yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 1150a4ca348SMatthew G Knepley yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 1160a4ca348SMatthew G Knepley yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 1170a4ca348SMatthew G Knepley yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 1180a4ca348SMatthew G Knepley yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 1190a4ca348SMatthew G Knepley yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 1200a4ca348SMatthew G Knepley diag += 49; 1210a4ca348SMatthew G Knepley } 122*1f713683SJunchao Zhang break; 123*1f713683SJunchao Zhang default: 124f79daf70SMark Lohry for (i = 0; i < m; i++) { 125f79daf70SMark Lohry for (ib = 0; ib < bs; ib++) { 126f79daf70SMark Lohry PetscScalar rowsum = 0; 127ad540459SPierre Jolivet for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb]; 128f79daf70SMark Lohry yy[bs * i + ib] = rowsum; 129f79daf70SMark Lohry } 130f79daf70SMark Lohry diag += bs * bs; 131f79daf70SMark Lohry } 132*1f713683SJunchao Zhang } 1339566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 1349566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yy)); 1359566063dSJacob Faibussowitsch PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */ 136f79daf70SMark Lohry PetscFunctionReturn(0); 137f79daf70SMark Lohry } 138faff7d23SJed Brown 139d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc, Vec x, Vec y) 140d71ae5a4SJacob Faibussowitsch { 1418cd12025SJed Brown PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 1428cd12025SJed Brown PetscInt i, j, k, m = jac->mbs, bs = jac->bs; 1438cd12025SJed Brown const MatScalar *diag = jac->diag; 1448cd12025SJed Brown const PetscScalar *xx; 1458cd12025SJed Brown PetscScalar *yy; 1468cd12025SJed Brown 1478cd12025SJed Brown PetscFunctionBegin; 1489566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 1499566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yy)); 1508cd12025SJed Brown for (i = 0; i < m; i++) { 1518cd12025SJed Brown for (j = 0; j < bs; j++) yy[i * bs + j] = 0.; 1528cd12025SJed Brown for (j = 0; j < bs; j++) { 153ad540459SPierre Jolivet for (k = 0; k < bs; k++) yy[i * bs + k] += diag[k * bs + j] * xx[i * bs + j]; 1548cd12025SJed Brown } 1558cd12025SJed Brown diag += bs * bs; 1568cd12025SJed Brown } 1579566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 1589566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yy)); 1599566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(m * bs * (2 * bs - 1))); 1608cd12025SJed Brown PetscFunctionReturn(0); 1618cd12025SJed Brown } 1628cd12025SJed Brown 163d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_PBJacobi(PC pc) 164d71ae5a4SJacob Faibussowitsch { 1654b9ad928SBarry Smith PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 1664b9ad928SBarry Smith Mat A = pc->pmat; 16700e125f8SBarry Smith MatFactorError err; 168539c167fSBarry Smith PetscInt nlocal; 1694b9ad928SBarry Smith 1704b9ad928SBarry Smith PetscFunctionBegin; 1719566063dSJacob Faibussowitsch PetscCall(MatInvertBlockDiagonal(A, &jac->diag)); 1729566063dSJacob Faibussowitsch PetscCall(MatFactorGetError(A, &err)); 17300e125f8SBarry Smith if (err) pc->failedreason = (PCFailedReason)err; 174d7806518SHong Zhang 1759566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &jac->bs)); 1769566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &nlocal, NULL)); 177539c167fSBarry Smith jac->mbs = nlocal / jac->bs; 178*1f713683SJunchao Zhang pc->ops->apply = PCApply_PBJacobi; 1798cd12025SJed Brown pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N; 1804b9ad928SBarry Smith PetscFunctionReturn(0); 1814b9ad928SBarry Smith } 182f1580f4eSBarry Smith 183d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCDestroy_PBJacobi(PC pc) 184d71ae5a4SJacob Faibussowitsch { 1854b9ad928SBarry Smith PetscFunctionBegin; 1864b9ad928SBarry Smith /* 1874b9ad928SBarry Smith Free the private data structure that was hanging off the PC 1884b9ad928SBarry Smith */ 1899566063dSJacob Faibussowitsch PetscCall(PetscFree(pc->data)); 1904b9ad928SBarry Smith PetscFunctionReturn(0); 1914b9ad928SBarry Smith } 192fa939db7SJed Brown 193d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer) 194d71ae5a4SJacob Faibussowitsch { 195fa939db7SJed Brown PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 196fa939db7SJed Brown PetscBool iascii; 197fa939db7SJed Brown 198fa939db7SJed Brown PetscFunctionBegin; 1999566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 20048a46eb9SPierre Jolivet if (iascii) PetscCall(PetscViewerASCIIPrintf(viewer, " point-block size %" PetscInt_FMT "\n", jac->bs)); 201fa939db7SJed Brown PetscFunctionReturn(0); 202fa939db7SJed Brown } 203fa939db7SJed Brown 20437a17b4dSBarry Smith /*MC 205422a814eSBarry Smith PCPBJACOBI - Point block Jacobi preconditioner 206422a814eSBarry Smith 20795452b02SPatrick Sanan Notes: 208f1580f4eSBarry Smith See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks 209422a814eSBarry Smith 210f1580f4eSBarry Smith This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix 211422a814eSBarry Smith 212422a814eSBarry Smith Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 213422a814eSBarry Smith is detected a PETSc error is generated. 214422a814eSBarry Smith 21595452b02SPatrick Sanan Developer Notes: 216f1580f4eSBarry Smith This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow 217422a814eSBarry Smith the factorization to continue even after a zero pivot is found resulting in a Nan and hence 218f1580f4eSBarry Smith terminating `KSP` with a `KSP_DIVERGED_NANORIF` allowing 219422a814eSBarry Smith a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 220422a814eSBarry Smith 22190dfcc32SBarry Smith Perhaps should provide an option that allows generation of a valid preconditioner 222f1580f4eSBarry Smith even if a block is singular as the `PCJACOBI` does. 22337a17b4dSBarry Smith 22437a17b4dSBarry Smith Level: beginner 22537a17b4dSBarry Smith 226db781477SPatrick Sanan .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI` 22737a17b4dSBarry Smith M*/ 22837a17b4dSBarry Smith 229d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 230d71ae5a4SJacob Faibussowitsch { 2314b9ad928SBarry Smith PC_PBJacobi *jac; 2324b9ad928SBarry Smith 2334b9ad928SBarry Smith PetscFunctionBegin; 2344b9ad928SBarry Smith /* 2354b9ad928SBarry Smith Creates the private data structure for this preconditioner and 2364b9ad928SBarry Smith attach it to the PC object. 2374b9ad928SBarry Smith */ 2384dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&jac)); 2394b9ad928SBarry Smith pc->data = (void *)jac; 2404b9ad928SBarry Smith 2414b9ad928SBarry Smith /* 2424b9ad928SBarry Smith Initialize the pointers to vectors to ZERO; these will be used to store 2434b9ad928SBarry Smith diagonal entries of the matrix for fast preconditioner application. 2444b9ad928SBarry Smith */ 2450a545947SLisandro Dalcin jac->diag = NULL; 2464b9ad928SBarry Smith 2474b9ad928SBarry Smith /* 2484b9ad928SBarry Smith Set the pointers for the functions that are provided above. 2494b9ad928SBarry Smith Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 2504b9ad928SBarry Smith are called, they will automatically call these functions. Note we 2514b9ad928SBarry Smith choose not to provide a couple of these functions since they are 2524b9ad928SBarry Smith not needed. 2534b9ad928SBarry Smith */ 2540a545947SLisandro Dalcin pc->ops->apply = NULL; /*set depending on the block size */ 2550a545947SLisandro Dalcin pc->ops->applytranspose = NULL; 2564b9ad928SBarry Smith pc->ops->setup = PCSetUp_PBJacobi; 2574b9ad928SBarry Smith pc->ops->destroy = PCDestroy_PBJacobi; 2580a545947SLisandro Dalcin pc->ops->setfromoptions = NULL; 259fa939db7SJed Brown pc->ops->view = PCView_PBJacobi; 2600a545947SLisandro Dalcin pc->ops->applyrichardson = NULL; 2610a545947SLisandro Dalcin pc->ops->applysymmetricleft = NULL; 2620a545947SLisandro Dalcin pc->ops->applysymmetricright = NULL; 2634b9ad928SBarry Smith PetscFunctionReturn(0); 2644b9ad928SBarry Smith } 265