112facf1bSJunchao Zhang #include <../src/ksp/pc/impls/pbjacobi/pbjacobi.h> 242ce410bSJunchao Zhang #include <petsc/private/matimpl.h> 34b9ad928SBarry Smith 41f713683SJunchao Zhang static PetscErrorCode PCApply_PBJacobi(PC pc, Vec x, Vec y) 5d71ae5a4SJacob Faibussowitsch { 6bbead8a2SBarry Smith PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 71f713683SJunchao Zhang PetscInt i, ib, jb; 81f713683SJunchao Zhang const PetscInt m = jac->mbs; 91f713683SJunchao Zhang const PetscInt bs = jac->bs; 10bbead8a2SBarry Smith const MatScalar *diag = jac->diag; 111f713683SJunchao Zhang PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 12bbead8a2SBarry Smith const PetscScalar *xx; 13bbead8a2SBarry Smith 14bbead8a2SBarry Smith PetscFunctionBegin; 159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 169566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yy)); 171f713683SJunchao Zhang switch (bs) { 181f713683SJunchao Zhang case 1: 192fa5cd67SKarl Rupp for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i]; 201f713683SJunchao Zhang break; 211f713683SJunchao Zhang case 2: 224b9ad928SBarry Smith for (i = 0; i < m; i++) { 239371c9d4SSatish Balay x0 = xx[2 * i]; 249371c9d4SSatish Balay x1 = xx[2 * i + 1]; 254b9ad928SBarry Smith yy[2 * i] = diag[0] * x0 + diag[2] * x1; 264b9ad928SBarry Smith yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1; 274b9ad928SBarry Smith diag += 4; 284b9ad928SBarry Smith } 291f713683SJunchao Zhang break; 301f713683SJunchao Zhang case 3: 314b9ad928SBarry Smith for (i = 0; i < m; i++) { 329371c9d4SSatish Balay x0 = xx[3 * i]; 339371c9d4SSatish Balay x1 = xx[3 * i + 1]; 349371c9d4SSatish Balay x2 = xx[3 * i + 2]; 352fa5cd67SKarl Rupp 364b9ad928SBarry Smith yy[3 * i] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 374b9ad928SBarry Smith yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 384b9ad928SBarry Smith yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 394b9ad928SBarry Smith diag += 9; 404b9ad928SBarry Smith } 411f713683SJunchao Zhang break; 421f713683SJunchao Zhang case 4: 434b9ad928SBarry Smith for (i = 0; i < m; i++) { 449371c9d4SSatish Balay x0 = xx[4 * i]; 459371c9d4SSatish Balay x1 = xx[4 * i + 1]; 469371c9d4SSatish Balay x2 = xx[4 * i + 2]; 479371c9d4SSatish Balay x3 = xx[4 * i + 3]; 482fa5cd67SKarl Rupp 494b9ad928SBarry Smith yy[4 * i] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 504b9ad928SBarry Smith yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 514b9ad928SBarry Smith yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 524b9ad928SBarry Smith yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 534b9ad928SBarry Smith diag += 16; 544b9ad928SBarry Smith } 551f713683SJunchao Zhang break; 561f713683SJunchao Zhang case 5: 574b9ad928SBarry Smith for (i = 0; i < m; i++) { 589371c9d4SSatish Balay x0 = xx[5 * i]; 599371c9d4SSatish Balay x1 = xx[5 * i + 1]; 609371c9d4SSatish Balay x2 = xx[5 * i + 2]; 619371c9d4SSatish Balay x3 = xx[5 * i + 3]; 629371c9d4SSatish Balay x4 = xx[5 * i + 4]; 632fa5cd67SKarl Rupp 644b9ad928SBarry Smith yy[5 * i] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 654b9ad928SBarry Smith yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 664b9ad928SBarry Smith yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 674b9ad928SBarry Smith yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 684b9ad928SBarry Smith yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 694b9ad928SBarry Smith diag += 25; 704b9ad928SBarry Smith } 711f713683SJunchao Zhang break; 721f713683SJunchao Zhang case 6: 730e1b4bd6SMark F. Adams for (i = 0; i < m; i++) { 749371c9d4SSatish Balay x0 = xx[6 * i]; 759371c9d4SSatish Balay x1 = xx[6 * i + 1]; 769371c9d4SSatish Balay x2 = xx[6 * i + 2]; 779371c9d4SSatish Balay x3 = xx[6 * i + 3]; 789371c9d4SSatish Balay x4 = xx[6 * i + 4]; 799371c9d4SSatish Balay x5 = xx[6 * i + 5]; 802fa5cd67SKarl Rupp 810e1b4bd6SMark F. Adams yy[6 * i] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 820e1b4bd6SMark F. Adams yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 830e1b4bd6SMark F. Adams yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 840e1b4bd6SMark F. Adams yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 850e1b4bd6SMark F. Adams yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 860e1b4bd6SMark F. Adams yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 870e1b4bd6SMark F. Adams diag += 36; 880e1b4bd6SMark F. Adams } 891f713683SJunchao Zhang break; 901f713683SJunchao Zhang case 7: 910a4ca348SMatthew G Knepley for (i = 0; i < m; i++) { 929371c9d4SSatish Balay x0 = xx[7 * i]; 939371c9d4SSatish Balay x1 = xx[7 * i + 1]; 949371c9d4SSatish Balay x2 = xx[7 * i + 2]; 959371c9d4SSatish Balay x3 = xx[7 * i + 3]; 969371c9d4SSatish Balay x4 = xx[7 * i + 4]; 979371c9d4SSatish Balay x5 = xx[7 * i + 5]; 989371c9d4SSatish Balay x6 = xx[7 * i + 6]; 992fa5cd67SKarl Rupp 1000a4ca348SMatthew G Knepley yy[7 * i] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 1010a4ca348SMatthew G Knepley yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 1020a4ca348SMatthew G Knepley yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 1030a4ca348SMatthew G Knepley yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 1040a4ca348SMatthew G Knepley yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 1050a4ca348SMatthew G Knepley yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 1060a4ca348SMatthew G Knepley yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 1070a4ca348SMatthew G Knepley diag += 49; 1080a4ca348SMatthew G Knepley } 1091f713683SJunchao Zhang break; 1101f713683SJunchao Zhang default: 111f79daf70SMark Lohry for (i = 0; i < m; i++) { 112f79daf70SMark Lohry for (ib = 0; ib < bs; ib++) { 113f79daf70SMark Lohry PetscScalar rowsum = 0; 114ad540459SPierre Jolivet for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb]; 115f79daf70SMark Lohry yy[bs * i + ib] = rowsum; 116f79daf70SMark Lohry } 117f79daf70SMark Lohry diag += bs * bs; 118f79daf70SMark Lohry } 1191f713683SJunchao Zhang } 1209566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 1219566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yy)); 1229566063dSJacob Faibussowitsch PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */ 1233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 124f79daf70SMark Lohry } 125faff7d23SJed Brown 126dc1da017SJunchao Zhang static PetscErrorCode PCApplyTranspose_PBJacobi(PC pc, Vec x, Vec y) 127d71ae5a4SJacob Faibussowitsch { 1288cd12025SJed Brown PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 129dc1da017SJunchao Zhang PetscInt i, ib, jb; 130dc1da017SJunchao Zhang const PetscInt m = jac->mbs; 131dc1da017SJunchao Zhang const PetscInt bs = jac->bs; 1328cd12025SJed Brown const MatScalar *diag = jac->diag; 133dc1da017SJunchao Zhang PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 1348cd12025SJed Brown const PetscScalar *xx; 1358cd12025SJed Brown 1368cd12025SJed Brown PetscFunctionBegin; 1379566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 1389566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yy)); 139dc1da017SJunchao Zhang switch (bs) { 140dc1da017SJunchao Zhang case 1: 141dc1da017SJunchao Zhang for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i]; 142dc1da017SJunchao Zhang break; 143dc1da017SJunchao Zhang case 2: 1448cd12025SJed Brown for (i = 0; i < m; i++) { 145dc1da017SJunchao Zhang x0 = xx[2 * i]; 146dc1da017SJunchao Zhang x1 = xx[2 * i + 1]; 147dc1da017SJunchao Zhang yy[2 * i] = diag[0] * x0 + diag[1] * x1; 148dc1da017SJunchao Zhang yy[2 * i + 1] = diag[2] * x0 + diag[3] * x1; 149dc1da017SJunchao Zhang diag += 4; 150dc1da017SJunchao Zhang } 151dc1da017SJunchao Zhang break; 152dc1da017SJunchao Zhang case 3: 153dc1da017SJunchao Zhang for (i = 0; i < m; i++) { 154dc1da017SJunchao Zhang x0 = xx[3 * i]; 155dc1da017SJunchao Zhang x1 = xx[3 * i + 1]; 156dc1da017SJunchao Zhang x2 = xx[3 * i + 2]; 157dc1da017SJunchao Zhang 158dc1da017SJunchao Zhang yy[3 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2; 159dc1da017SJunchao Zhang yy[3 * i + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2; 160dc1da017SJunchao Zhang yy[3 * i + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2; 161dc1da017SJunchao Zhang diag += 9; 162dc1da017SJunchao Zhang } 163dc1da017SJunchao Zhang break; 164dc1da017SJunchao Zhang case 4: 165dc1da017SJunchao Zhang for (i = 0; i < m; i++) { 166dc1da017SJunchao Zhang x0 = xx[4 * i]; 167dc1da017SJunchao Zhang x1 = xx[4 * i + 1]; 168dc1da017SJunchao Zhang x2 = xx[4 * i + 2]; 169dc1da017SJunchao Zhang x3 = xx[4 * i + 3]; 170dc1da017SJunchao Zhang 171dc1da017SJunchao Zhang yy[4 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3; 172dc1da017SJunchao Zhang yy[4 * i + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3; 173dc1da017SJunchao Zhang yy[4 * i + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3; 174dc1da017SJunchao Zhang yy[4 * i + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3; 175dc1da017SJunchao Zhang diag += 16; 176dc1da017SJunchao Zhang } 177dc1da017SJunchao Zhang break; 178dc1da017SJunchao Zhang case 5: 179dc1da017SJunchao Zhang for (i = 0; i < m; i++) { 180dc1da017SJunchao Zhang x0 = xx[5 * i]; 181dc1da017SJunchao Zhang x1 = xx[5 * i + 1]; 182dc1da017SJunchao Zhang x2 = xx[5 * i + 2]; 183dc1da017SJunchao Zhang x3 = xx[5 * i + 3]; 184dc1da017SJunchao Zhang x4 = xx[5 * i + 4]; 185dc1da017SJunchao Zhang 186dc1da017SJunchao Zhang yy[5 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4; 187dc1da017SJunchao Zhang yy[5 * i + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4; 188dc1da017SJunchao Zhang yy[5 * i + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4; 189dc1da017SJunchao Zhang yy[5 * i + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4; 190dc1da017SJunchao Zhang yy[5 * i + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4; 191dc1da017SJunchao Zhang diag += 25; 192dc1da017SJunchao Zhang } 193dc1da017SJunchao Zhang break; 194dc1da017SJunchao Zhang case 6: 195dc1da017SJunchao Zhang for (i = 0; i < m; i++) { 196dc1da017SJunchao Zhang x0 = xx[6 * i]; 197dc1da017SJunchao Zhang x1 = xx[6 * i + 1]; 198dc1da017SJunchao Zhang x2 = xx[6 * i + 2]; 199dc1da017SJunchao Zhang x3 = xx[6 * i + 3]; 200dc1da017SJunchao Zhang x4 = xx[6 * i + 4]; 201dc1da017SJunchao Zhang x5 = xx[6 * i + 5]; 202dc1da017SJunchao Zhang 203dc1da017SJunchao Zhang yy[6 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5; 204dc1da017SJunchao Zhang yy[6 * i + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5; 205dc1da017SJunchao Zhang yy[6 * i + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5; 206dc1da017SJunchao Zhang yy[6 * i + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5; 207dc1da017SJunchao Zhang yy[6 * i + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5; 208dc1da017SJunchao Zhang yy[6 * i + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5; 209dc1da017SJunchao Zhang diag += 36; 210dc1da017SJunchao Zhang } 211dc1da017SJunchao Zhang break; 212dc1da017SJunchao Zhang case 7: 213dc1da017SJunchao Zhang for (i = 0; i < m; i++) { 214dc1da017SJunchao Zhang x0 = xx[7 * i]; 215dc1da017SJunchao Zhang x1 = xx[7 * i + 1]; 216dc1da017SJunchao Zhang x2 = xx[7 * i + 2]; 217dc1da017SJunchao Zhang x3 = xx[7 * i + 3]; 218dc1da017SJunchao Zhang x4 = xx[7 * i + 4]; 219dc1da017SJunchao Zhang x5 = xx[7 * i + 5]; 220dc1da017SJunchao Zhang x6 = xx[7 * i + 6]; 221dc1da017SJunchao Zhang 222dc1da017SJunchao Zhang yy[7 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6; 223dc1da017SJunchao Zhang yy[7 * i + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6; 224dc1da017SJunchao Zhang yy[7 * i + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6; 225dc1da017SJunchao Zhang yy[7 * i + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6; 226dc1da017SJunchao Zhang yy[7 * i + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6; 227dc1da017SJunchao Zhang yy[7 * i + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6; 228dc1da017SJunchao Zhang yy[7 * i + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6; 229dc1da017SJunchao Zhang diag += 49; 230dc1da017SJunchao Zhang } 231dc1da017SJunchao Zhang break; 232dc1da017SJunchao Zhang default: 233dc1da017SJunchao Zhang for (i = 0; i < m; i++) { 234dc1da017SJunchao Zhang for (ib = 0; ib < bs; ib++) { 235dc1da017SJunchao Zhang PetscScalar rowsum = 0; 236dc1da017SJunchao Zhang for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[bs * i + jb]; 237dc1da017SJunchao Zhang yy[bs * i + ib] = rowsum; 2388cd12025SJed Brown } 2398cd12025SJed Brown diag += bs * bs; 2408cd12025SJed Brown } 241dc1da017SJunchao Zhang } 2429566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 2439566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yy)); 244dc1da017SJunchao Zhang PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); 2453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2468cd12025SJed Brown } 2478cd12025SJed Brown 24842ce410bSJunchao Zhang PETSC_INTERN PetscErrorCode PCSetUp_PBJacobi_Host(PC pc, Mat diagPB) 249d71ae5a4SJacob Faibussowitsch { 2504b9ad928SBarry Smith PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 25142ce410bSJunchao Zhang Mat A = diagPB ? diagPB : pc->pmat; 25200e125f8SBarry Smith MatFactorError err; 253539c167fSBarry Smith PetscInt nlocal; 2544b9ad928SBarry Smith 2554b9ad928SBarry Smith PetscFunctionBegin; 2569566063dSJacob Faibussowitsch PetscCall(MatInvertBlockDiagonal(A, &jac->diag)); 2579566063dSJacob Faibussowitsch PetscCall(MatFactorGetError(A, &err)); 25800e125f8SBarry Smith if (err) pc->failedreason = (PCFailedReason)err; 259d7806518SHong Zhang 2609566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(A, &jac->bs)); 2619566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &nlocal, NULL)); 262539c167fSBarry Smith jac->mbs = nlocal / jac->bs; 2633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2644b9ad928SBarry Smith } 265f1580f4eSBarry Smith 26612facf1bSJunchao Zhang static PetscErrorCode PCSetUp_PBJacobi(PC pc) 26712facf1bSJunchao Zhang { 26842ce410bSJunchao Zhang PetscBool flg; 26942ce410bSJunchao Zhang Mat diagPB = NULL; 27042ce410bSJunchao Zhang 27112facf1bSJunchao Zhang PetscFunctionBegin; 27212facf1bSJunchao Zhang /* In PCCreate_PBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */ 27342ce410bSJunchao Zhang 27442ce410bSJunchao Zhang // pmat (e.g., MatCEED from libCEED) might have its own method to provide a matrix (diagPB) 27542ce410bSJunchao Zhang // made of the diagonal blocks. So we check both pmat and diagVPB. 27642ce410bSJunchao Zhang PetscCall(MatHasOperation(pc->pmat, MATOP_GET_BLOCK_DIAGONAL, &flg)); 27742ce410bSJunchao Zhang if (flg) PetscUseTypeMethod(pc->pmat, getblockdiagonal, &diagPB); // diagPB's reference count is increased upon return 27842ce410bSJunchao Zhang 27912facf1bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 28012facf1bSJunchao Zhang PetscBool isCuda; 28112facf1bSJunchao Zhang PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 28242ce410bSJunchao Zhang if (!isCuda && diagPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagPB, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 28312facf1bSJunchao Zhang #endif 28412facf1bSJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS) 28512facf1bSJunchao Zhang PetscBool isKok; 28612facf1bSJunchao Zhang PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 28742ce410bSJunchao Zhang if (!isKok && diagPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagPB, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 28812facf1bSJunchao Zhang #endif 28912facf1bSJunchao Zhang 29012facf1bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 29142ce410bSJunchao Zhang if (isCuda) PetscCall(PCSetUp_PBJacobi_CUDA(pc, diagPB)); 29212facf1bSJunchao Zhang else 29312facf1bSJunchao Zhang #endif 29412facf1bSJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS) 29512facf1bSJunchao Zhang if (isKok) 29642ce410bSJunchao Zhang PetscCall(PCSetUp_PBJacobi_Kokkos(pc, diagPB)); 29712facf1bSJunchao Zhang else 29812facf1bSJunchao Zhang #endif 29912facf1bSJunchao Zhang { 30042ce410bSJunchao Zhang PetscCall(PCSetUp_PBJacobi_Host(pc, diagPB)); 30112facf1bSJunchao Zhang } 30242ce410bSJunchao Zhang PetscCall(MatDestroy(&diagPB)); // since we don't need it anymore, we don't stash it in PC_PBJacobi 3033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30412facf1bSJunchao Zhang } 30512facf1bSJunchao Zhang 30612facf1bSJunchao Zhang PetscErrorCode PCDestroy_PBJacobi(PC pc) 307d71ae5a4SJacob Faibussowitsch { 30842ce410bSJunchao Zhang PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 30942ce410bSJunchao Zhang 3104b9ad928SBarry Smith PetscFunctionBegin; 3114b9ad928SBarry Smith /* 3124b9ad928SBarry Smith Free the private data structure that was hanging off the PC 3134b9ad928SBarry Smith */ 31442ce410bSJunchao Zhang // PetscCall(PetscFree(jac->diag)); // the memory is owned by e.g., a->ibdiag in Mat_SeqAIJ, so don't free it here. 31542ce410bSJunchao Zhang PetscCall(MatDestroy(&jac->diagPB)); 3169566063dSJacob Faibussowitsch PetscCall(PetscFree(pc->data)); 3173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3184b9ad928SBarry Smith } 319fa939db7SJed Brown 320d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer) 321d71ae5a4SJacob Faibussowitsch { 322fa939db7SJed Brown PC_PBJacobi *jac = (PC_PBJacobi *)pc->data; 3239f196a02SMartin Diehl PetscBool isascii; 324fa939db7SJed Brown 325fa939db7SJed Brown PetscFunctionBegin; 3269f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 3279f196a02SMartin Diehl if (isascii) PetscCall(PetscViewerASCIIPrintf(viewer, " point-block size %" PetscInt_FMT "\n", jac->bs)); 3283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 329fa939db7SJed Brown } 330fa939db7SJed Brown 33137a17b4dSBarry Smith /*MC 332422a814eSBarry Smith PCPBJACOBI - Point block Jacobi preconditioner 333422a814eSBarry Smith 33495452b02SPatrick Sanan Notes: 335f1580f4eSBarry Smith See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks 336422a814eSBarry Smith 337f1580f4eSBarry Smith This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix 338422a814eSBarry Smith 339422a814eSBarry Smith Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 340422a814eSBarry Smith is detected a PETSc error is generated. 341422a814eSBarry Smith 34295452b02SPatrick Sanan Developer Notes: 343f1580f4eSBarry Smith This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow 344*76c63389SBarry Smith the factorization to continue even after a zero pivot is found resulting in a NaN and hence 34548773899SPierre Jolivet terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing 346422a814eSBarry Smith a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 347422a814eSBarry Smith 34890dfcc32SBarry Smith Perhaps should provide an option that allows generation of a valid preconditioner 349f1580f4eSBarry Smith even if a block is singular as the `PCJACOBI` does. 35037a17b4dSBarry Smith 35137a17b4dSBarry Smith Level: beginner 35237a17b4dSBarry Smith 353562efe2eSBarry Smith .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI` 35437a17b4dSBarry Smith M*/ 35537a17b4dSBarry Smith 356d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc) 357d71ae5a4SJacob Faibussowitsch { 3584b9ad928SBarry Smith PC_PBJacobi *jac; 3594b9ad928SBarry Smith 3604b9ad928SBarry Smith PetscFunctionBegin; 3614b9ad928SBarry Smith /* 3624b9ad928SBarry Smith Creates the private data structure for this preconditioner and 3634b9ad928SBarry Smith attach it to the PC object. 3644b9ad928SBarry Smith */ 3654dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&jac)); 3664b9ad928SBarry Smith pc->data = (void *)jac; 3674b9ad928SBarry Smith 3684b9ad928SBarry Smith /* 3694b9ad928SBarry Smith Initialize the pointers to vectors to ZERO; these will be used to store 3704b9ad928SBarry Smith diagonal entries of the matrix for fast preconditioner application. 3714b9ad928SBarry Smith */ 3720a545947SLisandro Dalcin jac->diag = NULL; 3734b9ad928SBarry Smith 3744b9ad928SBarry Smith /* 3754b9ad928SBarry Smith Set the pointers for the functions that are provided above. 3764b9ad928SBarry Smith Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 3774b9ad928SBarry Smith are called, they will automatically call these functions. Note we 3784b9ad928SBarry Smith choose not to provide a couple of these functions since they are 3794b9ad928SBarry Smith not needed. 3804b9ad928SBarry Smith */ 38112facf1bSJunchao Zhang pc->ops->apply = PCApply_PBJacobi; 38212facf1bSJunchao Zhang pc->ops->applytranspose = PCApplyTranspose_PBJacobi; 3834b9ad928SBarry Smith pc->ops->setup = PCSetUp_PBJacobi; 3844b9ad928SBarry Smith pc->ops->destroy = PCDestroy_PBJacobi; 3850a545947SLisandro Dalcin pc->ops->setfromoptions = NULL; 386fa939db7SJed Brown pc->ops->view = PCView_PBJacobi; 3870a545947SLisandro Dalcin pc->ops->applyrichardson = NULL; 3880a545947SLisandro Dalcin pc->ops->applysymmetricleft = NULL; 3890a545947SLisandro Dalcin pc->ops->applysymmetricright = NULL; 3903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3914b9ad928SBarry Smith } 392