1f1be3500SJunchao Zhang #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h> 242ce410bSJunchao Zhang #include <petsc/private/matimpl.h> 30da83c2eSBarry Smith 4d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y) 5d71ae5a4SJacob Faibussowitsch { 60da83c2eSBarry Smith PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 70da83c2eSBarry Smith PetscInt i, ncnt = 0; 80da83c2eSBarry Smith const MatScalar *diag = jac->diag; 90da83c2eSBarry Smith PetscInt ib, jb, bs; 100da83c2eSBarry Smith const PetscScalar *xx; 110da83c2eSBarry Smith PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 120da83c2eSBarry Smith PetscInt nblocks; 130da83c2eSBarry Smith const PetscInt *bsizes; 140da83c2eSBarry Smith 150da83c2eSBarry Smith PetscFunctionBegin; 169566063dSJacob Faibussowitsch PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 179566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 189566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yy)); 190da83c2eSBarry Smith for (i = 0; i < nblocks; i++) { 200da83c2eSBarry Smith bs = bsizes[i]; 210da83c2eSBarry Smith switch (bs) { 22d71ae5a4SJacob Faibussowitsch case 1: 23d71ae5a4SJacob Faibussowitsch yy[ncnt] = *diag * xx[ncnt]; 24d71ae5a4SJacob Faibussowitsch break; 250da83c2eSBarry Smith case 2: 269371c9d4SSatish Balay x0 = xx[ncnt]; 279371c9d4SSatish Balay x1 = xx[ncnt + 1]; 280da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[2] * x1; 290da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1; 300da83c2eSBarry Smith break; 310da83c2eSBarry Smith case 3: 329371c9d4SSatish Balay x0 = xx[ncnt]; 339371c9d4SSatish Balay x1 = xx[ncnt + 1]; 349371c9d4SSatish Balay x2 = xx[ncnt + 2]; 350da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 360da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 370da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 380da83c2eSBarry Smith break; 390da83c2eSBarry Smith case 4: 409371c9d4SSatish Balay x0 = xx[ncnt]; 419371c9d4SSatish Balay x1 = xx[ncnt + 1]; 429371c9d4SSatish Balay x2 = xx[ncnt + 2]; 439371c9d4SSatish Balay x3 = xx[ncnt + 3]; 440da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 450da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 460da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 470da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 480da83c2eSBarry Smith break; 490da83c2eSBarry Smith case 5: 509371c9d4SSatish Balay x0 = xx[ncnt]; 519371c9d4SSatish Balay x1 = xx[ncnt + 1]; 529371c9d4SSatish Balay x2 = xx[ncnt + 2]; 539371c9d4SSatish Balay x3 = xx[ncnt + 3]; 549371c9d4SSatish Balay x4 = xx[ncnt + 4]; 550da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 560da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 570da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 580da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 590da83c2eSBarry Smith yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 600da83c2eSBarry Smith break; 610da83c2eSBarry Smith case 6: 629371c9d4SSatish Balay x0 = xx[ncnt]; 639371c9d4SSatish Balay x1 = xx[ncnt + 1]; 649371c9d4SSatish Balay x2 = xx[ncnt + 2]; 659371c9d4SSatish Balay x3 = xx[ncnt + 3]; 669371c9d4SSatish Balay x4 = xx[ncnt + 4]; 679371c9d4SSatish Balay x5 = xx[ncnt + 5]; 680da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 690da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 700da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 710da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 720da83c2eSBarry Smith yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 730da83c2eSBarry Smith yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 740da83c2eSBarry Smith break; 750da83c2eSBarry Smith case 7: 769371c9d4SSatish Balay x0 = xx[ncnt]; 779371c9d4SSatish Balay x1 = xx[ncnt + 1]; 789371c9d4SSatish Balay x2 = xx[ncnt + 2]; 799371c9d4SSatish Balay x3 = xx[ncnt + 3]; 809371c9d4SSatish Balay x4 = xx[ncnt + 4]; 819371c9d4SSatish Balay x5 = xx[ncnt + 5]; 829371c9d4SSatish Balay x6 = xx[ncnt + 6]; 830da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 840da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 850da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 860da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 870da83c2eSBarry Smith yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 880da83c2eSBarry Smith yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 890da83c2eSBarry Smith yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 900da83c2eSBarry Smith break; 910da83c2eSBarry Smith default: 920da83c2eSBarry Smith for (ib = 0; ib < bs; ib++) { 930da83c2eSBarry Smith PetscScalar rowsum = 0; 94ad540459SPierre Jolivet for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[ncnt + jb]; 950da83c2eSBarry Smith yy[ncnt + ib] = rowsum; 960da83c2eSBarry Smith } 970da83c2eSBarry Smith } 980da83c2eSBarry Smith ncnt += bsizes[i]; 990da83c2eSBarry Smith diag += bsizes[i] * bsizes[i]; 1000da83c2eSBarry Smith } 1019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 1029566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yy)); 1033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1040da83c2eSBarry Smith } 1050da83c2eSBarry Smith 10669eda9daSJed Brown static PetscErrorCode PCApplyTranspose_VPBJacobi(PC pc, Vec x, Vec y) 10769eda9daSJed Brown { 10869eda9daSJed Brown PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 10969eda9daSJed Brown PetscInt i, ncnt = 0; 11069eda9daSJed Brown const MatScalar *diag = jac->diag; 11169eda9daSJed Brown PetscInt ib, jb, bs; 11269eda9daSJed Brown const PetscScalar *xx; 11369eda9daSJed Brown PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 11469eda9daSJed Brown PetscInt nblocks; 11569eda9daSJed Brown const PetscInt *bsizes; 11669eda9daSJed Brown 11769eda9daSJed Brown PetscFunctionBegin; 11869eda9daSJed Brown PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 11969eda9daSJed Brown PetscCall(VecGetArrayRead(x, &xx)); 12069eda9daSJed Brown PetscCall(VecGetArray(y, &yy)); 12169eda9daSJed Brown for (i = 0; i < nblocks; i++) { 12269eda9daSJed Brown bs = bsizes[i]; 12369eda9daSJed Brown switch (bs) { 12469eda9daSJed Brown case 1: 12569eda9daSJed Brown yy[ncnt] = *diag * xx[ncnt]; 12669eda9daSJed Brown break; 12769eda9daSJed Brown case 2: 12869eda9daSJed Brown x0 = xx[ncnt]; 12969eda9daSJed Brown x1 = xx[ncnt + 1]; 13069eda9daSJed Brown yy[ncnt] = diag[0] * x0 + diag[1] * x1; 13169eda9daSJed Brown yy[ncnt + 1] = diag[2] * x0 + diag[3] * x1; 13269eda9daSJed Brown break; 13369eda9daSJed Brown case 3: 13469eda9daSJed Brown x0 = xx[ncnt]; 13569eda9daSJed Brown x1 = xx[ncnt + 1]; 13669eda9daSJed Brown x2 = xx[ncnt + 2]; 13769eda9daSJed Brown yy[ncnt] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2; 13869eda9daSJed Brown yy[ncnt + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2; 13969eda9daSJed Brown yy[ncnt + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2; 14069eda9daSJed Brown break; 14169eda9daSJed Brown case 4: 14269eda9daSJed Brown x0 = xx[ncnt]; 14369eda9daSJed Brown x1 = xx[ncnt + 1]; 14469eda9daSJed Brown x2 = xx[ncnt + 2]; 14569eda9daSJed Brown x3 = xx[ncnt + 3]; 14669eda9daSJed Brown yy[ncnt] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3; 14769eda9daSJed Brown yy[ncnt + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3; 14869eda9daSJed Brown yy[ncnt + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3; 14969eda9daSJed Brown yy[ncnt + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3; 15069eda9daSJed Brown break; 15169eda9daSJed Brown case 5: 15269eda9daSJed Brown x0 = xx[ncnt]; 15369eda9daSJed Brown x1 = xx[ncnt + 1]; 15469eda9daSJed Brown x2 = xx[ncnt + 2]; 15569eda9daSJed Brown x3 = xx[ncnt + 3]; 15669eda9daSJed Brown x4 = xx[ncnt + 4]; 15769eda9daSJed Brown yy[ncnt] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4; 15869eda9daSJed Brown yy[ncnt + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4; 15969eda9daSJed Brown yy[ncnt + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4; 16069eda9daSJed Brown yy[ncnt + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4; 16169eda9daSJed Brown yy[ncnt + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4; 16269eda9daSJed Brown break; 16369eda9daSJed Brown case 6: 16469eda9daSJed Brown x0 = xx[ncnt]; 16569eda9daSJed Brown x1 = xx[ncnt + 1]; 16669eda9daSJed Brown x2 = xx[ncnt + 2]; 16769eda9daSJed Brown x3 = xx[ncnt + 3]; 16869eda9daSJed Brown x4 = xx[ncnt + 4]; 16969eda9daSJed Brown x5 = xx[ncnt + 5]; 17069eda9daSJed Brown yy[ncnt] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5; 17169eda9daSJed Brown yy[ncnt + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5; 17269eda9daSJed Brown yy[ncnt + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5; 17369eda9daSJed Brown yy[ncnt + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5; 17469eda9daSJed Brown yy[ncnt + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5; 17569eda9daSJed Brown yy[ncnt + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5; 17669eda9daSJed Brown break; 17769eda9daSJed Brown case 7: 17869eda9daSJed Brown x0 = xx[ncnt]; 17969eda9daSJed Brown x1 = xx[ncnt + 1]; 18069eda9daSJed Brown x2 = xx[ncnt + 2]; 18169eda9daSJed Brown x3 = xx[ncnt + 3]; 18269eda9daSJed Brown x4 = xx[ncnt + 4]; 18369eda9daSJed Brown x5 = xx[ncnt + 5]; 18469eda9daSJed Brown x6 = xx[ncnt + 6]; 18569eda9daSJed Brown yy[ncnt] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6; 18669eda9daSJed Brown yy[ncnt + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6; 18769eda9daSJed Brown yy[ncnt + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6; 18869eda9daSJed Brown yy[ncnt + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6; 18969eda9daSJed Brown yy[ncnt + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6; 19069eda9daSJed Brown yy[ncnt + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6; 19169eda9daSJed Brown yy[ncnt + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6; 19269eda9daSJed Brown break; 19369eda9daSJed Brown default: 19469eda9daSJed Brown for (ib = 0; ib < bs; ib++) { 19569eda9daSJed Brown PetscScalar rowsum = 0; 19669eda9daSJed Brown for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[ncnt + jb]; 19769eda9daSJed Brown yy[ncnt + ib] = rowsum; 19869eda9daSJed Brown } 19969eda9daSJed Brown } 20069eda9daSJed Brown ncnt += bsizes[i]; 20169eda9daSJed Brown diag += bsizes[i] * bsizes[i]; 20269eda9daSJed Brown } 20369eda9daSJed Brown PetscCall(VecRestoreArrayRead(x, &xx)); 20469eda9daSJed Brown PetscCall(VecRestoreArray(y, &yy)); 2053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20669eda9daSJed Brown } 20769eda9daSJed Brown 20842ce410bSJunchao Zhang PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc, Mat diagVPB) 209d71ae5a4SJacob Faibussowitsch { 2100da83c2eSBarry Smith PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 21142ce410bSJunchao Zhang Mat A = diagVPB ? diagVPB : pc->pmat; 2120da83c2eSBarry Smith MatFactorError err; 2130da83c2eSBarry Smith PetscInt i, nsize = 0, nlocal; 2140da83c2eSBarry Smith PetscInt nblocks; 2150da83c2eSBarry Smith const PetscInt *bsizes; 2160da83c2eSBarry Smith 2170da83c2eSBarry Smith PetscFunctionBegin; 2189566063dSJacob Faibussowitsch PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 2199566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL)); 22008401ef6SPierre Jolivet PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI"); 2210da83c2eSBarry Smith if (!jac->diag) { 2221690c2aeSBarry Smith PetscInt max_bs = -1, min_bs = PETSC_INT_MAX; 2230a94ea6bSJed Brown for (i = 0; i < nblocks; i++) { 2240a94ea6bSJed Brown min_bs = PetscMin(min_bs, bsizes[i]); 2250a94ea6bSJed Brown max_bs = PetscMax(max_bs, bsizes[i]); 2260a94ea6bSJed Brown nsize += bsizes[i] * bsizes[i]; 2270a94ea6bSJed Brown } 2289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsize, &jac->diag)); 2290a94ea6bSJed Brown jac->nblocks = nblocks; 2300a94ea6bSJed Brown jac->min_bs = min_bs; 2310a94ea6bSJed Brown jac->max_bs = max_bs; 2320da83c2eSBarry Smith } 2339566063dSJacob Faibussowitsch PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag)); 2349566063dSJacob Faibussowitsch PetscCall(MatFactorGetError(A, &err)); 2350da83c2eSBarry Smith if (err) pc->failedreason = (PCFailedReason)err; 2360da83c2eSBarry Smith pc->ops->apply = PCApply_VPBJacobi; 23769eda9daSJed Brown pc->ops->applytranspose = PCApplyTranspose_VPBJacobi; 2383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2390da83c2eSBarry Smith } 2408a9c020eSBarry Smith 241d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_VPBJacobi(PC pc) 242d71ae5a4SJacob Faibussowitsch { 24342ce410bSJunchao Zhang PetscBool flg; 24442ce410bSJunchao Zhang Mat diagVPB = NULL; 24542ce410bSJunchao Zhang 246f1be3500SJunchao Zhang PetscFunctionBegin; 24742ce410bSJunchao Zhang // In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch 24842ce410bSJunchao Zhang 24942ce410bSJunchao Zhang // pmat (e.g., MatCEED from libCEED) might have its own method to provide a matrix (diagVPB) 25042ce410bSJunchao Zhang // made of the diagonal blocks. So we check both pmat and diagVPB. 25142ce410bSJunchao Zhang PetscCall(MatHasOperation(pc->pmat, MATOP_GET_VBLOCK_DIAGONAL, &flg)); 25242ce410bSJunchao Zhang if (flg) PetscUseTypeMethod(pc->pmat, getvblockdiagonal, &diagVPB); // diagVPB's reference count is increased upon return 25342ce410bSJunchao Zhang 254f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 255f1be3500SJunchao Zhang PetscBool isCuda; 256f1be3500SJunchao Zhang PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 25742ce410bSJunchao Zhang if (!isCuda && diagVPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagVPB, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 258f1be3500SJunchao Zhang #endif 259f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS) 260f1be3500SJunchao Zhang PetscBool isKok; 261f1be3500SJunchao Zhang PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 26242ce410bSJunchao Zhang if (!isKok && diagVPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagVPB, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 263f1be3500SJunchao Zhang #endif 264f1be3500SJunchao Zhang 265f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 26642ce410bSJunchao Zhang if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc, diagVPB)); 2679371c9d4SSatish Balay else 268f1be3500SJunchao Zhang #endif 269f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS) 2709371c9d4SSatish Balay if (isKok) 27142ce410bSJunchao Zhang PetscCall(PCSetUp_VPBJacobi_Kokkos(pc, diagVPB)); 2729371c9d4SSatish Balay else 273f1be3500SJunchao Zhang #endif 2749371c9d4SSatish Balay { 27542ce410bSJunchao Zhang PetscCall(PCSetUp_VPBJacobi_Host(pc, diagVPB)); 2769371c9d4SSatish Balay } 27742ce410bSJunchao Zhang PetscCall(MatDestroy(&diagVPB)); // since we don't need it anymore, we don't need to stash it in PC_VPBJacobi 2783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 279f1be3500SJunchao Zhang } 280f1be3500SJunchao Zhang 2810a94ea6bSJed Brown static PetscErrorCode PCView_VPBJacobi(PC pc, PetscViewer viewer) 2820a94ea6bSJed Brown { 2830a94ea6bSJed Brown PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 2849f196a02SMartin Diehl PetscBool isascii; 2850a94ea6bSJed Brown 2860a94ea6bSJed Brown PetscFunctionBegin; 2879f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2889f196a02SMartin Diehl if (isascii) { 2890a94ea6bSJed Brown PetscCall(PetscViewerASCIIPrintf(viewer, " number of blocks: %" PetscInt_FMT "\n", jac->nblocks)); 2900a94ea6bSJed Brown PetscCall(PetscViewerASCIIPrintf(viewer, " block sizes: min=%" PetscInt_FMT " max=%" PetscInt_FMT "\n", jac->min_bs, jac->max_bs)); 2910a94ea6bSJed Brown } 2923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2930a94ea6bSJed Brown } 2940a94ea6bSJed Brown 295d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc) 296d71ae5a4SJacob Faibussowitsch { 2970da83c2eSBarry Smith PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 2980da83c2eSBarry Smith 2990da83c2eSBarry Smith PetscFunctionBegin; 3000da83c2eSBarry Smith /* 3010da83c2eSBarry Smith Free the private data structure that was hanging off the PC 3020da83c2eSBarry Smith */ 3039566063dSJacob Faibussowitsch PetscCall(PetscFree(jac->diag)); 30442ce410bSJunchao Zhang PetscCall(MatDestroy(&jac->diagVPB)); 3059566063dSJacob Faibussowitsch PetscCall(PetscFree(pc->data)); 3063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3070da83c2eSBarry Smith } 3080da83c2eSBarry Smith 3090da83c2eSBarry Smith /*MC 3100da83c2eSBarry Smith PCVPBJACOBI - Variable size point block Jacobi preconditioner 3110da83c2eSBarry Smith 312f1580f4eSBarry Smith Level: beginner 3130da83c2eSBarry Smith 314f1580f4eSBarry Smith Notes: 315f1580f4eSBarry Smith See `PCJACOBI` for point Jacobi preconditioning, `PCPBJACOBI` for fixed point block size, and `PCBJACOBI` for large size blocks 316f1580f4eSBarry Smith 317f1580f4eSBarry Smith This works for `MATAIJ` matrices 3180da83c2eSBarry Smith 3190da83c2eSBarry Smith Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 3200da83c2eSBarry Smith is detected a PETSc error is generated. 3210da83c2eSBarry Smith 322f1580f4eSBarry Smith One must call `MatSetVariableBlockSizes()` to use this preconditioner 32390dfcc32SBarry Smith 3240da83c2eSBarry Smith Developer Notes: 325f1580f4eSBarry Smith This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow 326*76c63389SBarry Smith the factorization to continue even after a zero pivot is found resulting in a NaN and hence 32748773899SPierre Jolivet terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing 3280da83c2eSBarry Smith a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 3290da83c2eSBarry Smith 3300da83c2eSBarry Smith Perhaps should provide an option that allows generation of a valid preconditioner 331f1580f4eSBarry Smith even if a block is singular as the `PCJACOBI` does. 3320da83c2eSBarry Smith 333562efe2eSBarry Smith .seealso: [](ch_ksp), `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI` 3340da83c2eSBarry Smith M*/ 3350da83c2eSBarry Smith 336d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) 337d71ae5a4SJacob Faibussowitsch { 3380da83c2eSBarry Smith PC_VPBJacobi *jac; 3390da83c2eSBarry Smith 3400da83c2eSBarry Smith PetscFunctionBegin; 3410da83c2eSBarry Smith /* 3420da83c2eSBarry Smith Creates the private data structure for this preconditioner and 3430da83c2eSBarry Smith attach it to the PC object. 3440da83c2eSBarry Smith */ 3454dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&jac)); 3460da83c2eSBarry Smith pc->data = (void *)jac; 3470da83c2eSBarry Smith 3480da83c2eSBarry Smith /* 3490da83c2eSBarry Smith Initialize the pointers to vectors to ZERO; these will be used to store 3500da83c2eSBarry Smith diagonal entries of the matrix for fast preconditioner application. 3510da83c2eSBarry Smith */ 3520da83c2eSBarry Smith jac->diag = NULL; 3530da83c2eSBarry Smith 3540da83c2eSBarry Smith /* 3550da83c2eSBarry Smith Set the pointers for the functions that are provided above. 3560da83c2eSBarry Smith Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 3570da83c2eSBarry Smith are called, they will automatically call these functions. Note we 3580da83c2eSBarry Smith choose not to provide a couple of these functions since they are 3590da83c2eSBarry Smith not needed. 3600da83c2eSBarry Smith */ 3610da83c2eSBarry Smith pc->ops->apply = PCApply_VPBJacobi; 3620a545947SLisandro Dalcin pc->ops->applytranspose = NULL; 3630da83c2eSBarry Smith pc->ops->setup = PCSetUp_VPBJacobi; 3640da83c2eSBarry Smith pc->ops->destroy = PCDestroy_VPBJacobi; 3650a545947SLisandro Dalcin pc->ops->setfromoptions = NULL; 3660a94ea6bSJed Brown pc->ops->view = PCView_VPBJacobi; 3670a545947SLisandro Dalcin pc->ops->applyrichardson = NULL; 3680a545947SLisandro Dalcin pc->ops->applysymmetricleft = NULL; 3690a545947SLisandro Dalcin pc->ops->applysymmetricright = NULL; 3703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3710da83c2eSBarry Smith } 372