1f1be3500SJunchao Zhang #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h> 20da83c2eSBarry Smith 3*9371c9d4SSatish Balay static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y) { 40da83c2eSBarry Smith PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 50da83c2eSBarry Smith PetscInt i, ncnt = 0; 60da83c2eSBarry Smith const MatScalar *diag = jac->diag; 70da83c2eSBarry Smith PetscInt ib, jb, bs; 80da83c2eSBarry Smith const PetscScalar *xx; 90da83c2eSBarry Smith PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 100da83c2eSBarry Smith PetscInt nblocks; 110da83c2eSBarry Smith const PetscInt *bsizes; 120da83c2eSBarry Smith 130da83c2eSBarry Smith PetscFunctionBegin; 149566063dSJacob Faibussowitsch PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 169566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yy)); 170da83c2eSBarry Smith for (i = 0; i < nblocks; i++) { 180da83c2eSBarry Smith bs = bsizes[i]; 190da83c2eSBarry Smith switch (bs) { 20*9371c9d4SSatish Balay case 1: yy[ncnt] = *diag * xx[ncnt]; break; 210da83c2eSBarry Smith case 2: 22*9371c9d4SSatish Balay x0 = xx[ncnt]; 23*9371c9d4SSatish Balay x1 = xx[ncnt + 1]; 240da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[2] * x1; 250da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1; 260da83c2eSBarry Smith break; 270da83c2eSBarry Smith case 3: 28*9371c9d4SSatish Balay x0 = xx[ncnt]; 29*9371c9d4SSatish Balay x1 = xx[ncnt + 1]; 30*9371c9d4SSatish Balay x2 = xx[ncnt + 2]; 310da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 320da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 330da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 340da83c2eSBarry Smith break; 350da83c2eSBarry Smith case 4: 36*9371c9d4SSatish Balay x0 = xx[ncnt]; 37*9371c9d4SSatish Balay x1 = xx[ncnt + 1]; 38*9371c9d4SSatish Balay x2 = xx[ncnt + 2]; 39*9371c9d4SSatish Balay x3 = xx[ncnt + 3]; 400da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 410da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 420da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 430da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 440da83c2eSBarry Smith break; 450da83c2eSBarry Smith case 5: 46*9371c9d4SSatish Balay x0 = xx[ncnt]; 47*9371c9d4SSatish Balay x1 = xx[ncnt + 1]; 48*9371c9d4SSatish Balay x2 = xx[ncnt + 2]; 49*9371c9d4SSatish Balay x3 = xx[ncnt + 3]; 50*9371c9d4SSatish Balay x4 = xx[ncnt + 4]; 510da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 520da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 530da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 540da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 550da83c2eSBarry Smith yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 560da83c2eSBarry Smith break; 570da83c2eSBarry Smith case 6: 58*9371c9d4SSatish Balay x0 = xx[ncnt]; 59*9371c9d4SSatish Balay x1 = xx[ncnt + 1]; 60*9371c9d4SSatish Balay x2 = xx[ncnt + 2]; 61*9371c9d4SSatish Balay x3 = xx[ncnt + 3]; 62*9371c9d4SSatish Balay x4 = xx[ncnt + 4]; 63*9371c9d4SSatish Balay x5 = xx[ncnt + 5]; 640da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 650da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 660da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 670da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 680da83c2eSBarry Smith yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 690da83c2eSBarry Smith yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 700da83c2eSBarry Smith break; 710da83c2eSBarry Smith case 7: 72*9371c9d4SSatish Balay x0 = xx[ncnt]; 73*9371c9d4SSatish Balay x1 = xx[ncnt + 1]; 74*9371c9d4SSatish Balay x2 = xx[ncnt + 2]; 75*9371c9d4SSatish Balay x3 = xx[ncnt + 3]; 76*9371c9d4SSatish Balay x4 = xx[ncnt + 4]; 77*9371c9d4SSatish Balay x5 = xx[ncnt + 5]; 78*9371c9d4SSatish Balay x6 = xx[ncnt + 6]; 790da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 800da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 810da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 820da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 830da83c2eSBarry Smith yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 840da83c2eSBarry Smith yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 850da83c2eSBarry Smith yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 860da83c2eSBarry Smith break; 870da83c2eSBarry Smith default: 880da83c2eSBarry Smith for (ib = 0; ib < bs; ib++) { 890da83c2eSBarry Smith PetscScalar rowsum = 0; 90*9371c9d4SSatish Balay for (jb = 0; jb < bs; jb++) { rowsum += diag[ib + jb * bs] * xx[ncnt + jb]; } 910da83c2eSBarry Smith yy[ncnt + ib] = rowsum; 920da83c2eSBarry Smith } 930da83c2eSBarry Smith } 940da83c2eSBarry Smith ncnt += bsizes[i]; 950da83c2eSBarry Smith diag += bsizes[i] * bsizes[i]; 960da83c2eSBarry Smith } 979566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 989566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yy)); 990da83c2eSBarry Smith PetscFunctionReturn(0); 1000da83c2eSBarry Smith } 1010da83c2eSBarry Smith 102*9371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc) { 1030da83c2eSBarry Smith PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 1040da83c2eSBarry Smith Mat A = pc->pmat; 1050da83c2eSBarry Smith MatFactorError err; 1060da83c2eSBarry Smith PetscInt i, nsize = 0, nlocal; 1070da83c2eSBarry Smith PetscInt nblocks; 1080da83c2eSBarry Smith const PetscInt *bsizes; 1090da83c2eSBarry Smith 1100da83c2eSBarry Smith PetscFunctionBegin; 1119566063dSJacob Faibussowitsch PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 1129566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL)); 11308401ef6SPierre Jolivet PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI"); 1140da83c2eSBarry Smith if (!jac->diag) { 1150da83c2eSBarry Smith for (i = 0; i < nblocks; i++) nsize += bsizes[i] * bsizes[i]; 1169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsize, &jac->diag)); 1170da83c2eSBarry Smith } 1189566063dSJacob Faibussowitsch PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag)); 1199566063dSJacob Faibussowitsch PetscCall(MatFactorGetError(A, &err)); 1200da83c2eSBarry Smith if (err) pc->failedreason = (PCFailedReason)err; 1210da83c2eSBarry Smith pc->ops->apply = PCApply_VPBJacobi; 1220da83c2eSBarry Smith PetscFunctionReturn(0); 1230da83c2eSBarry Smith } 1248a9c020eSBarry Smith 125*9371c9d4SSatish Balay static PetscErrorCode PCSetUp_VPBJacobi(PC pc) { 126f1be3500SJunchao Zhang PetscFunctionBegin; 127f1be3500SJunchao Zhang /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */ 128f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 129f1be3500SJunchao Zhang PetscBool isCuda; 130f1be3500SJunchao Zhang PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 131f1be3500SJunchao Zhang #endif 132f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS) 133f1be3500SJunchao Zhang PetscBool isKok; 134f1be3500SJunchao Zhang PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 135f1be3500SJunchao Zhang #endif 136f1be3500SJunchao Zhang 137f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 138*9371c9d4SSatish Balay if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc)); 139*9371c9d4SSatish Balay else 140f1be3500SJunchao Zhang #endif 141f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS) 142*9371c9d4SSatish Balay if (isKok) 143*9371c9d4SSatish Balay PetscCall(PCSetUp_VPBJacobi_Kokkos(pc)); 144*9371c9d4SSatish Balay else 145f1be3500SJunchao Zhang #endif 146*9371c9d4SSatish Balay { 147*9371c9d4SSatish Balay PetscCall(PCSetUp_VPBJacobi_Host(pc)); 148*9371c9d4SSatish Balay } 149f1be3500SJunchao Zhang PetscFunctionReturn(0); 150f1be3500SJunchao Zhang } 151f1be3500SJunchao Zhang 152*9371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc) { 1530da83c2eSBarry Smith PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 1540da83c2eSBarry Smith 1550da83c2eSBarry Smith PetscFunctionBegin; 1560da83c2eSBarry Smith /* 1570da83c2eSBarry Smith Free the private data structure that was hanging off the PC 1580da83c2eSBarry Smith */ 1599566063dSJacob Faibussowitsch PetscCall(PetscFree(jac->diag)); 1609566063dSJacob Faibussowitsch PetscCall(PetscFree(pc->data)); 1610da83c2eSBarry Smith PetscFunctionReturn(0); 1620da83c2eSBarry Smith } 1630da83c2eSBarry Smith 1640da83c2eSBarry Smith /*MC 1650da83c2eSBarry Smith PCVPBJACOBI - Variable size point block Jacobi preconditioner 1660da83c2eSBarry Smith 1670da83c2eSBarry Smith Notes: 1680da83c2eSBarry Smith See PCJACOBI for point Jacobi preconditioning, PCPBJACOBI for fixed point block size, and PCBJACOBI for large size blocks 1690da83c2eSBarry Smith 1700da83c2eSBarry Smith This works for AIJ matrices 1710da83c2eSBarry Smith 1720da83c2eSBarry Smith Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 1730da83c2eSBarry Smith is detected a PETSc error is generated. 1740da83c2eSBarry Smith 1750da83c2eSBarry Smith One must call MatSetVariableBlockSizes() to use this preconditioner 17690dfcc32SBarry Smith 1770da83c2eSBarry Smith Developer Notes: 1780da83c2eSBarry Smith This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 1790da83c2eSBarry Smith the factorization to continue even after a zero pivot is found resulting in a Nan and hence 1800da83c2eSBarry Smith terminating KSP with a KSP_DIVERGED_NANORIF allowing 1810da83c2eSBarry Smith a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 1820da83c2eSBarry Smith 1830da83c2eSBarry Smith Perhaps should provide an option that allows generation of a valid preconditioner 1840da83c2eSBarry Smith even if a block is singular as the PCJACOBI does. 1850da83c2eSBarry Smith 1860da83c2eSBarry Smith Level: beginner 1870da83c2eSBarry Smith 188db781477SPatrick Sanan .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI` 1890da83c2eSBarry Smith 1900da83c2eSBarry Smith M*/ 1910da83c2eSBarry Smith 192*9371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) { 1930da83c2eSBarry Smith PC_VPBJacobi *jac; 1940da83c2eSBarry Smith 1950da83c2eSBarry Smith PetscFunctionBegin; 1960da83c2eSBarry Smith /* 1970da83c2eSBarry Smith Creates the private data structure for this preconditioner and 1980da83c2eSBarry Smith attach it to the PC object. 1990da83c2eSBarry Smith */ 2009566063dSJacob Faibussowitsch PetscCall(PetscNewLog(pc, &jac)); 2010da83c2eSBarry Smith pc->data = (void *)jac; 2020da83c2eSBarry Smith 2030da83c2eSBarry Smith /* 2040da83c2eSBarry Smith Initialize the pointers to vectors to ZERO; these will be used to store 2050da83c2eSBarry Smith diagonal entries of the matrix for fast preconditioner application. 2060da83c2eSBarry Smith */ 2070da83c2eSBarry Smith jac->diag = NULL; 2080da83c2eSBarry Smith 2090da83c2eSBarry Smith /* 2100da83c2eSBarry Smith Set the pointers for the functions that are provided above. 2110da83c2eSBarry Smith Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 2120da83c2eSBarry Smith are called, they will automatically call these functions. Note we 2130da83c2eSBarry Smith choose not to provide a couple of these functions since they are 2140da83c2eSBarry Smith not needed. 2150da83c2eSBarry Smith */ 2160da83c2eSBarry Smith pc->ops->apply = PCApply_VPBJacobi; 2170a545947SLisandro Dalcin pc->ops->applytranspose = NULL; 2180da83c2eSBarry Smith pc->ops->setup = PCSetUp_VPBJacobi; 2190da83c2eSBarry Smith pc->ops->destroy = PCDestroy_VPBJacobi; 2200a545947SLisandro Dalcin pc->ops->setfromoptions = NULL; 2210a545947SLisandro Dalcin pc->ops->applyrichardson = NULL; 2220a545947SLisandro Dalcin pc->ops->applysymmetricleft = NULL; 2230a545947SLisandro Dalcin pc->ops->applysymmetricright = NULL; 2240da83c2eSBarry Smith PetscFunctionReturn(0); 2250da83c2eSBarry Smith } 226