1f1be3500SJunchao Zhang #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h> 20da83c2eSBarry Smith 39371c9d4SSatish Balay static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y) { 40da83c2eSBarry Smith PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 50da83c2eSBarry Smith PetscInt i, ncnt = 0; 60da83c2eSBarry Smith const MatScalar *diag = jac->diag; 70da83c2eSBarry Smith PetscInt ib, jb, bs; 80da83c2eSBarry Smith const PetscScalar *xx; 90da83c2eSBarry Smith PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 100da83c2eSBarry Smith PetscInt nblocks; 110da83c2eSBarry Smith const PetscInt *bsizes; 120da83c2eSBarry Smith 130da83c2eSBarry Smith PetscFunctionBegin; 149566063dSJacob Faibussowitsch PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 169566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yy)); 170da83c2eSBarry Smith for (i = 0; i < nblocks; i++) { 180da83c2eSBarry Smith bs = bsizes[i]; 190da83c2eSBarry Smith switch (bs) { 209371c9d4SSatish Balay case 1: yy[ncnt] = *diag * xx[ncnt]; break; 210da83c2eSBarry Smith case 2: 229371c9d4SSatish Balay x0 = xx[ncnt]; 239371c9d4SSatish Balay x1 = xx[ncnt + 1]; 240da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[2] * x1; 250da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1; 260da83c2eSBarry Smith break; 270da83c2eSBarry Smith case 3: 289371c9d4SSatish Balay x0 = xx[ncnt]; 299371c9d4SSatish Balay x1 = xx[ncnt + 1]; 309371c9d4SSatish Balay x2 = xx[ncnt + 2]; 310da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 320da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 330da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 340da83c2eSBarry Smith break; 350da83c2eSBarry Smith case 4: 369371c9d4SSatish Balay x0 = xx[ncnt]; 379371c9d4SSatish Balay x1 = xx[ncnt + 1]; 389371c9d4SSatish Balay x2 = xx[ncnt + 2]; 399371c9d4SSatish Balay x3 = xx[ncnt + 3]; 400da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 410da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 420da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 430da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 440da83c2eSBarry Smith break; 450da83c2eSBarry Smith case 5: 469371c9d4SSatish Balay x0 = xx[ncnt]; 479371c9d4SSatish Balay x1 = xx[ncnt + 1]; 489371c9d4SSatish Balay x2 = xx[ncnt + 2]; 499371c9d4SSatish Balay x3 = xx[ncnt + 3]; 509371c9d4SSatish Balay x4 = xx[ncnt + 4]; 510da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 520da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 530da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 540da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 550da83c2eSBarry Smith yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 560da83c2eSBarry Smith break; 570da83c2eSBarry Smith case 6: 589371c9d4SSatish Balay x0 = xx[ncnt]; 599371c9d4SSatish Balay x1 = xx[ncnt + 1]; 609371c9d4SSatish Balay x2 = xx[ncnt + 2]; 619371c9d4SSatish Balay x3 = xx[ncnt + 3]; 629371c9d4SSatish Balay x4 = xx[ncnt + 4]; 639371c9d4SSatish Balay x5 = xx[ncnt + 5]; 640da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 650da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 660da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 670da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 680da83c2eSBarry Smith yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 690da83c2eSBarry Smith yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 700da83c2eSBarry Smith break; 710da83c2eSBarry Smith case 7: 729371c9d4SSatish Balay x0 = xx[ncnt]; 739371c9d4SSatish Balay x1 = xx[ncnt + 1]; 749371c9d4SSatish Balay x2 = xx[ncnt + 2]; 759371c9d4SSatish Balay x3 = xx[ncnt + 3]; 769371c9d4SSatish Balay x4 = xx[ncnt + 4]; 779371c9d4SSatish Balay x5 = xx[ncnt + 5]; 789371c9d4SSatish Balay x6 = xx[ncnt + 6]; 790da83c2eSBarry Smith yy[ncnt] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 800da83c2eSBarry Smith yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 810da83c2eSBarry Smith yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 820da83c2eSBarry Smith yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 830da83c2eSBarry Smith yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 840da83c2eSBarry Smith yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 850da83c2eSBarry Smith yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 860da83c2eSBarry Smith break; 870da83c2eSBarry Smith default: 880da83c2eSBarry Smith for (ib = 0; ib < bs; ib++) { 890da83c2eSBarry Smith PetscScalar rowsum = 0; 90ad540459SPierre Jolivet for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[ncnt + jb]; 910da83c2eSBarry Smith yy[ncnt + ib] = rowsum; 920da83c2eSBarry Smith } 930da83c2eSBarry Smith } 940da83c2eSBarry Smith ncnt += bsizes[i]; 950da83c2eSBarry Smith diag += bsizes[i] * bsizes[i]; 960da83c2eSBarry Smith } 979566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 989566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yy)); 990da83c2eSBarry Smith PetscFunctionReturn(0); 1000da83c2eSBarry Smith } 1010da83c2eSBarry Smith 1029371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc) { 1030da83c2eSBarry Smith PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 1040da83c2eSBarry Smith Mat A = pc->pmat; 1050da83c2eSBarry Smith MatFactorError err; 1060da83c2eSBarry Smith PetscInt i, nsize = 0, nlocal; 1070da83c2eSBarry Smith PetscInt nblocks; 1080da83c2eSBarry Smith const PetscInt *bsizes; 1090da83c2eSBarry Smith 1100da83c2eSBarry Smith PetscFunctionBegin; 1119566063dSJacob Faibussowitsch PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 1129566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL)); 11308401ef6SPierre Jolivet PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI"); 1140da83c2eSBarry Smith if (!jac->diag) { 1150da83c2eSBarry Smith for (i = 0; i < nblocks; i++) nsize += bsizes[i] * bsizes[i]; 1169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsize, &jac->diag)); 1170da83c2eSBarry Smith } 1189566063dSJacob Faibussowitsch PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag)); 1199566063dSJacob Faibussowitsch PetscCall(MatFactorGetError(A, &err)); 1200da83c2eSBarry Smith if (err) pc->failedreason = (PCFailedReason)err; 1210da83c2eSBarry Smith pc->ops->apply = PCApply_VPBJacobi; 1220da83c2eSBarry Smith PetscFunctionReturn(0); 1230da83c2eSBarry Smith } 1248a9c020eSBarry Smith 1259371c9d4SSatish Balay static PetscErrorCode PCSetUp_VPBJacobi(PC pc) { 126f1be3500SJunchao Zhang PetscFunctionBegin; 127f1be3500SJunchao Zhang /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */ 128f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 129f1be3500SJunchao Zhang PetscBool isCuda; 130f1be3500SJunchao Zhang PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 131f1be3500SJunchao Zhang #endif 132f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS) 133f1be3500SJunchao Zhang PetscBool isKok; 134f1be3500SJunchao Zhang PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 135f1be3500SJunchao Zhang #endif 136f1be3500SJunchao Zhang 137f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 1389371c9d4SSatish Balay if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc)); 1399371c9d4SSatish Balay else 140f1be3500SJunchao Zhang #endif 141f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1429371c9d4SSatish Balay if (isKok) 1439371c9d4SSatish Balay PetscCall(PCSetUp_VPBJacobi_Kokkos(pc)); 1449371c9d4SSatish Balay else 145f1be3500SJunchao Zhang #endif 1469371c9d4SSatish Balay { 1479371c9d4SSatish Balay PetscCall(PCSetUp_VPBJacobi_Host(pc)); 1489371c9d4SSatish Balay } 149f1be3500SJunchao Zhang PetscFunctionReturn(0); 150f1be3500SJunchao Zhang } 151f1be3500SJunchao Zhang 1529371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc) { 1530da83c2eSBarry Smith PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 1540da83c2eSBarry Smith 1550da83c2eSBarry Smith PetscFunctionBegin; 1560da83c2eSBarry Smith /* 1570da83c2eSBarry Smith Free the private data structure that was hanging off the PC 1580da83c2eSBarry Smith */ 1599566063dSJacob Faibussowitsch PetscCall(PetscFree(jac->diag)); 1609566063dSJacob Faibussowitsch PetscCall(PetscFree(pc->data)); 1610da83c2eSBarry Smith PetscFunctionReturn(0); 1620da83c2eSBarry Smith } 1630da83c2eSBarry Smith 1640da83c2eSBarry Smith /*MC 1650da83c2eSBarry Smith PCVPBJACOBI - Variable size point block Jacobi preconditioner 1660da83c2eSBarry Smith 167*f1580f4eSBarry Smith Level: beginner 1680da83c2eSBarry Smith 169*f1580f4eSBarry Smith Notes: 170*f1580f4eSBarry Smith See `PCJACOBI` for point Jacobi preconditioning, `PCPBJACOBI` for fixed point block size, and `PCBJACOBI` for large size blocks 171*f1580f4eSBarry Smith 172*f1580f4eSBarry Smith This works for `MATAIJ` matrices 1730da83c2eSBarry Smith 1740da83c2eSBarry Smith Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 1750da83c2eSBarry Smith is detected a PETSc error is generated. 1760da83c2eSBarry Smith 177*f1580f4eSBarry Smith One must call `MatSetVariableBlockSizes()` to use this preconditioner 17890dfcc32SBarry Smith 1790da83c2eSBarry Smith Developer Notes: 180*f1580f4eSBarry Smith This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow 1810da83c2eSBarry Smith the factorization to continue even after a zero pivot is found resulting in a Nan and hence 182*f1580f4eSBarry Smith terminating `KSP` with a `KSP_DIVERGED_NANORIF` allowing 1830da83c2eSBarry Smith a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 1840da83c2eSBarry Smith 1850da83c2eSBarry Smith Perhaps should provide an option that allows generation of a valid preconditioner 186*f1580f4eSBarry Smith even if a block is singular as the `PCJACOBI` does. 1870da83c2eSBarry Smith 188db781477SPatrick Sanan .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI` 1890da83c2eSBarry Smith M*/ 1900da83c2eSBarry Smith 1919371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) { 1920da83c2eSBarry Smith PC_VPBJacobi *jac; 1930da83c2eSBarry Smith 1940da83c2eSBarry Smith PetscFunctionBegin; 1950da83c2eSBarry Smith /* 1960da83c2eSBarry Smith Creates the private data structure for this preconditioner and 1970da83c2eSBarry Smith attach it to the PC object. 1980da83c2eSBarry Smith */ 1999566063dSJacob Faibussowitsch PetscCall(PetscNewLog(pc, &jac)); 2000da83c2eSBarry Smith pc->data = (void *)jac; 2010da83c2eSBarry Smith 2020da83c2eSBarry Smith /* 2030da83c2eSBarry Smith Initialize the pointers to vectors to ZERO; these will be used to store 2040da83c2eSBarry Smith diagonal entries of the matrix for fast preconditioner application. 2050da83c2eSBarry Smith */ 2060da83c2eSBarry Smith jac->diag = NULL; 2070da83c2eSBarry Smith 2080da83c2eSBarry Smith /* 2090da83c2eSBarry Smith Set the pointers for the functions that are provided above. 2100da83c2eSBarry Smith Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 2110da83c2eSBarry Smith are called, they will automatically call these functions. Note we 2120da83c2eSBarry Smith choose not to provide a couple of these functions since they are 2130da83c2eSBarry Smith not needed. 2140da83c2eSBarry Smith */ 2150da83c2eSBarry Smith pc->ops->apply = PCApply_VPBJacobi; 2160a545947SLisandro Dalcin pc->ops->applytranspose = NULL; 2170da83c2eSBarry Smith pc->ops->setup = PCSetUp_VPBJacobi; 2180da83c2eSBarry Smith pc->ops->destroy = PCDestroy_VPBJacobi; 2190a545947SLisandro Dalcin pc->ops->setfromoptions = NULL; 2200a545947SLisandro Dalcin pc->ops->applyrichardson = NULL; 2210a545947SLisandro Dalcin pc->ops->applysymmetricleft = NULL; 2220a545947SLisandro Dalcin pc->ops->applysymmetricright = NULL; 2230da83c2eSBarry Smith PetscFunctionReturn(0); 2240da83c2eSBarry Smith } 225