xref: /petsc/src/ksp/pc/impls/vpbjacobi/vpbjacobi.c (revision f1580f4e3ce5d5b2393648fd039d0d41b440385d)
1f1be3500SJunchao Zhang #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h>
20da83c2eSBarry Smith 
39371c9d4SSatish Balay static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y) {
40da83c2eSBarry Smith   PC_VPBJacobi      *jac = (PC_VPBJacobi *)pc->data;
50da83c2eSBarry Smith   PetscInt           i, ncnt = 0;
60da83c2eSBarry Smith   const MatScalar   *diag = jac->diag;
70da83c2eSBarry Smith   PetscInt           ib, jb, bs;
80da83c2eSBarry Smith   const PetscScalar *xx;
90da83c2eSBarry Smith   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
100da83c2eSBarry Smith   PetscInt           nblocks;
110da83c2eSBarry Smith   const PetscInt    *bsizes;
120da83c2eSBarry Smith 
130da83c2eSBarry Smith   PetscFunctionBegin;
149566063dSJacob Faibussowitsch   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xx));
169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yy));
170da83c2eSBarry Smith   for (i = 0; i < nblocks; i++) {
180da83c2eSBarry Smith     bs = bsizes[i];
190da83c2eSBarry Smith     switch (bs) {
209371c9d4SSatish Balay     case 1: yy[ncnt] = *diag * xx[ncnt]; break;
210da83c2eSBarry Smith     case 2:
229371c9d4SSatish Balay       x0           = xx[ncnt];
239371c9d4SSatish Balay       x1           = xx[ncnt + 1];
240da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[2] * x1;
250da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1;
260da83c2eSBarry Smith       break;
270da83c2eSBarry Smith     case 3:
289371c9d4SSatish Balay       x0           = xx[ncnt];
299371c9d4SSatish Balay       x1           = xx[ncnt + 1];
309371c9d4SSatish Balay       x2           = xx[ncnt + 2];
310da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
320da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
330da83c2eSBarry Smith       yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
340da83c2eSBarry Smith       break;
350da83c2eSBarry Smith     case 4:
369371c9d4SSatish Balay       x0           = xx[ncnt];
379371c9d4SSatish Balay       x1           = xx[ncnt + 1];
389371c9d4SSatish Balay       x2           = xx[ncnt + 2];
399371c9d4SSatish Balay       x3           = xx[ncnt + 3];
400da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
410da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
420da83c2eSBarry Smith       yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
430da83c2eSBarry Smith       yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
440da83c2eSBarry Smith       break;
450da83c2eSBarry Smith     case 5:
469371c9d4SSatish Balay       x0           = xx[ncnt];
479371c9d4SSatish Balay       x1           = xx[ncnt + 1];
489371c9d4SSatish Balay       x2           = xx[ncnt + 2];
499371c9d4SSatish Balay       x3           = xx[ncnt + 3];
509371c9d4SSatish Balay       x4           = xx[ncnt + 4];
510da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
520da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
530da83c2eSBarry Smith       yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
540da83c2eSBarry Smith       yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
550da83c2eSBarry Smith       yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
560da83c2eSBarry Smith       break;
570da83c2eSBarry Smith     case 6:
589371c9d4SSatish Balay       x0           = xx[ncnt];
599371c9d4SSatish Balay       x1           = xx[ncnt + 1];
609371c9d4SSatish Balay       x2           = xx[ncnt + 2];
619371c9d4SSatish Balay       x3           = xx[ncnt + 3];
629371c9d4SSatish Balay       x4           = xx[ncnt + 4];
639371c9d4SSatish Balay       x5           = xx[ncnt + 5];
640da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
650da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
660da83c2eSBarry Smith       yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
670da83c2eSBarry Smith       yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
680da83c2eSBarry Smith       yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
690da83c2eSBarry Smith       yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
700da83c2eSBarry Smith       break;
710da83c2eSBarry Smith     case 7:
729371c9d4SSatish Balay       x0           = xx[ncnt];
739371c9d4SSatish Balay       x1           = xx[ncnt + 1];
749371c9d4SSatish Balay       x2           = xx[ncnt + 2];
759371c9d4SSatish Balay       x3           = xx[ncnt + 3];
769371c9d4SSatish Balay       x4           = xx[ncnt + 4];
779371c9d4SSatish Balay       x5           = xx[ncnt + 5];
789371c9d4SSatish Balay       x6           = xx[ncnt + 6];
790da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
800da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
810da83c2eSBarry Smith       yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
820da83c2eSBarry Smith       yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
830da83c2eSBarry Smith       yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
840da83c2eSBarry Smith       yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
850da83c2eSBarry Smith       yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
860da83c2eSBarry Smith       break;
870da83c2eSBarry Smith     default:
880da83c2eSBarry Smith       for (ib = 0; ib < bs; ib++) {
890da83c2eSBarry Smith         PetscScalar rowsum = 0;
90ad540459SPierre Jolivet         for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[ncnt + jb];
910da83c2eSBarry Smith         yy[ncnt + ib] = rowsum;
920da83c2eSBarry Smith       }
930da83c2eSBarry Smith     }
940da83c2eSBarry Smith     ncnt += bsizes[i];
950da83c2eSBarry Smith     diag += bsizes[i] * bsizes[i];
960da83c2eSBarry Smith   }
979566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xx));
989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yy));
990da83c2eSBarry Smith   PetscFunctionReturn(0);
1000da83c2eSBarry Smith }
1010da83c2eSBarry Smith 
1029371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc) {
1030da83c2eSBarry Smith   PC_VPBJacobi   *jac = (PC_VPBJacobi *)pc->data;
1040da83c2eSBarry Smith   Mat             A   = pc->pmat;
1050da83c2eSBarry Smith   MatFactorError  err;
1060da83c2eSBarry Smith   PetscInt        i, nsize = 0, nlocal;
1070da83c2eSBarry Smith   PetscInt        nblocks;
1080da83c2eSBarry Smith   const PetscInt *bsizes;
1090da83c2eSBarry Smith 
1100da83c2eSBarry Smith   PetscFunctionBegin;
1119566063dSJacob Faibussowitsch   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
1129566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL));
11308401ef6SPierre Jolivet   PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI");
1140da83c2eSBarry Smith   if (!jac->diag) {
1150da83c2eSBarry Smith     for (i = 0; i < nblocks; i++) nsize += bsizes[i] * bsizes[i];
1169566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nsize, &jac->diag));
1170da83c2eSBarry Smith   }
1189566063dSJacob Faibussowitsch   PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag));
1199566063dSJacob Faibussowitsch   PetscCall(MatFactorGetError(A, &err));
1200da83c2eSBarry Smith   if (err) pc->failedreason = (PCFailedReason)err;
1210da83c2eSBarry Smith   pc->ops->apply = PCApply_VPBJacobi;
1220da83c2eSBarry Smith   PetscFunctionReturn(0);
1230da83c2eSBarry Smith }
1248a9c020eSBarry Smith 
1259371c9d4SSatish Balay static PetscErrorCode PCSetUp_VPBJacobi(PC pc) {
126f1be3500SJunchao Zhang   PetscFunctionBegin;
127f1be3500SJunchao Zhang   /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
128f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
129f1be3500SJunchao Zhang   PetscBool isCuda;
130f1be3500SJunchao Zhang   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
131f1be3500SJunchao Zhang #endif
132f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS)
133f1be3500SJunchao Zhang   PetscBool isKok;
134f1be3500SJunchao Zhang   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
135f1be3500SJunchao Zhang #endif
136f1be3500SJunchao Zhang 
137f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
1389371c9d4SSatish Balay   if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc));
1399371c9d4SSatish Balay   else
140f1be3500SJunchao Zhang #endif
141f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1429371c9d4SSatish Balay     if (isKok)
1439371c9d4SSatish Balay     PetscCall(PCSetUp_VPBJacobi_Kokkos(pc));
1449371c9d4SSatish Balay   else
145f1be3500SJunchao Zhang #endif
1469371c9d4SSatish Balay   {
1479371c9d4SSatish Balay     PetscCall(PCSetUp_VPBJacobi_Host(pc));
1489371c9d4SSatish Balay   }
149f1be3500SJunchao Zhang   PetscFunctionReturn(0);
150f1be3500SJunchao Zhang }
151f1be3500SJunchao Zhang 
1529371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc) {
1530da83c2eSBarry Smith   PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
1540da83c2eSBarry Smith 
1550da83c2eSBarry Smith   PetscFunctionBegin;
1560da83c2eSBarry Smith   /*
1570da83c2eSBarry Smith       Free the private data structure that was hanging off the PC
1580da83c2eSBarry Smith   */
1599566063dSJacob Faibussowitsch   PetscCall(PetscFree(jac->diag));
1609566063dSJacob Faibussowitsch   PetscCall(PetscFree(pc->data));
1610da83c2eSBarry Smith   PetscFunctionReturn(0);
1620da83c2eSBarry Smith }
1630da83c2eSBarry Smith 
1640da83c2eSBarry Smith /*MC
1650da83c2eSBarry Smith      PCVPBJACOBI - Variable size point block Jacobi preconditioner
1660da83c2eSBarry Smith 
167*f1580f4eSBarry Smith    Level: beginner
1680da83c2eSBarry Smith 
169*f1580f4eSBarry Smith    Notes:
170*f1580f4eSBarry Smith      See `PCJACOBI` for point Jacobi preconditioning, `PCPBJACOBI` for fixed point block size, and `PCBJACOBI` for large size blocks
171*f1580f4eSBarry Smith 
172*f1580f4eSBarry Smith      This works for `MATAIJ` matrices
1730da83c2eSBarry Smith 
1740da83c2eSBarry Smith      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
1750da83c2eSBarry Smith      is detected a PETSc error is generated.
1760da83c2eSBarry Smith 
177*f1580f4eSBarry Smith      One must call `MatSetVariableBlockSizes()` to use this preconditioner
17890dfcc32SBarry Smith 
1790da83c2eSBarry Smith    Developer Notes:
180*f1580f4eSBarry Smith      This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
1810da83c2eSBarry Smith      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
182*f1580f4eSBarry Smith      terminating `KSP` with a `KSP_DIVERGED_NANORIF` allowing
1830da83c2eSBarry Smith      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
1840da83c2eSBarry Smith 
1850da83c2eSBarry Smith      Perhaps should provide an option that allows generation of a valid preconditioner
186*f1580f4eSBarry Smith      even if a block is singular as the `PCJACOBI` does.
1870da83c2eSBarry Smith 
188db781477SPatrick Sanan .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI`
1890da83c2eSBarry Smith M*/
1900da83c2eSBarry Smith 
1919371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) {
1920da83c2eSBarry Smith   PC_VPBJacobi *jac;
1930da83c2eSBarry Smith 
1940da83c2eSBarry Smith   PetscFunctionBegin;
1950da83c2eSBarry Smith   /*
1960da83c2eSBarry Smith      Creates the private data structure for this preconditioner and
1970da83c2eSBarry Smith      attach it to the PC object.
1980da83c2eSBarry Smith   */
1999566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(pc, &jac));
2000da83c2eSBarry Smith   pc->data = (void *)jac;
2010da83c2eSBarry Smith 
2020da83c2eSBarry Smith   /*
2030da83c2eSBarry Smith      Initialize the pointers to vectors to ZERO; these will be used to store
2040da83c2eSBarry Smith      diagonal entries of the matrix for fast preconditioner application.
2050da83c2eSBarry Smith   */
2060da83c2eSBarry Smith   jac->diag = NULL;
2070da83c2eSBarry Smith 
2080da83c2eSBarry Smith   /*
2090da83c2eSBarry Smith       Set the pointers for the functions that are provided above.
2100da83c2eSBarry Smith       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
2110da83c2eSBarry Smith       are called, they will automatically call these functions.  Note we
2120da83c2eSBarry Smith       choose not to provide a couple of these functions since they are
2130da83c2eSBarry Smith       not needed.
2140da83c2eSBarry Smith   */
2150da83c2eSBarry Smith   pc->ops->apply               = PCApply_VPBJacobi;
2160a545947SLisandro Dalcin   pc->ops->applytranspose      = NULL;
2170da83c2eSBarry Smith   pc->ops->setup               = PCSetUp_VPBJacobi;
2180da83c2eSBarry Smith   pc->ops->destroy             = PCDestroy_VPBJacobi;
2190a545947SLisandro Dalcin   pc->ops->setfromoptions      = NULL;
2200a545947SLisandro Dalcin   pc->ops->applyrichardson     = NULL;
2210a545947SLisandro Dalcin   pc->ops->applysymmetricleft  = NULL;
2220a545947SLisandro Dalcin   pc->ops->applysymmetricright = NULL;
2230da83c2eSBarry Smith   PetscFunctionReturn(0);
2240da83c2eSBarry Smith }
225