xref: /petsc/src/ksp/pc/impls/vpbjacobi/vpbjacobi.c (revision ad540459ab38c4a232710a68077e487eb6627fe2)
1f1be3500SJunchao Zhang #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h>
20da83c2eSBarry Smith 
39371c9d4SSatish Balay static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y) {
40da83c2eSBarry Smith   PC_VPBJacobi      *jac = (PC_VPBJacobi *)pc->data;
50da83c2eSBarry Smith   PetscInt           i, ncnt = 0;
60da83c2eSBarry Smith   const MatScalar   *diag = jac->diag;
70da83c2eSBarry Smith   PetscInt           ib, jb, bs;
80da83c2eSBarry Smith   const PetscScalar *xx;
90da83c2eSBarry Smith   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
100da83c2eSBarry Smith   PetscInt           nblocks;
110da83c2eSBarry Smith   const PetscInt    *bsizes;
120da83c2eSBarry Smith 
130da83c2eSBarry Smith   PetscFunctionBegin;
149566063dSJacob Faibussowitsch   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &xx));
169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &yy));
170da83c2eSBarry Smith   for (i = 0; i < nblocks; i++) {
180da83c2eSBarry Smith     bs = bsizes[i];
190da83c2eSBarry Smith     switch (bs) {
209371c9d4SSatish Balay     case 1: yy[ncnt] = *diag * xx[ncnt]; break;
210da83c2eSBarry Smith     case 2:
229371c9d4SSatish Balay       x0           = xx[ncnt];
239371c9d4SSatish Balay       x1           = xx[ncnt + 1];
240da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[2] * x1;
250da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1;
260da83c2eSBarry Smith       break;
270da83c2eSBarry Smith     case 3:
289371c9d4SSatish Balay       x0           = xx[ncnt];
299371c9d4SSatish Balay       x1           = xx[ncnt + 1];
309371c9d4SSatish Balay       x2           = xx[ncnt + 2];
310da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
320da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
330da83c2eSBarry Smith       yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
340da83c2eSBarry Smith       break;
350da83c2eSBarry Smith     case 4:
369371c9d4SSatish Balay       x0           = xx[ncnt];
379371c9d4SSatish Balay       x1           = xx[ncnt + 1];
389371c9d4SSatish Balay       x2           = xx[ncnt + 2];
399371c9d4SSatish Balay       x3           = xx[ncnt + 3];
400da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
410da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
420da83c2eSBarry Smith       yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
430da83c2eSBarry Smith       yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
440da83c2eSBarry Smith       break;
450da83c2eSBarry Smith     case 5:
469371c9d4SSatish Balay       x0           = xx[ncnt];
479371c9d4SSatish Balay       x1           = xx[ncnt + 1];
489371c9d4SSatish Balay       x2           = xx[ncnt + 2];
499371c9d4SSatish Balay       x3           = xx[ncnt + 3];
509371c9d4SSatish Balay       x4           = xx[ncnt + 4];
510da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
520da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
530da83c2eSBarry Smith       yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
540da83c2eSBarry Smith       yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
550da83c2eSBarry Smith       yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
560da83c2eSBarry Smith       break;
570da83c2eSBarry Smith     case 6:
589371c9d4SSatish Balay       x0           = xx[ncnt];
599371c9d4SSatish Balay       x1           = xx[ncnt + 1];
609371c9d4SSatish Balay       x2           = xx[ncnt + 2];
619371c9d4SSatish Balay       x3           = xx[ncnt + 3];
629371c9d4SSatish Balay       x4           = xx[ncnt + 4];
639371c9d4SSatish Balay       x5           = xx[ncnt + 5];
640da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
650da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
660da83c2eSBarry Smith       yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
670da83c2eSBarry Smith       yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
680da83c2eSBarry Smith       yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
690da83c2eSBarry Smith       yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
700da83c2eSBarry Smith       break;
710da83c2eSBarry Smith     case 7:
729371c9d4SSatish Balay       x0           = xx[ncnt];
739371c9d4SSatish Balay       x1           = xx[ncnt + 1];
749371c9d4SSatish Balay       x2           = xx[ncnt + 2];
759371c9d4SSatish Balay       x3           = xx[ncnt + 3];
769371c9d4SSatish Balay       x4           = xx[ncnt + 4];
779371c9d4SSatish Balay       x5           = xx[ncnt + 5];
789371c9d4SSatish Balay       x6           = xx[ncnt + 6];
790da83c2eSBarry Smith       yy[ncnt]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
800da83c2eSBarry Smith       yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
810da83c2eSBarry Smith       yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
820da83c2eSBarry Smith       yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
830da83c2eSBarry Smith       yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
840da83c2eSBarry Smith       yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
850da83c2eSBarry Smith       yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
860da83c2eSBarry Smith       break;
870da83c2eSBarry Smith     default:
880da83c2eSBarry Smith       for (ib = 0; ib < bs; ib++) {
890da83c2eSBarry Smith         PetscScalar rowsum = 0;
90*ad540459SPierre Jolivet         for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[ncnt + jb];
910da83c2eSBarry Smith         yy[ncnt + ib] = rowsum;
920da83c2eSBarry Smith       }
930da83c2eSBarry Smith     }
940da83c2eSBarry Smith     ncnt += bsizes[i];
950da83c2eSBarry Smith     diag += bsizes[i] * bsizes[i];
960da83c2eSBarry Smith   }
979566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &xx));
989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &yy));
990da83c2eSBarry Smith   PetscFunctionReturn(0);
1000da83c2eSBarry Smith }
1010da83c2eSBarry Smith 
1029371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc) {
1030da83c2eSBarry Smith   PC_VPBJacobi   *jac = (PC_VPBJacobi *)pc->data;
1040da83c2eSBarry Smith   Mat             A   = pc->pmat;
1050da83c2eSBarry Smith   MatFactorError  err;
1060da83c2eSBarry Smith   PetscInt        i, nsize = 0, nlocal;
1070da83c2eSBarry Smith   PetscInt        nblocks;
1080da83c2eSBarry Smith   const PetscInt *bsizes;
1090da83c2eSBarry Smith 
1100da83c2eSBarry Smith   PetscFunctionBegin;
1119566063dSJacob Faibussowitsch   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
1129566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL));
11308401ef6SPierre Jolivet   PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI");
1140da83c2eSBarry Smith   if (!jac->diag) {
1150da83c2eSBarry Smith     for (i = 0; i < nblocks; i++) nsize += bsizes[i] * bsizes[i];
1169566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nsize, &jac->diag));
1170da83c2eSBarry Smith   }
1189566063dSJacob Faibussowitsch   PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag));
1199566063dSJacob Faibussowitsch   PetscCall(MatFactorGetError(A, &err));
1200da83c2eSBarry Smith   if (err) pc->failedreason = (PCFailedReason)err;
1210da83c2eSBarry Smith   pc->ops->apply = PCApply_VPBJacobi;
1220da83c2eSBarry Smith   PetscFunctionReturn(0);
1230da83c2eSBarry Smith }
1248a9c020eSBarry Smith 
1259371c9d4SSatish Balay static PetscErrorCode PCSetUp_VPBJacobi(PC pc) {
126f1be3500SJunchao Zhang   PetscFunctionBegin;
127f1be3500SJunchao Zhang   /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
128f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
129f1be3500SJunchao Zhang   PetscBool isCuda;
130f1be3500SJunchao Zhang   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
131f1be3500SJunchao Zhang #endif
132f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS)
133f1be3500SJunchao Zhang   PetscBool isKok;
134f1be3500SJunchao Zhang   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
135f1be3500SJunchao Zhang #endif
136f1be3500SJunchao Zhang 
137f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
1389371c9d4SSatish Balay   if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc));
1399371c9d4SSatish Balay   else
140f1be3500SJunchao Zhang #endif
141f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1429371c9d4SSatish Balay     if (isKok)
1439371c9d4SSatish Balay     PetscCall(PCSetUp_VPBJacobi_Kokkos(pc));
1449371c9d4SSatish Balay   else
145f1be3500SJunchao Zhang #endif
1469371c9d4SSatish Balay   {
1479371c9d4SSatish Balay     PetscCall(PCSetUp_VPBJacobi_Host(pc));
1489371c9d4SSatish Balay   }
149f1be3500SJunchao Zhang   PetscFunctionReturn(0);
150f1be3500SJunchao Zhang }
151f1be3500SJunchao Zhang 
1529371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc) {
1530da83c2eSBarry Smith   PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
1540da83c2eSBarry Smith 
1550da83c2eSBarry Smith   PetscFunctionBegin;
1560da83c2eSBarry Smith   /*
1570da83c2eSBarry Smith       Free the private data structure that was hanging off the PC
1580da83c2eSBarry Smith   */
1599566063dSJacob Faibussowitsch   PetscCall(PetscFree(jac->diag));
1609566063dSJacob Faibussowitsch   PetscCall(PetscFree(pc->data));
1610da83c2eSBarry Smith   PetscFunctionReturn(0);
1620da83c2eSBarry Smith }
1630da83c2eSBarry Smith 
1640da83c2eSBarry Smith /*MC
1650da83c2eSBarry Smith      PCVPBJACOBI - Variable size point block Jacobi preconditioner
1660da83c2eSBarry Smith 
1670da83c2eSBarry Smith    Notes:
1680da83c2eSBarry Smith      See PCJACOBI for point Jacobi preconditioning, PCPBJACOBI for fixed point block size, and PCBJACOBI for large size blocks
1690da83c2eSBarry Smith 
1700da83c2eSBarry Smith      This works for AIJ matrices
1710da83c2eSBarry Smith 
1720da83c2eSBarry Smith      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
1730da83c2eSBarry Smith      is detected a PETSc error is generated.
1740da83c2eSBarry Smith 
1750da83c2eSBarry Smith      One must call MatSetVariableBlockSizes() to use this preconditioner
17690dfcc32SBarry Smith 
1770da83c2eSBarry Smith    Developer Notes:
1780da83c2eSBarry Smith      This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
1790da83c2eSBarry Smith      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
1800da83c2eSBarry Smith      terminating KSP with a KSP_DIVERGED_NANORIF allowing
1810da83c2eSBarry Smith      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
1820da83c2eSBarry Smith 
1830da83c2eSBarry Smith      Perhaps should provide an option that allows generation of a valid preconditioner
1840da83c2eSBarry Smith      even if a block is singular as the PCJACOBI does.
1850da83c2eSBarry Smith 
1860da83c2eSBarry Smith    Level: beginner
1870da83c2eSBarry Smith 
188db781477SPatrick Sanan .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI`
1890da83c2eSBarry Smith 
1900da83c2eSBarry Smith M*/
1910da83c2eSBarry Smith 
1929371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) {
1930da83c2eSBarry Smith   PC_VPBJacobi *jac;
1940da83c2eSBarry Smith 
1950da83c2eSBarry Smith   PetscFunctionBegin;
1960da83c2eSBarry Smith   /*
1970da83c2eSBarry Smith      Creates the private data structure for this preconditioner and
1980da83c2eSBarry Smith      attach it to the PC object.
1990da83c2eSBarry Smith   */
2009566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(pc, &jac));
2010da83c2eSBarry Smith   pc->data = (void *)jac;
2020da83c2eSBarry Smith 
2030da83c2eSBarry Smith   /*
2040da83c2eSBarry Smith      Initialize the pointers to vectors to ZERO; these will be used to store
2050da83c2eSBarry Smith      diagonal entries of the matrix for fast preconditioner application.
2060da83c2eSBarry Smith   */
2070da83c2eSBarry Smith   jac->diag = NULL;
2080da83c2eSBarry Smith 
2090da83c2eSBarry Smith   /*
2100da83c2eSBarry Smith       Set the pointers for the functions that are provided above.
2110da83c2eSBarry Smith       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
2120da83c2eSBarry Smith       are called, they will automatically call these functions.  Note we
2130da83c2eSBarry Smith       choose not to provide a couple of these functions since they are
2140da83c2eSBarry Smith       not needed.
2150da83c2eSBarry Smith   */
2160da83c2eSBarry Smith   pc->ops->apply               = PCApply_VPBJacobi;
2170a545947SLisandro Dalcin   pc->ops->applytranspose      = NULL;
2180da83c2eSBarry Smith   pc->ops->setup               = PCSetUp_VPBJacobi;
2190da83c2eSBarry Smith   pc->ops->destroy             = PCDestroy_VPBJacobi;
2200a545947SLisandro Dalcin   pc->ops->setfromoptions      = NULL;
2210a545947SLisandro Dalcin   pc->ops->applyrichardson     = NULL;
2220a545947SLisandro Dalcin   pc->ops->applysymmetricleft  = NULL;
2230a545947SLisandro Dalcin   pc->ops->applysymmetricright = NULL;
2240da83c2eSBarry Smith   PetscFunctionReturn(0);
2250da83c2eSBarry Smith }
226