xref: /petsc/src/ksp/pc/impls/vpbjacobi/vpbjacobi.c (revision f1be35001843913b137132cb0fff0c609e27f59e)
1*f1be3500SJunchao Zhang #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h>
20da83c2eSBarry Smith 
30da83c2eSBarry Smith static PetscErrorCode PCApply_VPBJacobi(PC pc,Vec x,Vec y)
40da83c2eSBarry Smith {
50da83c2eSBarry Smith   PC_VPBJacobi      *jac = (PC_VPBJacobi*)pc->data;
60da83c2eSBarry Smith   PetscInt          i,ncnt = 0;
70da83c2eSBarry Smith   const MatScalar   *diag = jac->diag;
80da83c2eSBarry Smith   PetscInt          ib,jb,bs;
90da83c2eSBarry Smith   const PetscScalar *xx;
100da83c2eSBarry Smith   PetscScalar       *yy,x0,x1,x2,x3,x4,x5,x6;
110da83c2eSBarry Smith   PetscInt          nblocks;
120da83c2eSBarry Smith   const PetscInt    *bsizes;
130da83c2eSBarry Smith 
140da83c2eSBarry Smith   PetscFunctionBegin;
159566063dSJacob Faibussowitsch   PetscCall(MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes));
169566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x,&xx));
179566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y,&yy));
180da83c2eSBarry Smith   for (i=0; i<nblocks; i++) {
190da83c2eSBarry Smith     bs = bsizes[i];
200da83c2eSBarry Smith     switch (bs) {
210da83c2eSBarry Smith     case 1:
220da83c2eSBarry Smith       yy[ncnt] = *diag*xx[ncnt];
230da83c2eSBarry Smith       break;
240da83c2eSBarry Smith     case 2:
250da83c2eSBarry Smith       x0         = xx[ncnt]; x1 = xx[ncnt+1];
260da83c2eSBarry Smith       yy[ncnt]   = diag[0]*x0 + diag[2]*x1;
270da83c2eSBarry Smith       yy[ncnt+1] = diag[1]*x0 + diag[3]*x1;
280da83c2eSBarry Smith       break;
290da83c2eSBarry Smith     case 3:
300da83c2eSBarry Smith       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2];
310da83c2eSBarry Smith       yy[ncnt]   = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
320da83c2eSBarry Smith       yy[ncnt+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
330da83c2eSBarry Smith       yy[ncnt+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
340da83c2eSBarry Smith       break;
350da83c2eSBarry Smith     case 4:
360da83c2eSBarry Smith       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3];
370da83c2eSBarry Smith       yy[ncnt]   = diag[0]*x0 + diag[4]*x1 + diag[8]*x2  + diag[12]*x3;
380da83c2eSBarry Smith       yy[ncnt+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2  + diag[13]*x3;
390da83c2eSBarry Smith       yy[ncnt+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
400da83c2eSBarry Smith       yy[ncnt+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
410da83c2eSBarry Smith       break;
420da83c2eSBarry Smith     case 5:
430da83c2eSBarry Smith       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4];
440da83c2eSBarry Smith       yy[ncnt]   = diag[0]*x0 + diag[5]*x1 + diag[10]*x2  + diag[15]*x3 + diag[20]*x4;
450da83c2eSBarry Smith       yy[ncnt+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2  + diag[16]*x3 + diag[21]*x4;
460da83c2eSBarry Smith       yy[ncnt+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
470da83c2eSBarry Smith       yy[ncnt+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
480da83c2eSBarry Smith       yy[ncnt+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
490da83c2eSBarry Smith       break;
500da83c2eSBarry Smith     case 6:
510da83c2eSBarry Smith       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5];
520da83c2eSBarry Smith       yy[ncnt]   = diag[0]*x0 + diag[6]*x1  + diag[12]*x2  + diag[18]*x3 + diag[24]*x4 + diag[30]*x5;
530da83c2eSBarry Smith       yy[ncnt+1] = diag[1]*x0 + diag[7]*x1  + diag[13]*x2  + diag[19]*x3 + diag[25]*x4 + diag[31]*x5;
540da83c2eSBarry Smith       yy[ncnt+2] = diag[2]*x0 + diag[8]*x1  + diag[14]*x2  + diag[20]*x3 + diag[26]*x4 + diag[32]*x5;
550da83c2eSBarry Smith       yy[ncnt+3] = diag[3]*x0 + diag[9]*x1  + diag[15]*x2  + diag[21]*x3 + diag[27]*x4 + diag[33]*x5;
560da83c2eSBarry Smith       yy[ncnt+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2  + diag[22]*x3 + diag[28]*x4 + diag[34]*x5;
570da83c2eSBarry Smith       yy[ncnt+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2  + diag[23]*x3 + diag[29]*x4 + diag[35]*x5;
580da83c2eSBarry Smith       break;
590da83c2eSBarry Smith     case 7:
600da83c2eSBarry Smith       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5]; x6 = xx[ncnt+6];
610da83c2eSBarry Smith       yy[ncnt]   = diag[0]*x0 + diag[7]*x1  + diag[14]*x2  + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6;
620da83c2eSBarry Smith       yy[ncnt+1] = diag[1]*x0 + diag[8]*x1  + diag[15]*x2  + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6;
630da83c2eSBarry Smith       yy[ncnt+2] = diag[2]*x0 + diag[9]*x1  + diag[16]*x2  + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6;
640da83c2eSBarry Smith       yy[ncnt+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2  + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6;
650da83c2eSBarry Smith       yy[ncnt+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2  + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6;
660da83c2eSBarry Smith       yy[ncnt+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2  + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6;
670da83c2eSBarry Smith       yy[ncnt+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2  + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6;
680da83c2eSBarry Smith       break;
690da83c2eSBarry Smith     default:
700da83c2eSBarry Smith       for (ib=0; ib<bs; ib++) {
710da83c2eSBarry Smith         PetscScalar rowsum = 0;
720da83c2eSBarry Smith         for (jb=0; jb<bs; jb++) {
730da83c2eSBarry Smith           rowsum += diag[ib+jb*bs] * xx[ncnt+jb];
740da83c2eSBarry Smith         }
750da83c2eSBarry Smith         yy[ncnt+ib] = rowsum;
760da83c2eSBarry Smith       }
770da83c2eSBarry Smith     }
780da83c2eSBarry Smith     ncnt += bsizes[i];
790da83c2eSBarry Smith     diag += bsizes[i]*bsizes[i];
800da83c2eSBarry Smith   }
819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x,&xx));
829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y,&yy));
830da83c2eSBarry Smith   PetscFunctionReturn(0);
840da83c2eSBarry Smith }
850da83c2eSBarry Smith 
86*f1be3500SJunchao Zhang PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc)
870da83c2eSBarry Smith {
880da83c2eSBarry Smith   PC_VPBJacobi    *jac = (PC_VPBJacobi*)pc->data;
890da83c2eSBarry Smith   Mat            A = pc->pmat;
900da83c2eSBarry Smith   MatFactorError err;
910da83c2eSBarry Smith   PetscInt       i,nsize = 0,nlocal;
920da83c2eSBarry Smith   PetscInt       nblocks;
930da83c2eSBarry Smith   const PetscInt *bsizes;
940da83c2eSBarry Smith 
950da83c2eSBarry Smith   PetscFunctionBegin;
969566063dSJacob Faibussowitsch   PetscCall(MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes));
979566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(pc->pmat,&nlocal,NULL));
9808401ef6SPierre Jolivet   PetscCheck(!nlocal || nblocks,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatSetVariableBlockSizes() before using PCVPBJACOBI");
990da83c2eSBarry Smith   if (!jac->diag) {
1000da83c2eSBarry Smith     for (i=0; i<nblocks; i++) nsize += bsizes[i]*bsizes[i];
1019566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nsize,&jac->diag));
1020da83c2eSBarry Smith   }
1039566063dSJacob Faibussowitsch   PetscCall(MatInvertVariableBlockDiagonal(A,nblocks,bsizes,jac->diag));
1049566063dSJacob Faibussowitsch   PetscCall(MatFactorGetError(A,&err));
1050da83c2eSBarry Smith   if (err) pc->failedreason = (PCFailedReason)err;
1060da83c2eSBarry Smith   pc->ops->apply = PCApply_VPBJacobi;
1070da83c2eSBarry Smith   PetscFunctionReturn(0);
1080da83c2eSBarry Smith }
1098a9c020eSBarry Smith 
110*f1be3500SJunchao Zhang static PetscErrorCode PCSetUp_VPBJacobi(PC pc)
111*f1be3500SJunchao Zhang {
112*f1be3500SJunchao Zhang   PetscFunctionBegin;
113*f1be3500SJunchao Zhang   /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
114*f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
115*f1be3500SJunchao Zhang   PetscBool isCuda;
116*f1be3500SJunchao Zhang   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat,&isCuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
117*f1be3500SJunchao Zhang #endif
118*f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS)
119*f1be3500SJunchao Zhang   PetscBool isKok;
120*f1be3500SJunchao Zhang   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat,&isKok,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
121*f1be3500SJunchao Zhang #endif
122*f1be3500SJunchao Zhang 
123*f1be3500SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
124*f1be3500SJunchao Zhang   if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc)); else
125*f1be3500SJunchao Zhang #endif
126*f1be3500SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS_KERNELS)
127*f1be3500SJunchao Zhang   if (isKok) PetscCall(PCSetUp_VPBJacobi_Kokkos(pc)); else
128*f1be3500SJunchao Zhang #endif
129*f1be3500SJunchao Zhang   {PetscCall(PCSetUp_VPBJacobi_Host(pc));}
130*f1be3500SJunchao Zhang   PetscFunctionReturn(0);
131*f1be3500SJunchao Zhang }
132*f1be3500SJunchao Zhang 
133*f1be3500SJunchao Zhang PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc)
1340da83c2eSBarry Smith {
1350da83c2eSBarry Smith   PC_VPBJacobi    *jac = (PC_VPBJacobi*)pc->data;
1360da83c2eSBarry Smith 
1370da83c2eSBarry Smith   PetscFunctionBegin;
1380da83c2eSBarry Smith   /*
1390da83c2eSBarry Smith       Free the private data structure that was hanging off the PC
1400da83c2eSBarry Smith   */
1419566063dSJacob Faibussowitsch   PetscCall(PetscFree(jac->diag));
1429566063dSJacob Faibussowitsch   PetscCall(PetscFree(pc->data));
1430da83c2eSBarry Smith   PetscFunctionReturn(0);
1440da83c2eSBarry Smith }
1450da83c2eSBarry Smith 
1460da83c2eSBarry Smith /*MC
1470da83c2eSBarry Smith      PCVPBJACOBI - Variable size point block Jacobi preconditioner
1480da83c2eSBarry Smith 
1490da83c2eSBarry Smith    Notes:
1500da83c2eSBarry Smith      See PCJACOBI for point Jacobi preconditioning, PCPBJACOBI for fixed point block size, and PCBJACOBI for large size blocks
1510da83c2eSBarry Smith 
1520da83c2eSBarry Smith      This works for AIJ matrices
1530da83c2eSBarry Smith 
1540da83c2eSBarry Smith      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
1550da83c2eSBarry Smith      is detected a PETSc error is generated.
1560da83c2eSBarry Smith 
1570da83c2eSBarry Smith      One must call MatSetVariableBlockSizes() to use this preconditioner
15890dfcc32SBarry Smith 
1590da83c2eSBarry Smith    Developer Notes:
1600da83c2eSBarry Smith      This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
1610da83c2eSBarry Smith      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
1620da83c2eSBarry Smith      terminating KSP with a KSP_DIVERGED_NANORIF allowing
1630da83c2eSBarry Smith      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
1640da83c2eSBarry Smith 
1650da83c2eSBarry Smith      Perhaps should provide an option that allows generation of a valid preconditioner
1660da83c2eSBarry Smith      even if a block is singular as the PCJACOBI does.
1670da83c2eSBarry Smith 
1680da83c2eSBarry Smith    Level: beginner
1690da83c2eSBarry Smith 
170db781477SPatrick Sanan .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI`
1710da83c2eSBarry Smith 
1720da83c2eSBarry Smith M*/
1730da83c2eSBarry Smith 
1740da83c2eSBarry Smith PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc)
1750da83c2eSBarry Smith {
1760da83c2eSBarry Smith   PC_VPBJacobi   *jac;
1770da83c2eSBarry Smith 
1780da83c2eSBarry Smith   PetscFunctionBegin;
1790da83c2eSBarry Smith   /*
1800da83c2eSBarry Smith      Creates the private data structure for this preconditioner and
1810da83c2eSBarry Smith      attach it to the PC object.
1820da83c2eSBarry Smith   */
1839566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(pc,&jac));
1840da83c2eSBarry Smith   pc->data = (void*)jac;
1850da83c2eSBarry Smith 
1860da83c2eSBarry Smith   /*
1870da83c2eSBarry Smith      Initialize the pointers to vectors to ZERO; these will be used to store
1880da83c2eSBarry Smith      diagonal entries of the matrix for fast preconditioner application.
1890da83c2eSBarry Smith   */
1900da83c2eSBarry Smith   jac->diag = NULL;
1910da83c2eSBarry Smith 
1920da83c2eSBarry Smith   /*
1930da83c2eSBarry Smith       Set the pointers for the functions that are provided above.
1940da83c2eSBarry Smith       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
1950da83c2eSBarry Smith       are called, they will automatically call these functions.  Note we
1960da83c2eSBarry Smith       choose not to provide a couple of these functions since they are
1970da83c2eSBarry Smith       not needed.
1980da83c2eSBarry Smith   */
1990da83c2eSBarry Smith   pc->ops->apply               = PCApply_VPBJacobi;
2000a545947SLisandro Dalcin   pc->ops->applytranspose      = NULL;
2010da83c2eSBarry Smith   pc->ops->setup               = PCSetUp_VPBJacobi;
2020da83c2eSBarry Smith   pc->ops->destroy             = PCDestroy_VPBJacobi;
2030a545947SLisandro Dalcin   pc->ops->setfromoptions      = NULL;
2040a545947SLisandro Dalcin   pc->ops->applyrichardson     = NULL;
2050a545947SLisandro Dalcin   pc->ops->applysymmetricleft  = NULL;
2060a545947SLisandro Dalcin   pc->ops->applysymmetricright = NULL;
2070da83c2eSBarry Smith   PetscFunctionReturn(0);
2080da83c2eSBarry Smith }
209