xref: /petsc/src/ksp/pc/impls/vpbjacobi/vpbjacobi.c (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h>
2 
3 static PetscErrorCode PCApply_VPBJacobi(PC pc,Vec x,Vec y)
4 {
5   PC_VPBJacobi      *jac = (PC_VPBJacobi*)pc->data;
6   PetscInt          i,ncnt = 0;
7   const MatScalar   *diag = jac->diag;
8   PetscInt          ib,jb,bs;
9   const PetscScalar *xx;
10   PetscScalar       *yy,x0,x1,x2,x3,x4,x5,x6;
11   PetscInt          nblocks;
12   const PetscInt    *bsizes;
13 
14   PetscFunctionBegin;
15   PetscCall(MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes));
16   PetscCall(VecGetArrayRead(x,&xx));
17   PetscCall(VecGetArray(y,&yy));
18   for (i=0; i<nblocks; i++) {
19     bs = bsizes[i];
20     switch (bs) {
21     case 1:
22       yy[ncnt] = *diag*xx[ncnt];
23       break;
24     case 2:
25       x0         = xx[ncnt]; x1 = xx[ncnt+1];
26       yy[ncnt]   = diag[0]*x0 + diag[2]*x1;
27       yy[ncnt+1] = diag[1]*x0 + diag[3]*x1;
28       break;
29     case 3:
30       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2];
31       yy[ncnt]   = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
32       yy[ncnt+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
33       yy[ncnt+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
34       break;
35     case 4:
36       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3];
37       yy[ncnt]   = diag[0]*x0 + diag[4]*x1 + diag[8]*x2  + diag[12]*x3;
38       yy[ncnt+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2  + diag[13]*x3;
39       yy[ncnt+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
40       yy[ncnt+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
41       break;
42     case 5:
43       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4];
44       yy[ncnt]   = diag[0]*x0 + diag[5]*x1 + diag[10]*x2  + diag[15]*x3 + diag[20]*x4;
45       yy[ncnt+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2  + diag[16]*x3 + diag[21]*x4;
46       yy[ncnt+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
47       yy[ncnt+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
48       yy[ncnt+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
49       break;
50     case 6:
51       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5];
52       yy[ncnt]   = diag[0]*x0 + diag[6]*x1  + diag[12]*x2  + diag[18]*x3 + diag[24]*x4 + diag[30]*x5;
53       yy[ncnt+1] = diag[1]*x0 + diag[7]*x1  + diag[13]*x2  + diag[19]*x3 + diag[25]*x4 + diag[31]*x5;
54       yy[ncnt+2] = diag[2]*x0 + diag[8]*x1  + diag[14]*x2  + diag[20]*x3 + diag[26]*x4 + diag[32]*x5;
55       yy[ncnt+3] = diag[3]*x0 + diag[9]*x1  + diag[15]*x2  + diag[21]*x3 + diag[27]*x4 + diag[33]*x5;
56       yy[ncnt+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2  + diag[22]*x3 + diag[28]*x4 + diag[34]*x5;
57       yy[ncnt+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2  + diag[23]*x3 + diag[29]*x4 + diag[35]*x5;
58       break;
59     case 7:
60       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5]; x6 = xx[ncnt+6];
61       yy[ncnt]   = diag[0]*x0 + diag[7]*x1  + diag[14]*x2  + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6;
62       yy[ncnt+1] = diag[1]*x0 + diag[8]*x1  + diag[15]*x2  + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6;
63       yy[ncnt+2] = diag[2]*x0 + diag[9]*x1  + diag[16]*x2  + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6;
64       yy[ncnt+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2  + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6;
65       yy[ncnt+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2  + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6;
66       yy[ncnt+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2  + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6;
67       yy[ncnt+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2  + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6;
68       break;
69     default:
70       for (ib=0; ib<bs; ib++) {
71         PetscScalar rowsum = 0;
72         for (jb=0; jb<bs; jb++) {
73           rowsum += diag[ib+jb*bs] * xx[ncnt+jb];
74         }
75         yy[ncnt+ib] = rowsum;
76       }
77     }
78     ncnt += bsizes[i];
79     diag += bsizes[i]*bsizes[i];
80   }
81   PetscCall(VecRestoreArrayRead(x,&xx));
82   PetscCall(VecRestoreArray(y,&yy));
83   PetscFunctionReturn(0);
84 }
85 
86 PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc)
87 {
88   PC_VPBJacobi    *jac = (PC_VPBJacobi*)pc->data;
89   Mat            A = pc->pmat;
90   MatFactorError err;
91   PetscInt       i,nsize = 0,nlocal;
92   PetscInt       nblocks;
93   const PetscInt *bsizes;
94 
95   PetscFunctionBegin;
96   PetscCall(MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes));
97   PetscCall(MatGetLocalSize(pc->pmat,&nlocal,NULL));
98   PetscCheck(!nlocal || nblocks,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatSetVariableBlockSizes() before using PCVPBJACOBI");
99   if (!jac->diag) {
100     for (i=0; i<nblocks; i++) nsize += bsizes[i]*bsizes[i];
101     PetscCall(PetscMalloc1(nsize,&jac->diag));
102   }
103   PetscCall(MatInvertVariableBlockDiagonal(A,nblocks,bsizes,jac->diag));
104   PetscCall(MatFactorGetError(A,&err));
105   if (err) pc->failedreason = (PCFailedReason)err;
106   pc->ops->apply = PCApply_VPBJacobi;
107   PetscFunctionReturn(0);
108 }
109 
110 static PetscErrorCode PCSetUp_VPBJacobi(PC pc)
111 {
112   PetscFunctionBegin;
113   /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
114 #if defined(PETSC_HAVE_CUDA)
115   PetscBool isCuda;
116   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat,&isCuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
117 #endif
118 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
119   PetscBool isKok;
120   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat,&isKok,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
121 #endif
122 
123 #if defined(PETSC_HAVE_CUDA)
124   if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc)); else
125 #endif
126 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
127   if (isKok) PetscCall(PCSetUp_VPBJacobi_Kokkos(pc)); else
128 #endif
129   {PetscCall(PCSetUp_VPBJacobi_Host(pc));}
130   PetscFunctionReturn(0);
131 }
132 
133 PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc)
134 {
135   PC_VPBJacobi    *jac = (PC_VPBJacobi*)pc->data;
136 
137   PetscFunctionBegin;
138   /*
139       Free the private data structure that was hanging off the PC
140   */
141   PetscCall(PetscFree(jac->diag));
142   PetscCall(PetscFree(pc->data));
143   PetscFunctionReturn(0);
144 }
145 
146 /*MC
147      PCVPBJACOBI - Variable size point block Jacobi preconditioner
148 
149    Notes:
150      See PCJACOBI for point Jacobi preconditioning, PCPBJACOBI for fixed point block size, and PCBJACOBI for large size blocks
151 
152      This works for AIJ matrices
153 
154      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
155      is detected a PETSc error is generated.
156 
157      One must call MatSetVariableBlockSizes() to use this preconditioner
158 
159    Developer Notes:
160      This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
161      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
162      terminating KSP with a KSP_DIVERGED_NANORIF allowing
163      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
164 
165      Perhaps should provide an option that allows generation of a valid preconditioner
166      even if a block is singular as the PCJACOBI does.
167 
168    Level: beginner
169 
170 .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI`
171 
172 M*/
173 
174 PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc)
175 {
176   PC_VPBJacobi   *jac;
177 
178   PetscFunctionBegin;
179   /*
180      Creates the private data structure for this preconditioner and
181      attach it to the PC object.
182   */
183   PetscCall(PetscNewLog(pc,&jac));
184   pc->data = (void*)jac;
185 
186   /*
187      Initialize the pointers to vectors to ZERO; these will be used to store
188      diagonal entries of the matrix for fast preconditioner application.
189   */
190   jac->diag = NULL;
191 
192   /*
193       Set the pointers for the functions that are provided above.
194       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
195       are called, they will automatically call these functions.  Note we
196       choose not to provide a couple of these functions since they are
197       not needed.
198   */
199   pc->ops->apply               = PCApply_VPBJacobi;
200   pc->ops->applytranspose      = NULL;
201   pc->ops->setup               = PCSetUp_VPBJacobi;
202   pc->ops->destroy             = PCDestroy_VPBJacobi;
203   pc->ops->setfromoptions      = NULL;
204   pc->ops->applyrichardson     = NULL;
205   pc->ops->applysymmetricleft  = NULL;
206   pc->ops->applysymmetricright = NULL;
207   PetscFunctionReturn(0);
208 }
209