1 #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h> 2 3 static PetscErrorCode PCApply_VPBJacobi(PC pc,Vec x,Vec y) 4 { 5 PC_VPBJacobi *jac = (PC_VPBJacobi*)pc->data; 6 PetscInt i,ncnt = 0; 7 const MatScalar *diag = jac->diag; 8 PetscInt ib,jb,bs; 9 const PetscScalar *xx; 10 PetscScalar *yy,x0,x1,x2,x3,x4,x5,x6; 11 PetscInt nblocks; 12 const PetscInt *bsizes; 13 14 PetscFunctionBegin; 15 PetscCall(MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes)); 16 PetscCall(VecGetArrayRead(x,&xx)); 17 PetscCall(VecGetArray(y,&yy)); 18 for (i=0; i<nblocks; i++) { 19 bs = bsizes[i]; 20 switch (bs) { 21 case 1: 22 yy[ncnt] = *diag*xx[ncnt]; 23 break; 24 case 2: 25 x0 = xx[ncnt]; x1 = xx[ncnt+1]; 26 yy[ncnt] = diag[0]*x0 + diag[2]*x1; 27 yy[ncnt+1] = diag[1]*x0 + diag[3]*x1; 28 break; 29 case 3: 30 x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; 31 yy[ncnt] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 32 yy[ncnt+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 33 yy[ncnt+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 34 break; 35 case 4: 36 x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; 37 yy[ncnt] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 38 yy[ncnt+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 39 yy[ncnt+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 40 yy[ncnt+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 41 break; 42 case 5: 43 x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; 44 yy[ncnt] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 45 yy[ncnt+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 46 yy[ncnt+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 47 yy[ncnt+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 48 yy[ncnt+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 49 break; 50 case 6: 51 x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5]; 52 yy[ncnt] = diag[0]*x0 + diag[6]*x1 + diag[12]*x2 + diag[18]*x3 + diag[24]*x4 + diag[30]*x5; 53 yy[ncnt+1] = diag[1]*x0 + diag[7]*x1 + diag[13]*x2 + diag[19]*x3 + diag[25]*x4 + diag[31]*x5; 54 yy[ncnt+2] = diag[2]*x0 + diag[8]*x1 + diag[14]*x2 + diag[20]*x3 + diag[26]*x4 + diag[32]*x5; 55 yy[ncnt+3] = diag[3]*x0 + diag[9]*x1 + diag[15]*x2 + diag[21]*x3 + diag[27]*x4 + diag[33]*x5; 56 yy[ncnt+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2 + diag[22]*x3 + diag[28]*x4 + diag[34]*x5; 57 yy[ncnt+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2 + diag[23]*x3 + diag[29]*x4 + diag[35]*x5; 58 break; 59 case 7: 60 x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5]; x6 = xx[ncnt+6]; 61 yy[ncnt] = diag[0]*x0 + diag[7]*x1 + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6; 62 yy[ncnt+1] = diag[1]*x0 + diag[8]*x1 + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6; 63 yy[ncnt+2] = diag[2]*x0 + diag[9]*x1 + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6; 64 yy[ncnt+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6; 65 yy[ncnt+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6; 66 yy[ncnt+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6; 67 yy[ncnt+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6; 68 break; 69 default: 70 for (ib=0; ib<bs; ib++) { 71 PetscScalar rowsum = 0; 72 for (jb=0; jb<bs; jb++) { 73 rowsum += diag[ib+jb*bs] * xx[ncnt+jb]; 74 } 75 yy[ncnt+ib] = rowsum; 76 } 77 } 78 ncnt += bsizes[i]; 79 diag += bsizes[i]*bsizes[i]; 80 } 81 PetscCall(VecRestoreArrayRead(x,&xx)); 82 PetscCall(VecRestoreArray(y,&yy)); 83 PetscFunctionReturn(0); 84 } 85 86 PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc) 87 { 88 PC_VPBJacobi *jac = (PC_VPBJacobi*)pc->data; 89 Mat A = pc->pmat; 90 MatFactorError err; 91 PetscInt i,nsize = 0,nlocal; 92 PetscInt nblocks; 93 const PetscInt *bsizes; 94 95 PetscFunctionBegin; 96 PetscCall(MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes)); 97 PetscCall(MatGetLocalSize(pc->pmat,&nlocal,NULL)); 98 PetscCheck(!nlocal || nblocks,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatSetVariableBlockSizes() before using PCVPBJACOBI"); 99 if (!jac->diag) { 100 for (i=0; i<nblocks; i++) nsize += bsizes[i]*bsizes[i]; 101 PetscCall(PetscMalloc1(nsize,&jac->diag)); 102 } 103 PetscCall(MatInvertVariableBlockDiagonal(A,nblocks,bsizes,jac->diag)); 104 PetscCall(MatFactorGetError(A,&err)); 105 if (err) pc->failedreason = (PCFailedReason)err; 106 pc->ops->apply = PCApply_VPBJacobi; 107 PetscFunctionReturn(0); 108 } 109 110 static PetscErrorCode PCSetUp_VPBJacobi(PC pc) 111 { 112 PetscFunctionBegin; 113 /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */ 114 #if defined(PETSC_HAVE_CUDA) 115 PetscBool isCuda; 116 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat,&isCuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 117 #endif 118 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 119 PetscBool isKok; 120 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat,&isKok,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 121 #endif 122 123 #if defined(PETSC_HAVE_CUDA) 124 if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc)); else 125 #endif 126 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 127 if (isKok) PetscCall(PCSetUp_VPBJacobi_Kokkos(pc)); else 128 #endif 129 {PetscCall(PCSetUp_VPBJacobi_Host(pc));} 130 PetscFunctionReturn(0); 131 } 132 133 PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc) 134 { 135 PC_VPBJacobi *jac = (PC_VPBJacobi*)pc->data; 136 137 PetscFunctionBegin; 138 /* 139 Free the private data structure that was hanging off the PC 140 */ 141 PetscCall(PetscFree(jac->diag)); 142 PetscCall(PetscFree(pc->data)); 143 PetscFunctionReturn(0); 144 } 145 146 /*MC 147 PCVPBJACOBI - Variable size point block Jacobi preconditioner 148 149 Notes: 150 See PCJACOBI for point Jacobi preconditioning, PCPBJACOBI for fixed point block size, and PCBJACOBI for large size blocks 151 152 This works for AIJ matrices 153 154 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 155 is detected a PETSc error is generated. 156 157 One must call MatSetVariableBlockSizes() to use this preconditioner 158 159 Developer Notes: 160 This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 161 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 162 terminating KSP with a KSP_DIVERGED_NANORIF allowing 163 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 164 165 Perhaps should provide an option that allows generation of a valid preconditioner 166 even if a block is singular as the PCJACOBI does. 167 168 Level: beginner 169 170 .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI` 171 172 M*/ 173 174 PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) 175 { 176 PC_VPBJacobi *jac; 177 178 PetscFunctionBegin; 179 /* 180 Creates the private data structure for this preconditioner and 181 attach it to the PC object. 182 */ 183 PetscCall(PetscNewLog(pc,&jac)); 184 pc->data = (void*)jac; 185 186 /* 187 Initialize the pointers to vectors to ZERO; these will be used to store 188 diagonal entries of the matrix for fast preconditioner application. 189 */ 190 jac->diag = NULL; 191 192 /* 193 Set the pointers for the functions that are provided above. 194 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 195 are called, they will automatically call these functions. Note we 196 choose not to provide a couple of these functions since they are 197 not needed. 198 */ 199 pc->ops->apply = PCApply_VPBJacobi; 200 pc->ops->applytranspose = NULL; 201 pc->ops->setup = PCSetUp_VPBJacobi; 202 pc->ops->destroy = PCDestroy_VPBJacobi; 203 pc->ops->setfromoptions = NULL; 204 pc->ops->applyrichardson = NULL; 205 pc->ops->applysymmetricleft = NULL; 206 pc->ops->applysymmetricright = NULL; 207 PetscFunctionReturn(0); 208 } 209