1 #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h> 2 3 static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y) { 4 PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 5 PetscInt i, ncnt = 0; 6 const MatScalar *diag = jac->diag; 7 PetscInt ib, jb, bs; 8 const PetscScalar *xx; 9 PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 10 PetscInt nblocks; 11 const PetscInt *bsizes; 12 13 PetscFunctionBegin; 14 PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 15 PetscCall(VecGetArrayRead(x, &xx)); 16 PetscCall(VecGetArray(y, &yy)); 17 for (i = 0; i < nblocks; i++) { 18 bs = bsizes[i]; 19 switch (bs) { 20 case 1: yy[ncnt] = *diag * xx[ncnt]; break; 21 case 2: 22 x0 = xx[ncnt]; 23 x1 = xx[ncnt + 1]; 24 yy[ncnt] = diag[0] * x0 + diag[2] * x1; 25 yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1; 26 break; 27 case 3: 28 x0 = xx[ncnt]; 29 x1 = xx[ncnt + 1]; 30 x2 = xx[ncnt + 2]; 31 yy[ncnt] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 32 yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 33 yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 34 break; 35 case 4: 36 x0 = xx[ncnt]; 37 x1 = xx[ncnt + 1]; 38 x2 = xx[ncnt + 2]; 39 x3 = xx[ncnt + 3]; 40 yy[ncnt] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 41 yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 42 yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 43 yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 44 break; 45 case 5: 46 x0 = xx[ncnt]; 47 x1 = xx[ncnt + 1]; 48 x2 = xx[ncnt + 2]; 49 x3 = xx[ncnt + 3]; 50 x4 = xx[ncnt + 4]; 51 yy[ncnt] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 52 yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 53 yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 54 yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 55 yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 56 break; 57 case 6: 58 x0 = xx[ncnt]; 59 x1 = xx[ncnt + 1]; 60 x2 = xx[ncnt + 2]; 61 x3 = xx[ncnt + 3]; 62 x4 = xx[ncnt + 4]; 63 x5 = xx[ncnt + 5]; 64 yy[ncnt] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 65 yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 66 yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 67 yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 68 yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 69 yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 70 break; 71 case 7: 72 x0 = xx[ncnt]; 73 x1 = xx[ncnt + 1]; 74 x2 = xx[ncnt + 2]; 75 x3 = xx[ncnt + 3]; 76 x4 = xx[ncnt + 4]; 77 x5 = xx[ncnt + 5]; 78 x6 = xx[ncnt + 6]; 79 yy[ncnt] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 80 yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 81 yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 82 yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 83 yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 84 yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 85 yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 86 break; 87 default: 88 for (ib = 0; ib < bs; ib++) { 89 PetscScalar rowsum = 0; 90 for (jb = 0; jb < bs; jb++) { rowsum += diag[ib + jb * bs] * xx[ncnt + jb]; } 91 yy[ncnt + ib] = rowsum; 92 } 93 } 94 ncnt += bsizes[i]; 95 diag += bsizes[i] * bsizes[i]; 96 } 97 PetscCall(VecRestoreArrayRead(x, &xx)); 98 PetscCall(VecRestoreArray(y, &yy)); 99 PetscFunctionReturn(0); 100 } 101 102 PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc) { 103 PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 104 Mat A = pc->pmat; 105 MatFactorError err; 106 PetscInt i, nsize = 0, nlocal; 107 PetscInt nblocks; 108 const PetscInt *bsizes; 109 110 PetscFunctionBegin; 111 PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 112 PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL)); 113 PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI"); 114 if (!jac->diag) { 115 for (i = 0; i < nblocks; i++) nsize += bsizes[i] * bsizes[i]; 116 PetscCall(PetscMalloc1(nsize, &jac->diag)); 117 } 118 PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag)); 119 PetscCall(MatFactorGetError(A, &err)); 120 if (err) pc->failedreason = (PCFailedReason)err; 121 pc->ops->apply = PCApply_VPBJacobi; 122 PetscFunctionReturn(0); 123 } 124 125 static PetscErrorCode PCSetUp_VPBJacobi(PC pc) { 126 PetscFunctionBegin; 127 /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */ 128 #if defined(PETSC_HAVE_CUDA) 129 PetscBool isCuda; 130 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 131 #endif 132 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 133 PetscBool isKok; 134 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 135 #endif 136 137 #if defined(PETSC_HAVE_CUDA) 138 if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc)); 139 else 140 #endif 141 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 142 if (isKok) 143 PetscCall(PCSetUp_VPBJacobi_Kokkos(pc)); 144 else 145 #endif 146 { 147 PetscCall(PCSetUp_VPBJacobi_Host(pc)); 148 } 149 PetscFunctionReturn(0); 150 } 151 152 PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc) { 153 PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 154 155 PetscFunctionBegin; 156 /* 157 Free the private data structure that was hanging off the PC 158 */ 159 PetscCall(PetscFree(jac->diag)); 160 PetscCall(PetscFree(pc->data)); 161 PetscFunctionReturn(0); 162 } 163 164 /*MC 165 PCVPBJACOBI - Variable size point block Jacobi preconditioner 166 167 Notes: 168 See PCJACOBI for point Jacobi preconditioning, PCPBJACOBI for fixed point block size, and PCBJACOBI for large size blocks 169 170 This works for AIJ matrices 171 172 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 173 is detected a PETSc error is generated. 174 175 One must call MatSetVariableBlockSizes() to use this preconditioner 176 177 Developer Notes: 178 This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow 179 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 180 terminating KSP with a KSP_DIVERGED_NANORIF allowing 181 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 182 183 Perhaps should provide an option that allows generation of a valid preconditioner 184 even if a block is singular as the PCJACOBI does. 185 186 Level: beginner 187 188 .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI` 189 190 M*/ 191 192 PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) { 193 PC_VPBJacobi *jac; 194 195 PetscFunctionBegin; 196 /* 197 Creates the private data structure for this preconditioner and 198 attach it to the PC object. 199 */ 200 PetscCall(PetscNewLog(pc, &jac)); 201 pc->data = (void *)jac; 202 203 /* 204 Initialize the pointers to vectors to ZERO; these will be used to store 205 diagonal entries of the matrix for fast preconditioner application. 206 */ 207 jac->diag = NULL; 208 209 /* 210 Set the pointers for the functions that are provided above. 211 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 212 are called, they will automatically call these functions. Note we 213 choose not to provide a couple of these functions since they are 214 not needed. 215 */ 216 pc->ops->apply = PCApply_VPBJacobi; 217 pc->ops->applytranspose = NULL; 218 pc->ops->setup = PCSetUp_VPBJacobi; 219 pc->ops->destroy = PCDestroy_VPBJacobi; 220 pc->ops->setfromoptions = NULL; 221 pc->ops->applyrichardson = NULL; 222 pc->ops->applysymmetricleft = NULL; 223 pc->ops->applysymmetricright = NULL; 224 PetscFunctionReturn(0); 225 } 226