1 #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h> 2 3 static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y) { 4 PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 5 PetscInt i, ncnt = 0; 6 const MatScalar *diag = jac->diag; 7 PetscInt ib, jb, bs; 8 const PetscScalar *xx; 9 PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6; 10 PetscInt nblocks; 11 const PetscInt *bsizes; 12 13 PetscFunctionBegin; 14 PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 15 PetscCall(VecGetArrayRead(x, &xx)); 16 PetscCall(VecGetArray(y, &yy)); 17 for (i = 0; i < nblocks; i++) { 18 bs = bsizes[i]; 19 switch (bs) { 20 case 1: yy[ncnt] = *diag * xx[ncnt]; break; 21 case 2: 22 x0 = xx[ncnt]; 23 x1 = xx[ncnt + 1]; 24 yy[ncnt] = diag[0] * x0 + diag[2] * x1; 25 yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1; 26 break; 27 case 3: 28 x0 = xx[ncnt]; 29 x1 = xx[ncnt + 1]; 30 x2 = xx[ncnt + 2]; 31 yy[ncnt] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2; 32 yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2; 33 yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2; 34 break; 35 case 4: 36 x0 = xx[ncnt]; 37 x1 = xx[ncnt + 1]; 38 x2 = xx[ncnt + 2]; 39 x3 = xx[ncnt + 3]; 40 yy[ncnt] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3; 41 yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3; 42 yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3; 43 yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3; 44 break; 45 case 5: 46 x0 = xx[ncnt]; 47 x1 = xx[ncnt + 1]; 48 x2 = xx[ncnt + 2]; 49 x3 = xx[ncnt + 3]; 50 x4 = xx[ncnt + 4]; 51 yy[ncnt] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4; 52 yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4; 53 yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4; 54 yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4; 55 yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4; 56 break; 57 case 6: 58 x0 = xx[ncnt]; 59 x1 = xx[ncnt + 1]; 60 x2 = xx[ncnt + 2]; 61 x3 = xx[ncnt + 3]; 62 x4 = xx[ncnt + 4]; 63 x5 = xx[ncnt + 5]; 64 yy[ncnt] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5; 65 yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5; 66 yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5; 67 yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5; 68 yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5; 69 yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5; 70 break; 71 case 7: 72 x0 = xx[ncnt]; 73 x1 = xx[ncnt + 1]; 74 x2 = xx[ncnt + 2]; 75 x3 = xx[ncnt + 3]; 76 x4 = xx[ncnt + 4]; 77 x5 = xx[ncnt + 5]; 78 x6 = xx[ncnt + 6]; 79 yy[ncnt] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6; 80 yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6; 81 yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6; 82 yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6; 83 yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6; 84 yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6; 85 yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6; 86 break; 87 default: 88 for (ib = 0; ib < bs; ib++) { 89 PetscScalar rowsum = 0; 90 for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[ncnt + jb]; 91 yy[ncnt + ib] = rowsum; 92 } 93 } 94 ncnt += bsizes[i]; 95 diag += bsizes[i] * bsizes[i]; 96 } 97 PetscCall(VecRestoreArrayRead(x, &xx)); 98 PetscCall(VecRestoreArray(y, &yy)); 99 PetscFunctionReturn(0); 100 } 101 102 PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc) { 103 PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 104 Mat A = pc->pmat; 105 MatFactorError err; 106 PetscInt i, nsize = 0, nlocal; 107 PetscInt nblocks; 108 const PetscInt *bsizes; 109 110 PetscFunctionBegin; 111 PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes)); 112 PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL)); 113 PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI"); 114 if (!jac->diag) { 115 for (i = 0; i < nblocks; i++) nsize += bsizes[i] * bsizes[i]; 116 PetscCall(PetscMalloc1(nsize, &jac->diag)); 117 } 118 PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag)); 119 PetscCall(MatFactorGetError(A, &err)); 120 if (err) pc->failedreason = (PCFailedReason)err; 121 pc->ops->apply = PCApply_VPBJacobi; 122 PetscFunctionReturn(0); 123 } 124 125 static PetscErrorCode PCSetUp_VPBJacobi(PC pc) { 126 PetscFunctionBegin; 127 /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */ 128 #if defined(PETSC_HAVE_CUDA) 129 PetscBool isCuda; 130 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 131 #endif 132 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 133 PetscBool isKok; 134 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 135 #endif 136 137 #if defined(PETSC_HAVE_CUDA) 138 if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc)); 139 else 140 #endif 141 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 142 if (isKok) 143 PetscCall(PCSetUp_VPBJacobi_Kokkos(pc)); 144 else 145 #endif 146 { 147 PetscCall(PCSetUp_VPBJacobi_Host(pc)); 148 } 149 PetscFunctionReturn(0); 150 } 151 152 PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc) { 153 PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data; 154 155 PetscFunctionBegin; 156 /* 157 Free the private data structure that was hanging off the PC 158 */ 159 PetscCall(PetscFree(jac->diag)); 160 PetscCall(PetscFree(pc->data)); 161 PetscFunctionReturn(0); 162 } 163 164 /*MC 165 PCVPBJACOBI - Variable size point block Jacobi preconditioner 166 167 Level: beginner 168 169 Notes: 170 See `PCJACOBI` for point Jacobi preconditioning, `PCPBJACOBI` for fixed point block size, and `PCBJACOBI` for large size blocks 171 172 This works for `MATAIJ` matrices 173 174 Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot 175 is detected a PETSc error is generated. 176 177 One must call `MatSetVariableBlockSizes()` to use this preconditioner 178 179 Developer Notes: 180 This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow 181 the factorization to continue even after a zero pivot is found resulting in a Nan and hence 182 terminating `KSP` with a `KSP_DIVERGED_NANORIF` allowing 183 a nonlinear solver/ODE integrator to recover without stopping the program as currently happens. 184 185 Perhaps should provide an option that allows generation of a valid preconditioner 186 even if a block is singular as the `PCJACOBI` does. 187 188 .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI` 189 M*/ 190 191 PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc) { 192 PC_VPBJacobi *jac; 193 194 PetscFunctionBegin; 195 /* 196 Creates the private data structure for this preconditioner and 197 attach it to the PC object. 198 */ 199 PetscCall(PetscNew(&jac)); 200 pc->data = (void *)jac; 201 202 /* 203 Initialize the pointers to vectors to ZERO; these will be used to store 204 diagonal entries of the matrix for fast preconditioner application. 205 */ 206 jac->diag = NULL; 207 208 /* 209 Set the pointers for the functions that are provided above. 210 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 211 are called, they will automatically call these functions. Note we 212 choose not to provide a couple of these functions since they are 213 not needed. 214 */ 215 pc->ops->apply = PCApply_VPBJacobi; 216 pc->ops->applytranspose = NULL; 217 pc->ops->setup = PCSetUp_VPBJacobi; 218 pc->ops->destroy = PCDestroy_VPBJacobi; 219 pc->ops->setfromoptions = NULL; 220 pc->ops->applyrichardson = NULL; 221 pc->ops->applysymmetricleft = NULL; 222 pc->ops->applysymmetricright = NULL; 223 PetscFunctionReturn(0); 224 } 225