1 #include <petsc/private/pcimpl.h> 2 #include <petsc/private/matimpl.h> 3 #include <h2opusconf.h> 4 5 /* Use GPU only if H2OPUS is configured for GPU */ 6 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 7 #define PETSC_H2OPUS_USE_GPU 8 #endif 9 10 typedef struct { 11 Mat A; 12 Mat M; 13 PetscScalar s0; 14 15 /* sampler for Newton-Schultz */ 16 Mat S; 17 PetscInt hyperorder; 18 Vec wns[4]; 19 Mat wnsmat[4]; 20 21 /* convergence testing */ 22 Mat T; 23 Vec w; 24 25 /* Support for PCSetCoordinates */ 26 PetscInt sdim; 27 PetscInt nlocc; 28 PetscReal *coords; 29 30 /* Newton-Schultz customization */ 31 PetscInt maxits; 32 PetscReal rtol, atol; 33 PetscBool monitor; 34 PetscBool useapproximatenorms; 35 NormType normtype; 36 37 /* Used when pmat != MATH2OPUS */ 38 PetscReal eta; 39 PetscInt leafsize; 40 PetscInt max_rank; 41 PetscInt bs; 42 PetscReal mrtol; 43 44 /* CPU/GPU */ 45 PetscBool forcecpu; 46 PetscBool boundtocpu; 47 } PC_H2OPUS; 48 49 PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *); 50 51 static PetscErrorCode PCReset_H2OPUS(PC pc) 52 { 53 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 54 55 PetscFunctionBegin; 56 pch2opus->sdim = 0; 57 pch2opus->nlocc = 0; 58 PetscCall(PetscFree(pch2opus->coords)); 59 PetscCall(MatDestroy(&pch2opus->A)); 60 PetscCall(MatDestroy(&pch2opus->M)); 61 PetscCall(MatDestroy(&pch2opus->T)); 62 PetscCall(VecDestroy(&pch2opus->w)); 63 PetscCall(MatDestroy(&pch2opus->S)); 64 PetscCall(VecDestroy(&pch2opus->wns[0])); 65 PetscCall(VecDestroy(&pch2opus->wns[1])); 66 PetscCall(VecDestroy(&pch2opus->wns[2])); 67 PetscCall(VecDestroy(&pch2opus->wns[3])); 68 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 69 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 70 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 71 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 72 PetscFunctionReturn(PETSC_SUCCESS); 73 } 74 75 static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords) 76 { 77 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 78 PetscBool reset = PETSC_TRUE; 79 80 PetscFunctionBegin; 81 if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) { 82 PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset)); 83 reset = (PetscBool)!reset; 84 } 85 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 86 if (reset) { 87 PetscCall(PCReset_H2OPUS(pc)); 88 PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords)); 89 PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc)); 90 pch2opus->sdim = sdim; 91 pch2opus->nlocc = nlocc; 92 } 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 static PetscErrorCode PCDestroy_H2OPUS(PC pc) 97 { 98 PetscFunctionBegin; 99 PetscCall(PCReset_H2OPUS(pc)); 100 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL)); 101 PetscCall(PetscFree(pc->data)); 102 PetscFunctionReturn(PETSC_SUCCESS); 103 } 104 105 static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems *PetscOptionsObject) 106 { 107 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 108 109 PetscFunctionBegin; 110 PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options"); 111 PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL)); 112 PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL)); 113 PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL)); 114 PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL)); 115 PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL)); 116 PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL)); 117 PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL)); 118 PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL)); 119 PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL)); 120 PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL)); 121 PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL)); 122 PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL)); 123 PetscOptionsHeadEnd(); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 typedef struct { 128 Mat A; 129 Mat M; 130 Vec w; 131 } AAtCtx; 132 133 static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y) 134 { 135 AAtCtx *aat; 136 137 PetscFunctionBegin; 138 PetscCall(MatShellGetContext(A, (void *)&aat)); 139 /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */ 140 PetscCall(MatMultTranspose(aat->A, x, aat->w)); 141 PetscCall(MatMult(aat->A, aat->w, y)); 142 PetscFunctionReturn(PETSC_SUCCESS); 143 } 144 145 static PetscErrorCode PCH2OpusSetUpInit(PC pc) 146 { 147 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 148 Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt; 149 PetscInt M, m; 150 VecType vtype; 151 PetscReal n; 152 AAtCtx aat; 153 154 PetscFunctionBegin; 155 aat.A = A; 156 aat.M = pch2opus->M; /* unused so far */ 157 PetscCall(MatCreateVecs(A, NULL, &aat.w)); 158 PetscCall(MatGetSize(A, &M, NULL)); 159 PetscCall(MatGetLocalSize(A, &m, NULL)); 160 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt)); 161 PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu)); 162 PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (void (*)(void))MatMult_AAt)); 163 PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMult_AAt)); 164 PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS)); 165 PetscCall(MatGetVecType(A, &vtype)); 166 PetscCall(MatShellSetVecType(AAt, vtype)); 167 PetscCall(MatNorm(AAt, NORM_1, &n)); 168 pch2opus->s0 = 1. / n; 169 PetscCall(VecDestroy(&aat.w)); 170 PetscCall(MatDestroy(&AAt)); 171 PetscFunctionReturn(PETSC_SUCCESS); 172 } 173 174 static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t) 175 { 176 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 177 178 PetscFunctionBegin; 179 if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y)); 180 else PetscCall(MatMult(pch2opus->M, x, y)); 181 PetscFunctionReturn(PETSC_SUCCESS); 182 } 183 184 static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t) 185 { 186 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 187 188 PetscFunctionBegin; 189 if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 190 else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y) 195 { 196 PetscFunctionBegin; 197 PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE)); 198 PetscFunctionReturn(PETSC_SUCCESS); 199 } 200 201 static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y) 202 { 203 PetscFunctionBegin; 204 PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE)); 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 208 static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y) 209 { 210 PetscFunctionBegin; 211 PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE)); 212 PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 215 static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y) 216 { 217 PetscFunctionBegin; 218 PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE)); 219 PetscFunctionReturn(PETSC_SUCCESS); 220 } 221 222 /* used to test the norm of (M^-1 A - I) */ 223 static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t) 224 { 225 PC pc; 226 Mat A; 227 PC_H2OPUS *pch2opus; 228 PetscBool sideleft = PETSC_TRUE; 229 230 PetscFunctionBegin; 231 PetscCall(MatShellGetContext(M, (void *)&pc)); 232 pch2opus = (PC_H2OPUS *)pc->data; 233 if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL)); 234 A = pch2opus->A; 235 PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu)); 236 if (t) { 237 if (sideleft) { 238 PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w)); 239 PetscCall(MatMultTranspose(A, pch2opus->w, y)); 240 } else { 241 PetscCall(MatMultTranspose(A, x, pch2opus->w)); 242 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y)); 243 } 244 } else { 245 if (sideleft) { 246 PetscCall(MatMult(A, x, pch2opus->w)); 247 PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y)); 248 } else { 249 PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w)); 250 PetscCall(MatMult(A, pch2opus->w, y)); 251 } 252 } 253 PetscCall(VecAXPY(y, -1.0, x)); 254 PetscFunctionReturn(PETSC_SUCCESS); 255 } 256 257 static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y) 258 { 259 PetscFunctionBegin; 260 PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE)); 261 PetscFunctionReturn(PETSC_SUCCESS); 262 } 263 264 static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y) 265 { 266 PetscFunctionBegin; 267 PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE)); 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 /* HyperPower kernel: 272 Y = R = x 273 for i = 1 . . . l - 1 do 274 R = (I - A * Xk) * R 275 Y = Y + R 276 Y = Xk * Y 277 */ 278 static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t) 279 { 280 PC pc; 281 Mat A; 282 PC_H2OPUS *pch2opus; 283 284 PetscFunctionBegin; 285 PetscCall(MatShellGetContext(M, (void *)&pc)); 286 pch2opus = (PC_H2OPUS *)pc->data; 287 A = pch2opus->A; 288 PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1])); 289 PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3])); 290 PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu)); 291 PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu)); 292 PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu)); 293 PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu)); 294 PetscCall(VecCopy(x, pch2opus->wns[0])); 295 PetscCall(VecCopy(x, pch2opus->wns[3])); 296 if (t) { 297 for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) { 298 PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1])); 299 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2])); 300 PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2])); 301 PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0])); 302 } 303 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y)); 304 } else { 305 for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) { 306 PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1])); 307 PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2])); 308 PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2])); 309 PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0])); 310 } 311 PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y)); 312 } 313 PetscFunctionReturn(PETSC_SUCCESS); 314 } 315 316 static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y) 317 { 318 PetscFunctionBegin; 319 PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE)); 320 PetscFunctionReturn(PETSC_SUCCESS); 321 } 322 323 static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y) 324 { 325 PetscFunctionBegin; 326 PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE)); 327 PetscFunctionReturn(PETSC_SUCCESS); 328 } 329 330 /* Hyper power kernel, MatMat version */ 331 static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t) 332 { 333 PC pc; 334 Mat A; 335 PC_H2OPUS *pch2opus; 336 PetscInt i; 337 338 PetscFunctionBegin; 339 PetscCall(MatShellGetContext(M, (void *)&pc)); 340 pch2opus = (PC_H2OPUS *)pc->data; 341 A = pch2opus->A; 342 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 343 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 344 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 345 } 346 if (!pch2opus->wnsmat[0]) { 347 PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0])); 348 PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1])); 349 } 350 if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) { 351 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 352 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 353 } 354 if (!pch2opus->wnsmat[2]) { 355 PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2])); 356 PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3])); 357 } 358 PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 359 PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN)); 360 if (t) { 361 for (i = 0; i < pch2opus->hyperorder - 1; i++) { 362 PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1])); 363 PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2])); 364 PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN)); 365 PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 366 } 367 PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y)); 368 } else { 369 for (i = 0; i < pch2opus->hyperorder - 1; i++) { 370 PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1])); 371 PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[2])); 372 PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN)); 373 PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 374 } 375 PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y)); 376 } 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, void *ctx) 381 { 382 PetscFunctionBegin; 383 PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE)); 384 PetscFunctionReturn(PETSC_SUCCESS); 385 } 386 387 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */ 388 static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t) 389 { 390 PC pc; 391 Mat A; 392 PC_H2OPUS *pch2opus; 393 394 PetscFunctionBegin; 395 PetscCall(MatShellGetContext(M, (void *)&pc)); 396 pch2opus = (PC_H2OPUS *)pc->data; 397 A = pch2opus->A; 398 PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1])); 399 PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu)); 400 PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu)); 401 if (t) { 402 PetscCall(PCApplyTranspose_H2OPUS(pc, x, y)); 403 PetscCall(MatMultTranspose(A, y, pch2opus->wns[1])); 404 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0])); 405 PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0])); 406 } else { 407 PetscCall(PCApply_H2OPUS(pc, x, y)); 408 PetscCall(MatMult(A, y, pch2opus->wns[0])); 409 PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1])); 410 PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1])); 411 } 412 PetscFunctionReturn(PETSC_SUCCESS); 413 } 414 415 static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y) 416 { 417 PetscFunctionBegin; 418 PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE)); 419 PetscFunctionReturn(PETSC_SUCCESS); 420 } 421 422 static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y) 423 { 424 PetscFunctionBegin; 425 PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE)); 426 PetscFunctionReturn(PETSC_SUCCESS); 427 } 428 429 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */ 430 static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t) 431 { 432 PC pc; 433 Mat A; 434 PC_H2OPUS *pch2opus; 435 436 PetscFunctionBegin; 437 PetscCall(MatShellGetContext(M, (void *)&pc)); 438 pch2opus = (PC_H2OPUS *)pc->data; 439 A = pch2opus->A; 440 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 441 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 442 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 443 } 444 if (!pch2opus->wnsmat[0]) { 445 PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0])); 446 PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1])); 447 } 448 if (t) { 449 PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y)); 450 PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1])); 451 PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0])); 452 PetscCall(MatScale(Y, 2.)); 453 PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 454 } else { 455 PetscCall(PCApplyMat_H2OPUS(pc, X, Y)); 456 PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[0])); 457 PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1])); 458 PetscCall(MatScale(Y, 2.)); 459 PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN)); 460 } 461 PetscFunctionReturn(PETSC_SUCCESS); 462 } 463 464 static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx) 465 { 466 PetscFunctionBegin; 467 PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE)); 468 PetscFunctionReturn(PETSC_SUCCESS); 469 } 470 471 static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc) 472 { 473 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 474 Mat A = pc->useAmat ? pc->mat : pc->pmat; 475 476 PetscFunctionBegin; 477 if (!pch2opus->S) { 478 PetscInt M, N, m, n; 479 480 PetscCall(MatGetSize(A, &M, &N)); 481 PetscCall(MatGetLocalSize(A, &m, &n)); 482 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S)); 483 PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A)); 484 #if defined(PETSC_H2OPUS_USE_GPU) 485 PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA)); 486 #endif 487 } 488 if (pch2opus->hyperorder >= 2) { 489 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_Hyper)); 490 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Hyper)); 491 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE)); 492 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA)); 493 } else { 494 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_NS)); 495 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_NS)); 496 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE)); 497 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA)); 498 } 499 PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S)); 500 PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu)); 501 /* XXX */ 502 PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE)); 503 PetscFunctionReturn(PETSC_SUCCESS); 504 } 505 506 static PetscErrorCode PCSetUp_H2OPUS(PC pc) 507 { 508 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 509 Mat A = pc->useAmat ? pc->mat : pc->pmat; 510 NormType norm = pch2opus->normtype; 511 PetscReal initerr = 0.0, err; 512 PetscBool ish2opus; 513 514 PetscFunctionBegin; 515 if (!pch2opus->T) { 516 PetscInt M, N, m, n; 517 const char *prefix; 518 519 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 520 PetscCall(MatGetSize(A, &M, &N)); 521 PetscCall(MatGetLocalSize(A, &m, &n)); 522 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T)); 523 PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A)); 524 PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (void (*)(void))MatMult_MAmI)); 525 PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_MAmI)); 526 PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS)); 527 #if defined(PETSC_H2OPUS_USE_GPU) 528 PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA)); 529 #endif 530 PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix)); 531 PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_")); 532 } 533 PetscCall(MatDestroy(&pch2opus->A)); 534 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus)); 535 if (ish2opus) { 536 PetscCall(PetscObjectReference((PetscObject)A)); 537 pch2opus->A = A; 538 } else { 539 const char *prefix; 540 PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A)); 541 /* XXX */ 542 PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE)); 543 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 544 PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix)); 545 PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_")); 546 PetscCall(MatSetFromOptions(pch2opus->A)); 547 PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY)); 548 PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY)); 549 /* XXX */ 550 PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE)); 551 552 /* always perform construction on the GPU unless forcecpu is true */ 553 PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu)); 554 } 555 #if defined(PETSC_H2OPUS_USE_GPU) 556 pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu; 557 #endif 558 PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu)); 559 if (pch2opus->M) { /* see if we can reuse M as initial guess */ 560 PetscReal reuse; 561 562 PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu)); 563 PetscCall(MatNorm(pch2opus->T, norm, &reuse)); 564 if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M)); 565 } 566 if (!pch2opus->M) { 567 const char *prefix; 568 PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M)); 569 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 570 PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix)); 571 PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_")); 572 PetscCall(MatSetFromOptions(pch2opus->M)); 573 PetscCall(PCH2OpusSetUpInit(pc)); 574 PetscCall(MatScale(pch2opus->M, pch2opus->s0)); 575 } 576 /* A and M have the same h2 matrix structure, save on reordering routines */ 577 PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE)); 578 PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE)); 579 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr)); 580 if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR; 581 err = initerr; 582 if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr))); 583 if (initerr > pch2opus->atol && !pc->failedreason) { 584 PetscInt i; 585 586 PetscCall(PCH2OpusSetUpSampler_Private(pc)); 587 for (i = 0; i < pch2opus->maxits; i++) { 588 Mat M; 589 const char *prefix; 590 591 PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M)); 592 PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix)); 593 PetscCall(MatSetOptionsPrefix(M, prefix)); 594 PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE)); 595 PetscCall(MatSetFromOptions(M)); 596 PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE)); 597 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 598 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 599 PetscCall(MatDestroy(&pch2opus->M)); 600 pch2opus->M = M; 601 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err)); 602 if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr))); 603 if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR; 604 if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break; 605 } 606 } 607 /* cleanup setup workspace */ 608 PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE)); 609 PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE)); 610 PetscCall(VecDestroy(&pch2opus->wns[0])); 611 PetscCall(VecDestroy(&pch2opus->wns[1])); 612 PetscCall(VecDestroy(&pch2opus->wns[2])); 613 PetscCall(VecDestroy(&pch2opus->wns[3])); 614 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 615 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 616 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 617 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 618 PetscFunctionReturn(PETSC_SUCCESS); 619 } 620 621 static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer) 622 { 623 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 624 PetscBool isascii; 625 626 PetscFunctionBegin; 627 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 628 if (isascii) { 629 if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) { 630 PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n")); 631 PetscCall(PetscViewerASCIIPushTab(viewer)); 632 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL)); 633 PetscCall(MatView(pch2opus->A, viewer)); 634 PetscCall(PetscViewerPopFormat(viewer)); 635 PetscCall(PetscViewerASCIIPopTab(viewer)); 636 } 637 if (pch2opus->M) { 638 PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n")); 639 PetscCall(PetscViewerASCIIPushTab(viewer)); 640 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL)); 641 PetscCall(MatView(pch2opus->M, viewer)); 642 PetscCall(PetscViewerPopFormat(viewer)); 643 PetscCall(PetscViewerASCIIPopTab(viewer)); 644 } 645 PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0)); 646 } 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 /*MC 651 PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package. 652 653 Options Database Keys: 654 + -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()` 655 . -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz 656 . -pc_h2opus_monitor - monitor Newton-Schultz convergence 657 . -pc_h2opus_atol - absolute tolerance 658 . -pc_h2opus_rtol - relative tolerance 659 . -pc_h2opus_norm_type - normtype 660 . -pc_h2opus_hyperorder - Hyper power order of sampling 661 . -pc_h2opus_leafsize - leaf size when constructed from kernel 662 . -pc_h2opus_eta - admissibility condition tolerance 663 . -pc_h2opus_maxrank - maximum rank when constructed from matvecs 664 . -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs 665 . -pc_h2opus_mrtol - relative tolerance for construction from sampling 666 - -pc_h2opus_forcecpu - force construction of preconditioner on CPU 667 668 Level: intermediate 669 670 .seealso: `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()` 671 M*/ 672 673 PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc) 674 { 675 PC_H2OPUS *pch2opus; 676 677 PetscFunctionBegin; 678 PetscCall(PetscNew(&pch2opus)); 679 pc->data = (void *)pch2opus; 680 681 pch2opus->atol = 1.e-2; 682 pch2opus->rtol = 1.e-6; 683 pch2opus->maxits = 50; 684 pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */ 685 pch2opus->normtype = NORM_2; 686 687 /* these are needed when we are sampling the pmat */ 688 pch2opus->eta = PETSC_DECIDE; 689 pch2opus->leafsize = PETSC_DECIDE; 690 pch2opus->max_rank = PETSC_DECIDE; 691 pch2opus->bs = PETSC_DECIDE; 692 pch2opus->mrtol = PETSC_DECIDE; 693 #if defined(PETSC_H2OPUS_USE_GPU) 694 pch2opus->boundtocpu = PETSC_FALSE; 695 #else 696 pch2opus->boundtocpu = PETSC_TRUE; 697 #endif 698 pc->ops->destroy = PCDestroy_H2OPUS; 699 pc->ops->setup = PCSetUp_H2OPUS; 700 pc->ops->apply = PCApply_H2OPUS; 701 pc->ops->matapply = PCApplyMat_H2OPUS; 702 pc->ops->applytranspose = PCApplyTranspose_H2OPUS; 703 pc->ops->reset = PCReset_H2OPUS; 704 pc->ops->setfromoptions = PCSetFromOptions_H2OPUS; 705 pc->ops->view = PCView_H2OPUS; 706 707 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS)); 708 PetscFunctionReturn(PETSC_SUCCESS); 709 } 710