153022affSStefano Zampini #include <petsc/private/pcimpl.h> 253022affSStefano Zampini #include <petsc/private/matimpl.h> 353022affSStefano Zampini #include <h2opusconf.h> 453022affSStefano Zampini 553022affSStefano Zampini /* Use GPU only if H2OPUS is configured for GPU */ 653022affSStefano Zampini #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 753022affSStefano Zampini #define PETSC_H2OPUS_USE_GPU 853022affSStefano Zampini #endif 953022affSStefano Zampini 1053022affSStefano Zampini typedef struct { 1153022affSStefano Zampini Mat A; 1253022affSStefano Zampini Mat M; 1353022affSStefano Zampini PetscScalar s0; 1453022affSStefano Zampini 1553022affSStefano Zampini /* sampler for Newton-Schultz */ 1653022affSStefano Zampini Mat S; 1753022affSStefano Zampini PetscInt hyperorder; 1853022affSStefano Zampini Vec wns[4]; 1953022affSStefano Zampini Mat wnsmat[4]; 2053022affSStefano Zampini 2153022affSStefano Zampini /* convergence testing */ 2253022affSStefano Zampini Mat T; 2353022affSStefano Zampini Vec w; 2453022affSStefano Zampini 2553022affSStefano Zampini /* Support for PCSetCoordinates */ 2653022affSStefano Zampini PetscInt sdim; 2753022affSStefano Zampini PetscInt nlocc; 2853022affSStefano Zampini PetscReal *coords; 2953022affSStefano Zampini 3053022affSStefano Zampini /* Newton-Schultz customization */ 3153022affSStefano Zampini PetscInt maxits; 3253022affSStefano Zampini PetscReal rtol, atol; 3353022affSStefano Zampini PetscBool monitor; 3453022affSStefano Zampini PetscBool useapproximatenorms; 3553022affSStefano Zampini NormType normtype; 3653022affSStefano Zampini 3753022affSStefano Zampini /* Used when pmat != MATH2OPUS */ 3853022affSStefano Zampini PetscReal eta; 3953022affSStefano Zampini PetscInt leafsize; 4053022affSStefano Zampini PetscInt max_rank; 4153022affSStefano Zampini PetscInt bs; 4253022affSStefano Zampini PetscReal mrtol; 4353022affSStefano Zampini 448db51263SStefano Zampini /* CPU/GPU */ 458db51263SStefano Zampini PetscBool forcecpu; 4653022affSStefano Zampini PetscBool boundtocpu; 4753022affSStefano Zampini } PC_H2OPUS; 4853022affSStefano Zampini 4953022affSStefano Zampini PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *); 5053022affSStefano Zampini 519371c9d4SSatish Balay static PetscErrorCode PCReset_H2OPUS(PC pc) { 5253022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 5353022affSStefano Zampini 5453022affSStefano Zampini PetscFunctionBegin; 5553022affSStefano Zampini pch2opus->sdim = 0; 5653022affSStefano Zampini pch2opus->nlocc = 0; 579566063dSJacob Faibussowitsch PetscCall(PetscFree(pch2opus->coords)); 589566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->A)); 599566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->M)); 609566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->T)); 619566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pch2opus->w)); 629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->S)); 639566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pch2opus->wns[0])); 649566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pch2opus->wns[1])); 659566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pch2opus->wns[2])); 669566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pch2opus->wns[3])); 679566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 689566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 699566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 709566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 7153022affSStefano Zampini PetscFunctionReturn(0); 7253022affSStefano Zampini } 7353022affSStefano Zampini 749371c9d4SSatish Balay static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords) { 7553022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 7653022affSStefano Zampini PetscBool reset = PETSC_TRUE; 7753022affSStefano Zampini 7853022affSStefano Zampini PetscFunctionBegin; 7953022affSStefano Zampini if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) { 809566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset)); 8153022affSStefano Zampini reset = (PetscBool)!reset; 8253022affSStefano Zampini } 831c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8453022affSStefano Zampini if (reset) { 859566063dSJacob Faibussowitsch PetscCall(PCReset_H2OPUS(pc)); 869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords)); 879566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc)); 8853022affSStefano Zampini pch2opus->sdim = sdim; 8953022affSStefano Zampini pch2opus->nlocc = nlocc; 9053022affSStefano Zampini } 9153022affSStefano Zampini PetscFunctionReturn(0); 9253022affSStefano Zampini } 9353022affSStefano Zampini 949371c9d4SSatish Balay static PetscErrorCode PCDestroy_H2OPUS(PC pc) { 9553022affSStefano Zampini PetscFunctionBegin; 969566063dSJacob Faibussowitsch PetscCall(PCReset_H2OPUS(pc)); 979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL)); 989566063dSJacob Faibussowitsch PetscCall(PetscFree(pc->data)); 9953022affSStefano Zampini PetscFunctionReturn(0); 10053022affSStefano Zampini } 10153022affSStefano Zampini 1029371c9d4SSatish Balay static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems *PetscOptionsObject) { 10353022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 10453022affSStefano Zampini 10553022affSStefano Zampini PetscFunctionBegin; 106d0609cedSBarry Smith PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options"); 1079566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL)); 1089566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL)); 1099566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL)); 1109566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL)); 1119566063dSJacob Faibussowitsch PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL)); 1129566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL)); 1139566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL)); 1149566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL)); 1159566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL)); 1169566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL)); 1179566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL)); 1188db51263SStefano Zampini PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL)); 119d0609cedSBarry Smith PetscOptionsHeadEnd(); 12053022affSStefano Zampini PetscFunctionReturn(0); 12153022affSStefano Zampini } 12253022affSStefano Zampini 12353022affSStefano Zampini typedef struct { 12453022affSStefano Zampini Mat A; 12553022affSStefano Zampini Mat M; 12653022affSStefano Zampini Vec w; 12753022affSStefano Zampini } AAtCtx; 12853022affSStefano Zampini 1299371c9d4SSatish Balay static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y) { 13053022affSStefano Zampini AAtCtx *aat; 13153022affSStefano Zampini 13253022affSStefano Zampini PetscFunctionBegin; 1339566063dSJacob Faibussowitsch PetscCall(MatShellGetContext(A, (void *)&aat)); 1349566063dSJacob Faibussowitsch /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */ 1359566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(aat->A, x, aat->w)); 1369566063dSJacob Faibussowitsch PetscCall(MatMult(aat->A, aat->w, y)); 13753022affSStefano Zampini PetscFunctionReturn(0); 13853022affSStefano Zampini } 13953022affSStefano Zampini 1409371c9d4SSatish Balay static PetscErrorCode PCH2OpusSetUpInit(PC pc) { 14153022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 14253022affSStefano Zampini Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt; 14353022affSStefano Zampini PetscInt M, m; 14453022affSStefano Zampini VecType vtype; 14553022affSStefano Zampini PetscReal n; 14653022affSStefano Zampini AAtCtx aat; 14753022affSStefano Zampini 14853022affSStefano Zampini PetscFunctionBegin; 14953022affSStefano Zampini aat.A = A; 15053022affSStefano Zampini aat.M = pch2opus->M; /* unused so far */ 1519566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &aat.w)); 1529566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, NULL)); 1539566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, NULL)); 1549566063dSJacob Faibussowitsch PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt)); 1559566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu)); 1569566063dSJacob Faibussowitsch PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (void (*)(void))MatMult_AAt)); 1579566063dSJacob Faibussowitsch PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMult_AAt)); 1589566063dSJacob Faibussowitsch PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS)); 1599566063dSJacob Faibussowitsch PetscCall(MatGetVecType(A, &vtype)); 1609566063dSJacob Faibussowitsch PetscCall(MatShellSetVecType(AAt, vtype)); 1619566063dSJacob Faibussowitsch PetscCall(MatNorm(AAt, NORM_1, &n)); 16253022affSStefano Zampini pch2opus->s0 = 1. / n; 1639566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aat.w)); 1649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AAt)); 16553022affSStefano Zampini PetscFunctionReturn(0); 16653022affSStefano Zampini } 16753022affSStefano Zampini 1689371c9d4SSatish Balay static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t) { 16953022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 17053022affSStefano Zampini 17153022affSStefano Zampini PetscFunctionBegin; 1729566063dSJacob Faibussowitsch if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y)); 1739566063dSJacob Faibussowitsch else PetscCall(MatMult(pch2opus->M, x, y)); 17453022affSStefano Zampini PetscFunctionReturn(0); 17553022affSStefano Zampini } 17653022affSStefano Zampini 1779371c9d4SSatish Balay static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t) { 17853022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 17953022affSStefano Zampini 18053022affSStefano Zampini PetscFunctionBegin; 1819566063dSJacob Faibussowitsch if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 1829566063dSJacob Faibussowitsch else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 18353022affSStefano Zampini PetscFunctionReturn(0); 18453022affSStefano Zampini } 18553022affSStefano Zampini 1869371c9d4SSatish Balay static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y) { 18753022affSStefano Zampini PetscFunctionBegin; 1889566063dSJacob Faibussowitsch PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE)); 18953022affSStefano Zampini PetscFunctionReturn(0); 19053022affSStefano Zampini } 19153022affSStefano Zampini 1929371c9d4SSatish Balay static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y) { 19353022affSStefano Zampini PetscFunctionBegin; 1949566063dSJacob Faibussowitsch PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE)); 19553022affSStefano Zampini PetscFunctionReturn(0); 19653022affSStefano Zampini } 19753022affSStefano Zampini 1989371c9d4SSatish Balay static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y) { 19953022affSStefano Zampini PetscFunctionBegin; 2009566063dSJacob Faibussowitsch PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE)); 20153022affSStefano Zampini PetscFunctionReturn(0); 20253022affSStefano Zampini } 20353022affSStefano Zampini 2049371c9d4SSatish Balay static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y) { 20553022affSStefano Zampini PetscFunctionBegin; 2069566063dSJacob Faibussowitsch PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE)); 20753022affSStefano Zampini PetscFunctionReturn(0); 20853022affSStefano Zampini } 20953022affSStefano Zampini 21053022affSStefano Zampini /* used to test the norm of (M^-1 A - I) */ 2119371c9d4SSatish Balay static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t) { 21253022affSStefano Zampini PC pc; 21353022affSStefano Zampini Mat A; 21453022affSStefano Zampini PC_H2OPUS *pch2opus; 21553022affSStefano Zampini PetscBool sideleft = PETSC_TRUE; 21653022affSStefano Zampini 21753022affSStefano Zampini PetscFunctionBegin; 2189566063dSJacob Faibussowitsch PetscCall(MatShellGetContext(M, (void *)&pc)); 21953022affSStefano Zampini pch2opus = (PC_H2OPUS *)pc->data; 2209566063dSJacob Faibussowitsch if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL)); 22153022affSStefano Zampini A = pch2opus->A; 2229566063dSJacob Faibussowitsch PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu)); 22353022affSStefano Zampini if (t) { 22453022affSStefano Zampini if (sideleft) { 2259566063dSJacob Faibussowitsch PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w)); 2269566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(A, pch2opus->w, y)); 22753022affSStefano Zampini } else { 2289566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(A, x, pch2opus->w)); 2299566063dSJacob Faibussowitsch PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y)); 23053022affSStefano Zampini } 23153022affSStefano Zampini } else { 23253022affSStefano Zampini if (sideleft) { 2339566063dSJacob Faibussowitsch PetscCall(MatMult(A, x, pch2opus->w)); 2349566063dSJacob Faibussowitsch PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y)); 23553022affSStefano Zampini } else { 2369566063dSJacob Faibussowitsch PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w)); 2379566063dSJacob Faibussowitsch PetscCall(MatMult(A, pch2opus->w, y)); 23853022affSStefano Zampini } 23953022affSStefano Zampini } 2408db51263SStefano Zampini PetscCall(VecAXPY(y, -1.0, x)); 24153022affSStefano Zampini PetscFunctionReturn(0); 24253022affSStefano Zampini } 24353022affSStefano Zampini 2449371c9d4SSatish Balay static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y) { 24553022affSStefano Zampini PetscFunctionBegin; 2469566063dSJacob Faibussowitsch PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE)); 24753022affSStefano Zampini PetscFunctionReturn(0); 24853022affSStefano Zampini } 24953022affSStefano Zampini 2509371c9d4SSatish Balay static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y) { 25153022affSStefano Zampini PetscFunctionBegin; 2529566063dSJacob Faibussowitsch PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE)); 25353022affSStefano Zampini PetscFunctionReturn(0); 25453022affSStefano Zampini } 25553022affSStefano Zampini 25653022affSStefano Zampini /* HyperPower kernel: 25753022affSStefano Zampini Y = R = x 25853022affSStefano Zampini for i = 1 . . . l - 1 do 25953022affSStefano Zampini R = (I - A * Xk) * R 26053022affSStefano Zampini Y = Y + R 26153022affSStefano Zampini Y = Xk * Y 26253022affSStefano Zampini */ 2639371c9d4SSatish Balay static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t) { 26453022affSStefano Zampini PC pc; 26553022affSStefano Zampini Mat A; 26653022affSStefano Zampini PC_H2OPUS *pch2opus; 26753022affSStefano Zampini 26853022affSStefano Zampini PetscFunctionBegin; 2699566063dSJacob Faibussowitsch PetscCall(MatShellGetContext(M, (void *)&pc)); 27053022affSStefano Zampini pch2opus = (PC_H2OPUS *)pc->data; 27153022affSStefano Zampini A = pch2opus->A; 2729566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1])); 2739566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3])); 2749566063dSJacob Faibussowitsch PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu)); 2759566063dSJacob Faibussowitsch PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu)); 2769566063dSJacob Faibussowitsch PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu)); 2779566063dSJacob Faibussowitsch PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu)); 2789566063dSJacob Faibussowitsch PetscCall(VecCopy(x, pch2opus->wns[0])); 2799566063dSJacob Faibussowitsch PetscCall(VecCopy(x, pch2opus->wns[3])); 28053022affSStefano Zampini if (t) { 2815f80ce2aSJacob Faibussowitsch for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) { 2829566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1])); 2839566063dSJacob Faibussowitsch PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2])); 2849566063dSJacob Faibussowitsch PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2])); 2859566063dSJacob Faibussowitsch PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0])); 28653022affSStefano Zampini } 2879566063dSJacob Faibussowitsch PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y)); 28853022affSStefano Zampini } else { 2895f80ce2aSJacob Faibussowitsch for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) { 2909566063dSJacob Faibussowitsch PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1])); 2919566063dSJacob Faibussowitsch PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2])); 2929566063dSJacob Faibussowitsch PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2])); 2939566063dSJacob Faibussowitsch PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0])); 29453022affSStefano Zampini } 2959566063dSJacob Faibussowitsch PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y)); 29653022affSStefano Zampini } 29753022affSStefano Zampini PetscFunctionReturn(0); 29853022affSStefano Zampini } 29953022affSStefano Zampini 3009371c9d4SSatish Balay static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y) { 30153022affSStefano Zampini PetscFunctionBegin; 3029566063dSJacob Faibussowitsch PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE)); 30353022affSStefano Zampini PetscFunctionReturn(0); 30453022affSStefano Zampini } 30553022affSStefano Zampini 3069371c9d4SSatish Balay static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y) { 30753022affSStefano Zampini PetscFunctionBegin; 3089566063dSJacob Faibussowitsch PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE)); 30953022affSStefano Zampini PetscFunctionReturn(0); 31053022affSStefano Zampini } 31153022affSStefano Zampini 31253022affSStefano Zampini /* Hyper power kernel, MatMat version */ 3139371c9d4SSatish Balay static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t) { 31453022affSStefano Zampini PC pc; 31553022affSStefano Zampini Mat A; 31653022affSStefano Zampini PC_H2OPUS *pch2opus; 31753022affSStefano Zampini PetscInt i; 31853022affSStefano Zampini 31953022affSStefano Zampini PetscFunctionBegin; 3209566063dSJacob Faibussowitsch PetscCall(MatShellGetContext(M, (void *)&pc)); 32153022affSStefano Zampini pch2opus = (PC_H2OPUS *)pc->data; 32253022affSStefano Zampini A = pch2opus->A; 32353022affSStefano Zampini if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 3249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 3259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 32653022affSStefano Zampini } 32753022affSStefano Zampini if (!pch2opus->wnsmat[0]) { 3289566063dSJacob Faibussowitsch PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0])); 3299566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1])); 33053022affSStefano Zampini } 33153022affSStefano Zampini if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) { 3329566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 3339566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 33453022affSStefano Zampini } 33553022affSStefano Zampini if (!pch2opus->wnsmat[2]) { 3369566063dSJacob Faibussowitsch PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2])); 3379566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3])); 33853022affSStefano Zampini } 3399566063dSJacob Faibussowitsch PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 3409566063dSJacob Faibussowitsch PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN)); 34153022affSStefano Zampini if (t) { 34253022affSStefano Zampini for (i = 0; i < pch2opus->hyperorder - 1; i++) { 3439566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1])); 3449566063dSJacob Faibussowitsch PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2])); 3459566063dSJacob Faibussowitsch PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN)); 3469566063dSJacob Faibussowitsch PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 34753022affSStefano Zampini } 3489566063dSJacob Faibussowitsch PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y)); 34953022affSStefano Zampini } else { 35053022affSStefano Zampini for (i = 0; i < pch2opus->hyperorder - 1; i++) { 3519566063dSJacob Faibussowitsch PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1])); 3529566063dSJacob Faibussowitsch PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[2])); 3539566063dSJacob Faibussowitsch PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN)); 3549566063dSJacob Faibussowitsch PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 35553022affSStefano Zampini } 3569566063dSJacob Faibussowitsch PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y)); 35753022affSStefano Zampini } 35853022affSStefano Zampini PetscFunctionReturn(0); 35953022affSStefano Zampini } 36053022affSStefano Zampini 3619371c9d4SSatish Balay static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, void *ctx) { 36253022affSStefano Zampini PetscFunctionBegin; 3639566063dSJacob Faibussowitsch PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE)); 36453022affSStefano Zampini PetscFunctionReturn(0); 36553022affSStefano Zampini } 36653022affSStefano Zampini 36753022affSStefano Zampini /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */ 3689371c9d4SSatish Balay static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t) { 36953022affSStefano Zampini PC pc; 37053022affSStefano Zampini Mat A; 37153022affSStefano Zampini PC_H2OPUS *pch2opus; 37253022affSStefano Zampini 37353022affSStefano Zampini PetscFunctionBegin; 3749566063dSJacob Faibussowitsch PetscCall(MatShellGetContext(M, (void *)&pc)); 37553022affSStefano Zampini pch2opus = (PC_H2OPUS *)pc->data; 37653022affSStefano Zampini A = pch2opus->A; 3779566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1])); 3789566063dSJacob Faibussowitsch PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu)); 3799566063dSJacob Faibussowitsch PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu)); 38053022affSStefano Zampini if (t) { 3819566063dSJacob Faibussowitsch PetscCall(PCApplyTranspose_H2OPUS(pc, x, y)); 3829566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(A, y, pch2opus->wns[1])); 3839566063dSJacob Faibussowitsch PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0])); 3849566063dSJacob Faibussowitsch PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0])); 38553022affSStefano Zampini } else { 3869566063dSJacob Faibussowitsch PetscCall(PCApply_H2OPUS(pc, x, y)); 3879566063dSJacob Faibussowitsch PetscCall(MatMult(A, y, pch2opus->wns[0])); 3889566063dSJacob Faibussowitsch PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1])); 3899566063dSJacob Faibussowitsch PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1])); 39053022affSStefano Zampini } 39153022affSStefano Zampini PetscFunctionReturn(0); 39253022affSStefano Zampini } 39353022affSStefano Zampini 3949371c9d4SSatish Balay static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y) { 39553022affSStefano Zampini PetscFunctionBegin; 3969566063dSJacob Faibussowitsch PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE)); 39753022affSStefano Zampini PetscFunctionReturn(0); 39853022affSStefano Zampini } 39953022affSStefano Zampini 4009371c9d4SSatish Balay static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y) { 40153022affSStefano Zampini PetscFunctionBegin; 4029566063dSJacob Faibussowitsch PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE)); 40353022affSStefano Zampini PetscFunctionReturn(0); 40453022affSStefano Zampini } 40553022affSStefano Zampini 40653022affSStefano Zampini /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */ 4079371c9d4SSatish Balay static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t) { 40853022affSStefano Zampini PC pc; 40953022affSStefano Zampini Mat A; 41053022affSStefano Zampini PC_H2OPUS *pch2opus; 41153022affSStefano Zampini 41253022affSStefano Zampini PetscFunctionBegin; 4139566063dSJacob Faibussowitsch PetscCall(MatShellGetContext(M, (void *)&pc)); 41453022affSStefano Zampini pch2opus = (PC_H2OPUS *)pc->data; 41553022affSStefano Zampini A = pch2opus->A; 41653022affSStefano Zampini if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 4179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 4189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 41953022affSStefano Zampini } 42053022affSStefano Zampini if (!pch2opus->wnsmat[0]) { 4219566063dSJacob Faibussowitsch PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0])); 4229566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1])); 42353022affSStefano Zampini } 42453022affSStefano Zampini if (t) { 4259566063dSJacob Faibussowitsch PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y)); 4269566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1])); 4279566063dSJacob Faibussowitsch PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0])); 4289566063dSJacob Faibussowitsch PetscCall(MatScale(Y, 2.)); 4299566063dSJacob Faibussowitsch PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 43053022affSStefano Zampini } else { 4319566063dSJacob Faibussowitsch PetscCall(PCApplyMat_H2OPUS(pc, X, Y)); 4329566063dSJacob Faibussowitsch PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[0])); 4339566063dSJacob Faibussowitsch PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1])); 4349566063dSJacob Faibussowitsch PetscCall(MatScale(Y, 2.)); 4359566063dSJacob Faibussowitsch PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN)); 43653022affSStefano Zampini } 43753022affSStefano Zampini PetscFunctionReturn(0); 43853022affSStefano Zampini } 43953022affSStefano Zampini 4409371c9d4SSatish Balay static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx) { 44153022affSStefano Zampini PetscFunctionBegin; 4429566063dSJacob Faibussowitsch PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE)); 44353022affSStefano Zampini PetscFunctionReturn(0); 44453022affSStefano Zampini } 44553022affSStefano Zampini 4469371c9d4SSatish Balay static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc) { 44753022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 44853022affSStefano Zampini Mat A = pc->useAmat ? pc->mat : pc->pmat; 44953022affSStefano Zampini 45053022affSStefano Zampini PetscFunctionBegin; 45153022affSStefano Zampini if (!pch2opus->S) { 45253022affSStefano Zampini PetscInt M, N, m, n; 45353022affSStefano Zampini 4549566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, &N)); 4559566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, &n)); 4569566063dSJacob Faibussowitsch PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S)); 4579566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A)); 45853022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU) 4599566063dSJacob Faibussowitsch PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA)); 46053022affSStefano Zampini #endif 46153022affSStefano Zampini } 46253022affSStefano Zampini if (pch2opus->hyperorder >= 2) { 4639566063dSJacob Faibussowitsch PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_Hyper)); 4649566063dSJacob Faibussowitsch PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Hyper)); 4659566063dSJacob Faibussowitsch PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE)); 4669566063dSJacob Faibussowitsch PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA)); 46753022affSStefano Zampini } else { 4689566063dSJacob Faibussowitsch PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_NS)); 4699566063dSJacob Faibussowitsch PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_NS)); 4709566063dSJacob Faibussowitsch PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE)); 4719566063dSJacob Faibussowitsch PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA)); 47253022affSStefano Zampini } 4739566063dSJacob Faibussowitsch PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S)); 4749566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu)); 47553022affSStefano Zampini /* XXX */ 4769566063dSJacob Faibussowitsch PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE)); 47753022affSStefano Zampini PetscFunctionReturn(0); 47853022affSStefano Zampini } 47953022affSStefano Zampini 4809371c9d4SSatish Balay static PetscErrorCode PCSetUp_H2OPUS(PC pc) { 48153022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 48253022affSStefano Zampini Mat A = pc->useAmat ? pc->mat : pc->pmat; 48353022affSStefano Zampini NormType norm = pch2opus->normtype; 48453022affSStefano Zampini PetscReal initerr = 0.0, err; 48553022affSStefano Zampini PetscBool ish2opus; 48653022affSStefano Zampini 48753022affSStefano Zampini PetscFunctionBegin; 48853022affSStefano Zampini if (!pch2opus->T) { 48953022affSStefano Zampini PetscInt M, N, m, n; 49053022affSStefano Zampini const char *prefix; 49153022affSStefano Zampini 4929566063dSJacob Faibussowitsch PetscCall(PCGetOptionsPrefix(pc, &prefix)); 4939566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, &N)); 4949566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, &n)); 4959566063dSJacob Faibussowitsch PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T)); 4969566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A)); 4979566063dSJacob Faibussowitsch PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (void (*)(void))MatMult_MAmI)); 4989566063dSJacob Faibussowitsch PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_MAmI)); 4999566063dSJacob Faibussowitsch PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS)); 50053022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU) 5019566063dSJacob Faibussowitsch PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA)); 50253022affSStefano Zampini #endif 5039566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)pch2opus->T)); 5049566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix)); 5059566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_")); 50653022affSStefano Zampini } 5079566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->A)); 5089566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus)); 50953022affSStefano Zampini if (ish2opus) { 5109566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 51153022affSStefano Zampini pch2opus->A = A; 51253022affSStefano Zampini } else { 51353022affSStefano Zampini const char *prefix; 5149566063dSJacob Faibussowitsch PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A)); 51553022affSStefano Zampini /* XXX */ 5169566063dSJacob Faibussowitsch PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE)); 5179566063dSJacob Faibussowitsch PetscCall(PCGetOptionsPrefix(pc, &prefix)); 5189566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix)); 5199566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_")); 5209566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(pch2opus->A)); 5219566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY)); 5229566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY)); 52353022affSStefano Zampini /* XXX */ 5249566063dSJacob Faibussowitsch PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE)); 5258db51263SStefano Zampini 5268db51263SStefano Zampini /* always perform construction on the GPU unless forcecpu is true */ 5278db51263SStefano Zampini PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu)); 52853022affSStefano Zampini } 52953022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU) 5308db51263SStefano Zampini pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu; 53153022affSStefano Zampini #endif 5329566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu)); 53353022affSStefano Zampini if (pch2opus->M) { /* see if we can reuse M as initial guess */ 53453022affSStefano Zampini PetscReal reuse; 53553022affSStefano Zampini 5369566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu)); 5379566063dSJacob Faibussowitsch PetscCall(MatNorm(pch2opus->T, norm, &reuse)); 5389566063dSJacob Faibussowitsch if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M)); 53953022affSStefano Zampini } 54053022affSStefano Zampini if (!pch2opus->M) { 54153022affSStefano Zampini const char *prefix; 5429566063dSJacob Faibussowitsch PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M)); 5439566063dSJacob Faibussowitsch PetscCall(PCGetOptionsPrefix(pc, &prefix)); 5449566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix)); 5459566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_")); 5469566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(pch2opus->M)); 5479566063dSJacob Faibussowitsch PetscCall(PCH2OpusSetUpInit(pc)); 5489566063dSJacob Faibussowitsch PetscCall(MatScale(pch2opus->M, pch2opus->s0)); 54953022affSStefano Zampini } 55053022affSStefano Zampini /* A and M have the same h2 matrix structure, save on reordering routines */ 5519566063dSJacob Faibussowitsch PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE)); 5529566063dSJacob Faibussowitsch PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE)); 5538db51263SStefano Zampini if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr)); 55453022affSStefano Zampini if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR; 55553022affSStefano Zampini err = initerr; 55648a46eb9SPierre Jolivet if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr))); 55753022affSStefano Zampini if (initerr > pch2opus->atol && !pc->failedreason) { 55853022affSStefano Zampini PetscInt i; 55953022affSStefano Zampini 5609566063dSJacob Faibussowitsch PetscCall(PCH2OpusSetUpSampler_Private(pc)); 56153022affSStefano Zampini for (i = 0; i < pch2opus->maxits; i++) { 56253022affSStefano Zampini Mat M; 56353022affSStefano Zampini const char *prefix; 56453022affSStefano Zampini 5659566063dSJacob Faibussowitsch PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M)); 5669566063dSJacob Faibussowitsch PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix)); 5679566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(M, prefix)); 5689566063dSJacob Faibussowitsch PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE)); 5699566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(M)); 5709566063dSJacob Faibussowitsch PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE)); 5719566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 5729566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 5739566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->M)); 57453022affSStefano Zampini pch2opus->M = M; 5758db51263SStefano Zampini if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err)); 57648a46eb9SPierre Jolivet if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr))); 57753022affSStefano Zampini if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR; 57853022affSStefano Zampini if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break; 57953022affSStefano Zampini } 58053022affSStefano Zampini } 58153022affSStefano Zampini /* cleanup setup workspace */ 5829566063dSJacob Faibussowitsch PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE)); 5839566063dSJacob Faibussowitsch PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE)); 5849566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pch2opus->wns[0])); 5859566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pch2opus->wns[1])); 5869566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pch2opus->wns[2])); 5879566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pch2opus->wns[3])); 5889566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 5899566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 5909566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 5919566063dSJacob Faibussowitsch PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 59253022affSStefano Zampini PetscFunctionReturn(0); 59353022affSStefano Zampini } 59453022affSStefano Zampini 5959371c9d4SSatish Balay static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer) { 59653022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 59753022affSStefano Zampini PetscBool isascii; 59853022affSStefano Zampini 59953022affSStefano Zampini PetscFunctionBegin; 6009566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 60153022affSStefano Zampini if (isascii) { 60253022affSStefano Zampini if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) { 6039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n")); 6049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushTab(viewer)); 6059566063dSJacob Faibussowitsch PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL)); 6069566063dSJacob Faibussowitsch PetscCall(MatView(pch2opus->A, viewer)); 6079566063dSJacob Faibussowitsch PetscCall(PetscViewerPopFormat(viewer)); 6089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopTab(viewer)); 60953022affSStefano Zampini } 61053022affSStefano Zampini if (pch2opus->M) { 6119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n")); 6129566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushTab(viewer)); 6139566063dSJacob Faibussowitsch PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL)); 6149566063dSJacob Faibussowitsch PetscCall(MatView(pch2opus->M, viewer)); 6159566063dSJacob Faibussowitsch PetscCall(PetscViewerPopFormat(viewer)); 6169566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopTab(viewer)); 61753022affSStefano Zampini } 6189566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0)); 61953022affSStefano Zampini } 62053022affSStefano Zampini PetscFunctionReturn(0); 62153022affSStefano Zampini } 62253022affSStefano Zampini 623*f1580f4eSBarry Smith /*MC 624*f1580f4eSBarry Smith PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package. 625*f1580f4eSBarry Smith 626*f1580f4eSBarry Smith Options Database Keys: 627*f1580f4eSBarry Smith + -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()` 628*f1580f4eSBarry Smith . -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz 629*f1580f4eSBarry Smith . -pc_h2opus_monitor - monitor Newton-Schultz convergence 630*f1580f4eSBarry Smith . -pc_h2opus_atol - absolute tolerance 631*f1580f4eSBarry Smith . -pc_h2opus_rtol - relative tolerance 632*f1580f4eSBarry Smith . -pc_h2opus_norm_type - normtype 633*f1580f4eSBarry Smith . -pc_h2opus_hyperorder - Hyper power order of sampling 634*f1580f4eSBarry Smith . -pc_h2opus_leafsize - leaf size when constructed from kernel 635*f1580f4eSBarry Smith . -pc_h2opus_eta - admissibility condition tolerance 636*f1580f4eSBarry Smith . -pc_h2opus_maxrank - maximum rank when constructed from matvecs 637*f1580f4eSBarry Smith . -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs 638*f1580f4eSBarry Smith . -pc_h2opus_mrtol - relative tolerance for construction from sampling 639*f1580f4eSBarry Smith - -pc_h2opus_forcecpu - force construction of preconditioner on CPU 640*f1580f4eSBarry Smith 641*f1580f4eSBarry Smith Level: intermediate 642*f1580f4eSBarry Smith 643*f1580f4eSBarry Smith .seealso: `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()` 644*f1580f4eSBarry Smith M*/ 645*f1580f4eSBarry Smith 6469371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc) { 64753022affSStefano Zampini PC_H2OPUS *pch2opus; 64853022affSStefano Zampini 64953022affSStefano Zampini PetscFunctionBegin; 6509566063dSJacob Faibussowitsch PetscCall(PetscNewLog(pc, &pch2opus)); 65153022affSStefano Zampini pc->data = (void *)pch2opus; 65253022affSStefano Zampini 65353022affSStefano Zampini pch2opus->atol = 1.e-2; 65453022affSStefano Zampini pch2opus->rtol = 1.e-6; 65553022affSStefano Zampini pch2opus->maxits = 50; 656*f1580f4eSBarry Smith pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */ 65753022affSStefano Zampini pch2opus->normtype = NORM_2; 65853022affSStefano Zampini 65953022affSStefano Zampini /* these are needed when we are sampling the pmat */ 66053022affSStefano Zampini pch2opus->eta = PETSC_DECIDE; 66153022affSStefano Zampini pch2opus->leafsize = PETSC_DECIDE; 66253022affSStefano Zampini pch2opus->max_rank = PETSC_DECIDE; 66353022affSStefano Zampini pch2opus->bs = PETSC_DECIDE; 66453022affSStefano Zampini pch2opus->mrtol = PETSC_DECIDE; 66553022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU) 66653022affSStefano Zampini pch2opus->boundtocpu = PETSC_FALSE; 66753022affSStefano Zampini #else 66853022affSStefano Zampini pch2opus->boundtocpu = PETSC_TRUE; 66953022affSStefano Zampini #endif 67053022affSStefano Zampini pc->ops->destroy = PCDestroy_H2OPUS; 67153022affSStefano Zampini pc->ops->setup = PCSetUp_H2OPUS; 67253022affSStefano Zampini pc->ops->apply = PCApply_H2OPUS; 67353022affSStefano Zampini pc->ops->matapply = PCApplyMat_H2OPUS; 67453022affSStefano Zampini pc->ops->applytranspose = PCApplyTranspose_H2OPUS; 67553022affSStefano Zampini pc->ops->reset = PCReset_H2OPUS; 67653022affSStefano Zampini pc->ops->setfromoptions = PCSetFromOptions_H2OPUS; 67753022affSStefano Zampini pc->ops->view = PCView_H2OPUS; 67853022affSStefano Zampini 6799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS)); 68053022affSStefano Zampini PetscFunctionReturn(0); 68153022affSStefano Zampini } 682