1*53022affSStefano Zampini #include <petsc/private/pcimpl.h> 2*53022affSStefano Zampini #include <petsc/private/matimpl.h> 3*53022affSStefano Zampini #include <h2opusconf.h> 4*53022affSStefano Zampini 5*53022affSStefano Zampini /* Use GPU only if H2OPUS is configured for GPU */ 6*53022affSStefano Zampini #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 7*53022affSStefano Zampini #define PETSC_H2OPUS_USE_GPU 8*53022affSStefano Zampini #endif 9*53022affSStefano Zampini 10*53022affSStefano Zampini typedef struct { 11*53022affSStefano Zampini Mat A; 12*53022affSStefano Zampini Mat M; 13*53022affSStefano Zampini PetscScalar s0; 14*53022affSStefano Zampini 15*53022affSStefano Zampini /* sampler for Newton-Schultz */ 16*53022affSStefano Zampini Mat S; 17*53022affSStefano Zampini PetscInt hyperorder; 18*53022affSStefano Zampini Vec wns[4]; 19*53022affSStefano Zampini Mat wnsmat[4]; 20*53022affSStefano Zampini 21*53022affSStefano Zampini /* convergence testing */ 22*53022affSStefano Zampini Mat T; 23*53022affSStefano Zampini Vec w; 24*53022affSStefano Zampini PetscBool testMA; 25*53022affSStefano Zampini 26*53022affSStefano Zampini /* Support for PCSetCoordinates */ 27*53022affSStefano Zampini PetscInt sdim; 28*53022affSStefano Zampini PetscInt nlocc; 29*53022affSStefano Zampini PetscReal *coords; 30*53022affSStefano Zampini 31*53022affSStefano Zampini /* Newton-Schultz customization */ 32*53022affSStefano Zampini PetscInt maxits; 33*53022affSStefano Zampini PetscReal rtol,atol; 34*53022affSStefano Zampini PetscBool monitor; 35*53022affSStefano Zampini PetscBool useapproximatenorms; 36*53022affSStefano Zampini NormType normtype; 37*53022affSStefano Zampini 38*53022affSStefano Zampini /* Used when pmat != MATH2OPUS */ 39*53022affSStefano Zampini PetscReal eta; 40*53022affSStefano Zampini PetscInt leafsize; 41*53022affSStefano Zampini PetscInt max_rank; 42*53022affSStefano Zampini PetscInt bs; 43*53022affSStefano Zampini PetscReal mrtol; 44*53022affSStefano Zampini 45*53022affSStefano Zampini PetscBool boundtocpu; 46*53022affSStefano Zampini } PC_H2OPUS; 47*53022affSStefano Zampini 48*53022affSStefano Zampini PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*); 49*53022affSStefano Zampini 50*53022affSStefano Zampini static PetscErrorCode PCReset_H2OPUS(PC pc) 51*53022affSStefano Zampini { 52*53022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 53*53022affSStefano Zampini PetscErrorCode ierr; 54*53022affSStefano Zampini 55*53022affSStefano Zampini PetscFunctionBegin; 56*53022affSStefano Zampini pch2opus->sdim = 0; 57*53022affSStefano Zampini pch2opus->nlocc = 0; 58*53022affSStefano Zampini ierr = PetscFree(pch2opus->coords);CHKERRQ(ierr); 59*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->A);CHKERRQ(ierr); 60*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->M);CHKERRQ(ierr); 61*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->T);CHKERRQ(ierr); 62*53022affSStefano Zampini ierr = VecDestroy(&pch2opus->w);CHKERRQ(ierr); 63*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->S);CHKERRQ(ierr); 64*53022affSStefano Zampini ierr = VecDestroy(&pch2opus->wns[0]);CHKERRQ(ierr); 65*53022affSStefano Zampini ierr = VecDestroy(&pch2opus->wns[1]);CHKERRQ(ierr); 66*53022affSStefano Zampini ierr = VecDestroy(&pch2opus->wns[2]);CHKERRQ(ierr); 67*53022affSStefano Zampini ierr = VecDestroy(&pch2opus->wns[3]);CHKERRQ(ierr); 68*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr); 69*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr); 70*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[2]);CHKERRQ(ierr); 71*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[3]);CHKERRQ(ierr); 72*53022affSStefano Zampini PetscFunctionReturn(0); 73*53022affSStefano Zampini } 74*53022affSStefano Zampini 75*53022affSStefano Zampini static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords) 76*53022affSStefano Zampini { 77*53022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 78*53022affSStefano Zampini PetscBool reset = PETSC_TRUE; 79*53022affSStefano Zampini PetscErrorCode ierr; 80*53022affSStefano Zampini 81*53022affSStefano Zampini PetscFunctionBegin; 82*53022affSStefano Zampini if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) { 83*53022affSStefano Zampini ierr = PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset);CHKERRQ(ierr); 84*53022affSStefano Zampini reset = (PetscBool)!reset; 85*53022affSStefano Zampini } 86*53022affSStefano Zampini ierr = MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 87*53022affSStefano Zampini if (reset) { 88*53022affSStefano Zampini ierr = PCReset_H2OPUS(pc);CHKERRQ(ierr); 89*53022affSStefano Zampini ierr = PetscMalloc1(sdim*nlocc,&pch2opus->coords);CHKERRQ(ierr); 90*53022affSStefano Zampini ierr = PetscArraycpy(pch2opus->coords,coords,sdim*nlocc);CHKERRQ(ierr); 91*53022affSStefano Zampini pch2opus->sdim = sdim; 92*53022affSStefano Zampini pch2opus->nlocc = nlocc; 93*53022affSStefano Zampini } 94*53022affSStefano Zampini PetscFunctionReturn(0); 95*53022affSStefano Zampini } 96*53022affSStefano Zampini 97*53022affSStefano Zampini static PetscErrorCode PCDestroy_H2OPUS(PC pc) 98*53022affSStefano Zampini { 99*53022affSStefano Zampini PetscErrorCode ierr; 100*53022affSStefano Zampini 101*53022affSStefano Zampini PetscFunctionBegin; 102*53022affSStefano Zampini ierr = PCReset_H2OPUS(pc);CHKERRQ(ierr); 103*53022affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL);CHKERRQ(ierr); 104*53022affSStefano Zampini ierr = PetscFree(pc->data);CHKERRQ(ierr); 105*53022affSStefano Zampini PetscFunctionReturn(0); 106*53022affSStefano Zampini } 107*53022affSStefano Zampini 108*53022affSStefano Zampini static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc) 109*53022affSStefano Zampini { 110*53022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 111*53022affSStefano Zampini PetscErrorCode ierr; 112*53022affSStefano Zampini 113*53022affSStefano Zampini PetscFunctionBegin; 114*53022affSStefano Zampini ierr = PetscOptionsHead(PetscOptionsObject,"H2OPUS options");CHKERRQ(ierr); 115*53022affSStefano Zampini ierr = PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL);CHKERRQ(ierr); 116*53022affSStefano Zampini ierr = PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL);CHKERRQ(ierr); 117*53022affSStefano Zampini ierr = PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL);CHKERRQ(ierr); 118*53022affSStefano Zampini ierr = PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL);CHKERRQ(ierr); 119*53022affSStefano Zampini ierr = PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL);CHKERRQ(ierr); 120*53022affSStefano Zampini ierr = PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL);CHKERRQ(ierr); 121*53022affSStefano Zampini ierr = PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL);CHKERRQ(ierr); 122*53022affSStefano Zampini ierr = PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL);CHKERRQ(ierr); 123*53022affSStefano Zampini ierr = PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL);CHKERRQ(ierr); 124*53022affSStefano Zampini ierr = PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL);CHKERRQ(ierr); 125*53022affSStefano Zampini ierr = PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL);CHKERRQ(ierr); 126*53022affSStefano Zampini ierr = PetscOptionsTail();CHKERRQ(ierr); 127*53022affSStefano Zampini PetscFunctionReturn(0); 128*53022affSStefano Zampini } 129*53022affSStefano Zampini 130*53022affSStefano Zampini typedef struct { 131*53022affSStefano Zampini Mat A; 132*53022affSStefano Zampini Mat M; 133*53022affSStefano Zampini Vec w; 134*53022affSStefano Zampini } AAtCtx; 135*53022affSStefano Zampini 136*53022affSStefano Zampini static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y) 137*53022affSStefano Zampini { 138*53022affSStefano Zampini AAtCtx *aat; 139*53022affSStefano Zampini PetscErrorCode ierr; 140*53022affSStefano Zampini 141*53022affSStefano Zampini PetscFunctionBegin; 142*53022affSStefano Zampini ierr = MatShellGetContext(A,(void*)&aat);CHKERRQ(ierr); 143*53022affSStefano Zampini /* ierr = MatMultTranspose(aat->M,x,aat->w);CHKERRQ(ierr); */ 144*53022affSStefano Zampini ierr = MatMultTranspose(aat->A,x,aat->w);CHKERRQ(ierr); 145*53022affSStefano Zampini ierr = MatMult(aat->A,aat->w,y);CHKERRQ(ierr); 146*53022affSStefano Zampini PetscFunctionReturn(0); 147*53022affSStefano Zampini } 148*53022affSStefano Zampini 149*53022affSStefano Zampini static PetscErrorCode PCH2OpusSetUpInit(PC pc) 150*53022affSStefano Zampini { 151*53022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 152*53022affSStefano Zampini Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt; 153*53022affSStefano Zampini PetscInt M,m; 154*53022affSStefano Zampini VecType vtype; 155*53022affSStefano Zampini PetscReal n; 156*53022affSStefano Zampini AAtCtx aat; 157*53022affSStefano Zampini PetscErrorCode ierr; 158*53022affSStefano Zampini 159*53022affSStefano Zampini PetscFunctionBegin; 160*53022affSStefano Zampini aat.A = A; 161*53022affSStefano Zampini aat.M = pch2opus->M; /* unused so far */ 162*53022affSStefano Zampini ierr = MatCreateVecs(A,NULL,&aat.w);CHKERRQ(ierr); 163*53022affSStefano Zampini ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr); 164*53022affSStefano Zampini ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 165*53022affSStefano Zampini ierr = MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt);CHKERRQ(ierr); 166*53022affSStefano Zampini ierr = MatBindToCPU(AAt,pch2opus->boundtocpu);CHKERRQ(ierr); 167*53022affSStefano Zampini ierr = MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt);CHKERRQ(ierr); 168*53022affSStefano Zampini ierr = MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt);CHKERRQ(ierr); 169*53022affSStefano Zampini ierr = MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);CHKERRQ(ierr); 170*53022affSStefano Zampini ierr = MatGetVecType(A,&vtype);CHKERRQ(ierr); 171*53022affSStefano Zampini ierr = MatShellSetVecType(AAt,vtype);CHKERRQ(ierr); 172*53022affSStefano Zampini ierr = MatNorm(AAt,NORM_1,&n);CHKERRQ(ierr); 173*53022affSStefano Zampini pch2opus->s0 = 1./n; 174*53022affSStefano Zampini ierr = VecDestroy(&aat.w);CHKERRQ(ierr); 175*53022affSStefano Zampini ierr = MatDestroy(&AAt);CHKERRQ(ierr); 176*53022affSStefano Zampini PetscFunctionReturn(0); 177*53022affSStefano Zampini } 178*53022affSStefano Zampini 179*53022affSStefano Zampini static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t) 180*53022affSStefano Zampini { 181*53022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 182*53022affSStefano Zampini PetscErrorCode ierr; 183*53022affSStefano Zampini 184*53022affSStefano Zampini PetscFunctionBegin; 185*53022affSStefano Zampini if (t) { 186*53022affSStefano Zampini ierr = MatMultTranspose(pch2opus->M,x,y);CHKERRQ(ierr); 187*53022affSStefano Zampini } else { 188*53022affSStefano Zampini ierr = MatMult(pch2opus->M,x,y);CHKERRQ(ierr); 189*53022affSStefano Zampini } 190*53022affSStefano Zampini PetscFunctionReturn(0); 191*53022affSStefano Zampini } 192*53022affSStefano Zampini 193*53022affSStefano Zampini static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t) 194*53022affSStefano Zampini { 195*53022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 196*53022affSStefano Zampini PetscErrorCode ierr; 197*53022affSStefano Zampini 198*53022affSStefano Zampini PetscFunctionBegin; 199*53022affSStefano Zampini if (t) { 200*53022affSStefano Zampini ierr = MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr); 201*53022affSStefano Zampini } else { 202*53022affSStefano Zampini ierr = MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr); 203*53022affSStefano Zampini } 204*53022affSStefano Zampini PetscFunctionReturn(0); 205*53022affSStefano Zampini } 206*53022affSStefano Zampini 207*53022affSStefano Zampini static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y) 208*53022affSStefano Zampini { 209*53022affSStefano Zampini PetscErrorCode ierr; 210*53022affSStefano Zampini 211*53022affSStefano Zampini PetscFunctionBegin; 212*53022affSStefano Zampini ierr = PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE);CHKERRQ(ierr); 213*53022affSStefano Zampini PetscFunctionReturn(0); 214*53022affSStefano Zampini } 215*53022affSStefano Zampini 216*53022affSStefano Zampini static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y) 217*53022affSStefano Zampini { 218*53022affSStefano Zampini PetscErrorCode ierr; 219*53022affSStefano Zampini 220*53022affSStefano Zampini PetscFunctionBegin; 221*53022affSStefano Zampini ierr = PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE);CHKERRQ(ierr); 222*53022affSStefano Zampini PetscFunctionReturn(0); 223*53022affSStefano Zampini } 224*53022affSStefano Zampini 225*53022affSStefano Zampini static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y) 226*53022affSStefano Zampini { 227*53022affSStefano Zampini PetscErrorCode ierr; 228*53022affSStefano Zampini 229*53022affSStefano Zampini PetscFunctionBegin; 230*53022affSStefano Zampini ierr = PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE);CHKERRQ(ierr); 231*53022affSStefano Zampini PetscFunctionReturn(0); 232*53022affSStefano Zampini } 233*53022affSStefano Zampini 234*53022affSStefano Zampini static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y) 235*53022affSStefano Zampini { 236*53022affSStefano Zampini PetscErrorCode ierr; 237*53022affSStefano Zampini 238*53022affSStefano Zampini PetscFunctionBegin; 239*53022affSStefano Zampini ierr = PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE);CHKERRQ(ierr); 240*53022affSStefano Zampini PetscFunctionReturn(0); 241*53022affSStefano Zampini } 242*53022affSStefano Zampini 243*53022affSStefano Zampini /* used to test the norm of (M^-1 A - I) */ 244*53022affSStefano Zampini static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t) 245*53022affSStefano Zampini { 246*53022affSStefano Zampini PC pc; 247*53022affSStefano Zampini Mat A; 248*53022affSStefano Zampini PC_H2OPUS *pch2opus; 249*53022affSStefano Zampini PetscErrorCode ierr; 250*53022affSStefano Zampini PetscBool sideleft = PETSC_TRUE; 251*53022affSStefano Zampini 252*53022affSStefano Zampini PetscFunctionBegin; 253*53022affSStefano Zampini ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr); 254*53022affSStefano Zampini pch2opus = (PC_H2OPUS*)pc->data; 255*53022affSStefano Zampini if (!pch2opus->w) { 256*53022affSStefano Zampini ierr = MatCreateVecs(pch2opus->M,&pch2opus->w,NULL);CHKERRQ(ierr); 257*53022affSStefano Zampini } 258*53022affSStefano Zampini A = pch2opus->A; 259*53022affSStefano Zampini ierr = VecBindToCPU(pch2opus->w,pch2opus->boundtocpu);CHKERRQ(ierr); 260*53022affSStefano Zampini if (t) { 261*53022affSStefano Zampini if (sideleft) { 262*53022affSStefano Zampini ierr = PCApplyTranspose_H2OPUS(pc,x,pch2opus->w);CHKERRQ(ierr); 263*53022affSStefano Zampini ierr = MatMultTranspose(A,pch2opus->w,y);CHKERRQ(ierr); 264*53022affSStefano Zampini } else { 265*53022affSStefano Zampini ierr = MatMultTranspose(A,x,pch2opus->w);CHKERRQ(ierr); 266*53022affSStefano Zampini ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->w,y);CHKERRQ(ierr); 267*53022affSStefano Zampini } 268*53022affSStefano Zampini } else { 269*53022affSStefano Zampini if (sideleft) { 270*53022affSStefano Zampini ierr = MatMult(A,x,pch2opus->w);CHKERRQ(ierr); 271*53022affSStefano Zampini ierr = PCApply_H2OPUS(pc,pch2opus->w,y);CHKERRQ(ierr); 272*53022affSStefano Zampini } else { 273*53022affSStefano Zampini ierr = PCApply_H2OPUS(pc,x,pch2opus->w);CHKERRQ(ierr); 274*53022affSStefano Zampini ierr = MatMult(A,pch2opus->w,y);CHKERRQ(ierr); 275*53022affSStefano Zampini } 276*53022affSStefano Zampini } 277*53022affSStefano Zampini if (!pch2opus->testMA) { 278*53022affSStefano Zampini ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 279*53022affSStefano Zampini } 280*53022affSStefano Zampini PetscFunctionReturn(0); 281*53022affSStefano Zampini } 282*53022affSStefano Zampini 283*53022affSStefano Zampini static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y) 284*53022affSStefano Zampini { 285*53022affSStefano Zampini PetscErrorCode ierr; 286*53022affSStefano Zampini 287*53022affSStefano Zampini PetscFunctionBegin; 288*53022affSStefano Zampini ierr = MatMultKernel_MAmI(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 289*53022affSStefano Zampini PetscFunctionReturn(0); 290*53022affSStefano Zampini } 291*53022affSStefano Zampini 292*53022affSStefano Zampini static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y) 293*53022affSStefano Zampini { 294*53022affSStefano Zampini PetscErrorCode ierr; 295*53022affSStefano Zampini 296*53022affSStefano Zampini PetscFunctionBegin; 297*53022affSStefano Zampini ierr = MatMultKernel_MAmI(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 298*53022affSStefano Zampini PetscFunctionReturn(0); 299*53022affSStefano Zampini } 300*53022affSStefano Zampini 301*53022affSStefano Zampini /* HyperPower kernel: 302*53022affSStefano Zampini Y = R = x 303*53022affSStefano Zampini for i = 1 . . . l - 1 do 304*53022affSStefano Zampini R = (I - A * Xk) * R 305*53022affSStefano Zampini Y = Y + R 306*53022affSStefano Zampini Y = Xk * Y 307*53022affSStefano Zampini */ 308*53022affSStefano Zampini static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t) 309*53022affSStefano Zampini { 310*53022affSStefano Zampini PC pc; 311*53022affSStefano Zampini Mat A; 312*53022affSStefano Zampini PC_H2OPUS *pch2opus; 313*53022affSStefano Zampini PetscInt i; 314*53022affSStefano Zampini PetscErrorCode ierr; 315*53022affSStefano Zampini 316*53022affSStefano Zampini PetscFunctionBegin; 317*53022affSStefano Zampini ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr); 318*53022affSStefano Zampini pch2opus = (PC_H2OPUS*)pc->data; 319*53022affSStefano Zampini A = pch2opus->A; 320*53022affSStefano Zampini ierr = MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);CHKERRQ(ierr); 321*53022affSStefano Zampini ierr = MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3]);CHKERRQ(ierr); 322*53022affSStefano Zampini ierr = VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);CHKERRQ(ierr); 323*53022affSStefano Zampini ierr = VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);CHKERRQ(ierr); 324*53022affSStefano Zampini ierr = VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu);CHKERRQ(ierr); 325*53022affSStefano Zampini ierr = VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu);CHKERRQ(ierr); 326*53022affSStefano Zampini ierr = VecCopy(x,pch2opus->wns[0]);CHKERRQ(ierr); 327*53022affSStefano Zampini ierr = VecCopy(x,pch2opus->wns[3]);CHKERRQ(ierr); 328*53022affSStefano Zampini if (t) { 329*53022affSStefano Zampini for (i=0;i<pch2opus->hyperorder-1;i++) { 330*53022affSStefano Zampini ierr = MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1]);CHKERRQ(ierr); 331*53022affSStefano Zampini ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2]);CHKERRQ(ierr); 332*53022affSStefano Zampini ierr = VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);CHKERRQ(ierr); 333*53022affSStefano Zampini ierr = VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);CHKERRQ(ierr); 334*53022affSStefano Zampini } 335*53022affSStefano Zampini ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y);CHKERRQ(ierr); 336*53022affSStefano Zampini } else { 337*53022affSStefano Zampini for (i=0;i<pch2opus->hyperorder-1;i++) { 338*53022affSStefano Zampini ierr = PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);CHKERRQ(ierr); 339*53022affSStefano Zampini ierr = MatMult(A,pch2opus->wns[1],pch2opus->wns[2]);CHKERRQ(ierr); 340*53022affSStefano Zampini ierr = VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);CHKERRQ(ierr); 341*53022affSStefano Zampini ierr = VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);CHKERRQ(ierr); 342*53022affSStefano Zampini } 343*53022affSStefano Zampini ierr = PCApply_H2OPUS(pc,pch2opus->wns[3],y);CHKERRQ(ierr); 344*53022affSStefano Zampini } 345*53022affSStefano Zampini PetscFunctionReturn(0); 346*53022affSStefano Zampini } 347*53022affSStefano Zampini 348*53022affSStefano Zampini static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y) 349*53022affSStefano Zampini { 350*53022affSStefano Zampini PetscErrorCode ierr; 351*53022affSStefano Zampini 352*53022affSStefano Zampini PetscFunctionBegin; 353*53022affSStefano Zampini ierr = MatMultKernel_Hyper(M,x,y,PETSC_FALSE);CHKERRQ(ierr); 354*53022affSStefano Zampini PetscFunctionReturn(0); 355*53022affSStefano Zampini } 356*53022affSStefano Zampini 357*53022affSStefano Zampini static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y) 358*53022affSStefano Zampini { 359*53022affSStefano Zampini PetscErrorCode ierr; 360*53022affSStefano Zampini 361*53022affSStefano Zampini PetscFunctionBegin; 362*53022affSStefano Zampini ierr = MatMultKernel_Hyper(M,x,y,PETSC_TRUE);CHKERRQ(ierr); 363*53022affSStefano Zampini PetscFunctionReturn(0); 364*53022affSStefano Zampini } 365*53022affSStefano Zampini 366*53022affSStefano Zampini /* Hyper power kernel, MatMat version */ 367*53022affSStefano Zampini static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t) 368*53022affSStefano Zampini { 369*53022affSStefano Zampini PC pc; 370*53022affSStefano Zampini Mat A; 371*53022affSStefano Zampini PC_H2OPUS *pch2opus; 372*53022affSStefano Zampini PetscInt i; 373*53022affSStefano Zampini PetscErrorCode ierr; 374*53022affSStefano Zampini 375*53022affSStefano Zampini PetscFunctionBegin; 376*53022affSStefano Zampini ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr); 377*53022affSStefano Zampini pch2opus = (PC_H2OPUS*)pc->data; 378*53022affSStefano Zampini A = pch2opus->A; 379*53022affSStefano Zampini if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 380*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr); 381*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr); 382*53022affSStefano Zampini } 383*53022affSStefano Zampini if (!pch2opus->wnsmat[0]) { 384*53022affSStefano Zampini ierr = MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);CHKERRQ(ierr); 385*53022affSStefano Zampini ierr = MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);CHKERRQ(ierr); 386*53022affSStefano Zampini } 387*53022affSStefano Zampini if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) { 388*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[2]);CHKERRQ(ierr); 389*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[3]);CHKERRQ(ierr); 390*53022affSStefano Zampini } 391*53022affSStefano Zampini if (!pch2opus->wnsmat[2]) { 392*53022affSStefano Zampini ierr = MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2]);CHKERRQ(ierr); 393*53022affSStefano Zampini ierr = MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3]);CHKERRQ(ierr); 394*53022affSStefano Zampini } 395*53022affSStefano Zampini ierr = MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 396*53022affSStefano Zampini ierr = MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 397*53022affSStefano Zampini if (t) { 398*53022affSStefano Zampini for (i=0;i<pch2opus->hyperorder-1;i++) { 399*53022affSStefano Zampini ierr = MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);CHKERRQ(ierr); 400*53022affSStefano Zampini ierr = PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2]);CHKERRQ(ierr); 401*53022affSStefano Zampini ierr = MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 402*53022affSStefano Zampini ierr = MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 403*53022affSStefano Zampini } 404*53022affSStefano Zampini ierr = PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);CHKERRQ(ierr); 405*53022affSStefano Zampini } else { 406*53022affSStefano Zampini for (i=0;i<pch2opus->hyperorder-1;i++) { 407*53022affSStefano Zampini ierr = PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);CHKERRQ(ierr); 408*53022affSStefano Zampini ierr = MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2]);CHKERRQ(ierr); 409*53022affSStefano Zampini ierr = MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 410*53022affSStefano Zampini ierr = MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 411*53022affSStefano Zampini } 412*53022affSStefano Zampini ierr = PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);CHKERRQ(ierr); 413*53022affSStefano Zampini } 414*53022affSStefano Zampini PetscFunctionReturn(0); 415*53022affSStefano Zampini } 416*53022affSStefano Zampini 417*53022affSStefano Zampini static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx) 418*53022affSStefano Zampini { 419*53022affSStefano Zampini PetscErrorCode ierr; 420*53022affSStefano Zampini 421*53022affSStefano Zampini PetscFunctionBegin; 422*53022affSStefano Zampini ierr = MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE);CHKERRQ(ierr); 423*53022affSStefano Zampini PetscFunctionReturn(0); 424*53022affSStefano Zampini } 425*53022affSStefano Zampini 426*53022affSStefano Zampini /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */ 427*53022affSStefano Zampini static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t) 428*53022affSStefano Zampini { 429*53022affSStefano Zampini PC pc; 430*53022affSStefano Zampini Mat A; 431*53022affSStefano Zampini PC_H2OPUS *pch2opus; 432*53022affSStefano Zampini PetscErrorCode ierr; 433*53022affSStefano Zampini 434*53022affSStefano Zampini PetscFunctionBegin; 435*53022affSStefano Zampini ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr); 436*53022affSStefano Zampini pch2opus = (PC_H2OPUS*)pc->data; 437*53022affSStefano Zampini A = pch2opus->A; 438*53022affSStefano Zampini ierr = MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);CHKERRQ(ierr); 439*53022affSStefano Zampini ierr = VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);CHKERRQ(ierr); 440*53022affSStefano Zampini ierr = VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);CHKERRQ(ierr); 441*53022affSStefano Zampini if (t) { 442*53022affSStefano Zampini ierr = PCApplyTranspose_H2OPUS(pc,x,y);CHKERRQ(ierr); 443*53022affSStefano Zampini ierr = MatMultTranspose(A,y,pch2opus->wns[1]);CHKERRQ(ierr); 444*53022affSStefano Zampini ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0]);CHKERRQ(ierr); 445*53022affSStefano Zampini ierr = VecAXPBY(y,-1.,2.,pch2opus->wns[0]);CHKERRQ(ierr); 446*53022affSStefano Zampini } else { 447*53022affSStefano Zampini ierr = PCApply_H2OPUS(pc,x,y);CHKERRQ(ierr); 448*53022affSStefano Zampini ierr = MatMult(A,y,pch2opus->wns[0]);CHKERRQ(ierr); 449*53022affSStefano Zampini ierr = PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);CHKERRQ(ierr); 450*53022affSStefano Zampini ierr = VecAXPBY(y,-1.,2.,pch2opus->wns[1]);CHKERRQ(ierr); 451*53022affSStefano Zampini } 452*53022affSStefano Zampini PetscFunctionReturn(0); 453*53022affSStefano Zampini } 454*53022affSStefano Zampini 455*53022affSStefano Zampini static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y) 456*53022affSStefano Zampini { 457*53022affSStefano Zampini PetscErrorCode ierr; 458*53022affSStefano Zampini 459*53022affSStefano Zampini PetscFunctionBegin; 460*53022affSStefano Zampini ierr = MatMultKernel_NS(M,x,y,PETSC_FALSE);CHKERRQ(ierr); 461*53022affSStefano Zampini PetscFunctionReturn(0); 462*53022affSStefano Zampini } 463*53022affSStefano Zampini 464*53022affSStefano Zampini static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y) 465*53022affSStefano Zampini { 466*53022affSStefano Zampini PetscErrorCode ierr; 467*53022affSStefano Zampini 468*53022affSStefano Zampini PetscFunctionBegin; 469*53022affSStefano Zampini ierr = MatMultKernel_NS(M,x,y,PETSC_TRUE);CHKERRQ(ierr); 470*53022affSStefano Zampini PetscFunctionReturn(0); 471*53022affSStefano Zampini } 472*53022affSStefano Zampini 473*53022affSStefano Zampini /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */ 474*53022affSStefano Zampini static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t) 475*53022affSStefano Zampini { 476*53022affSStefano Zampini PC pc; 477*53022affSStefano Zampini Mat A; 478*53022affSStefano Zampini PC_H2OPUS *pch2opus; 479*53022affSStefano Zampini PetscErrorCode ierr; 480*53022affSStefano Zampini 481*53022affSStefano Zampini PetscFunctionBegin; 482*53022affSStefano Zampini ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr); 483*53022affSStefano Zampini pch2opus = (PC_H2OPUS*)pc->data; 484*53022affSStefano Zampini A = pch2opus->A; 485*53022affSStefano Zampini if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 486*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr); 487*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr); 488*53022affSStefano Zampini } 489*53022affSStefano Zampini if (!pch2opus->wnsmat[0]) { 490*53022affSStefano Zampini ierr = MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);CHKERRQ(ierr); 491*53022affSStefano Zampini ierr = MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);CHKERRQ(ierr); 492*53022affSStefano Zampini } 493*53022affSStefano Zampini if (t) { 494*53022affSStefano Zampini ierr = PCApplyTransposeMat_H2OPUS(pc,X,Y);CHKERRQ(ierr); 495*53022affSStefano Zampini ierr = MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);CHKERRQ(ierr); 496*53022affSStefano Zampini ierr = PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0]);CHKERRQ(ierr); 497*53022affSStefano Zampini ierr = MatScale(Y,2.);CHKERRQ(ierr); 498*53022affSStefano Zampini ierr = MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 499*53022affSStefano Zampini } else { 500*53022affSStefano Zampini ierr = PCApplyMat_H2OPUS(pc,X,Y);CHKERRQ(ierr); 501*53022affSStefano Zampini ierr = MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0]);CHKERRQ(ierr); 502*53022affSStefano Zampini ierr = PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);CHKERRQ(ierr); 503*53022affSStefano Zampini ierr = MatScale(Y,2.);CHKERRQ(ierr); 504*53022affSStefano Zampini ierr = MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 505*53022affSStefano Zampini } 506*53022affSStefano Zampini PetscFunctionReturn(0); 507*53022affSStefano Zampini } 508*53022affSStefano Zampini 509*53022affSStefano Zampini static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx) 510*53022affSStefano Zampini { 511*53022affSStefano Zampini PetscErrorCode ierr; 512*53022affSStefano Zampini 513*53022affSStefano Zampini PetscFunctionBegin; 514*53022affSStefano Zampini ierr = MatMatMultKernel_NS(M,X,Y,PETSC_FALSE);CHKERRQ(ierr); 515*53022affSStefano Zampini PetscFunctionReturn(0); 516*53022affSStefano Zampini } 517*53022affSStefano Zampini 518*53022affSStefano Zampini static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc) 519*53022affSStefano Zampini { 520*53022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 521*53022affSStefano Zampini Mat A = pc->useAmat ? pc->mat : pc->pmat; 522*53022affSStefano Zampini PetscErrorCode ierr; 523*53022affSStefano Zampini 524*53022affSStefano Zampini PetscFunctionBegin; 525*53022affSStefano Zampini if (!pch2opus->S) { 526*53022affSStefano Zampini PetscInt M,N,m,n; 527*53022affSStefano Zampini 528*53022affSStefano Zampini ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 529*53022affSStefano Zampini ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 530*53022affSStefano Zampini ierr = MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S);CHKERRQ(ierr); 531*53022affSStefano Zampini ierr = MatSetBlockSizesFromMats(pch2opus->S,A,A);CHKERRQ(ierr); 532*53022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU) 533*53022affSStefano Zampini ierr = MatShellSetVecType(pch2opus->S,VECCUDA);CHKERRQ(ierr); 534*53022affSStefano Zampini #endif 535*53022affSStefano Zampini } 536*53022affSStefano Zampini if (pch2opus->hyperorder >= 2) { 537*53022affSStefano Zampini ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper);CHKERRQ(ierr); 538*53022affSStefano Zampini ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper);CHKERRQ(ierr); 539*53022affSStefano Zampini ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE);CHKERRQ(ierr); 540*53022affSStefano Zampini ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA);CHKERRQ(ierr); 541*53022affSStefano Zampini } else { 542*53022affSStefano Zampini ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS);CHKERRQ(ierr); 543*53022affSStefano Zampini ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS);CHKERRQ(ierr); 544*53022affSStefano Zampini ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE);CHKERRQ(ierr); 545*53022affSStefano Zampini ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA);CHKERRQ(ierr); 546*53022affSStefano Zampini } 547*53022affSStefano Zampini ierr = MatPropagateSymmetryOptions(A,pch2opus->S);CHKERRQ(ierr); 548*53022affSStefano Zampini ierr = MatBindToCPU(pch2opus->S,pch2opus->boundtocpu);CHKERRQ(ierr); 549*53022affSStefano Zampini /* XXX */ 550*53022affSStefano Zampini ierr = MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 551*53022affSStefano Zampini PetscFunctionReturn(0); 552*53022affSStefano Zampini } 553*53022affSStefano Zampini 554*53022affSStefano Zampini static PetscErrorCode PCSetUp_H2OPUS(PC pc) 555*53022affSStefano Zampini { 556*53022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 557*53022affSStefano Zampini Mat A = pc->useAmat ? pc->mat : pc->pmat; 558*53022affSStefano Zampini NormType norm = pch2opus->normtype; 559*53022affSStefano Zampini PetscReal initerr = 0.0,err; 560*53022affSStefano Zampini PetscReal initerrMA = 0.0,errMA; 561*53022affSStefano Zampini PetscBool ish2opus; 562*53022affSStefano Zampini PetscErrorCode ierr; 563*53022affSStefano Zampini 564*53022affSStefano Zampini PetscFunctionBegin; 565*53022affSStefano Zampini if (!pch2opus->T) { 566*53022affSStefano Zampini PetscInt M,N,m,n; 567*53022affSStefano Zampini const char *prefix; 568*53022affSStefano Zampini 569*53022affSStefano Zampini ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 570*53022affSStefano Zampini ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 571*53022affSStefano Zampini ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 572*53022affSStefano Zampini ierr = MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T);CHKERRQ(ierr); 573*53022affSStefano Zampini ierr = MatSetBlockSizesFromMats(pch2opus->T,A,A);CHKERRQ(ierr); 574*53022affSStefano Zampini ierr = MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI);CHKERRQ(ierr); 575*53022affSStefano Zampini ierr = MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI);CHKERRQ(ierr); 576*53022affSStefano Zampini ierr = MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);CHKERRQ(ierr); 577*53022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU) 578*53022affSStefano Zampini ierr = MatShellSetVecType(pch2opus->T,VECCUDA);CHKERRQ(ierr); 579*53022affSStefano Zampini #endif 580*53022affSStefano Zampini ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T);CHKERRQ(ierr); 581*53022affSStefano Zampini ierr = MatSetOptionsPrefix(pch2opus->T,prefix);CHKERRQ(ierr); 582*53022affSStefano Zampini ierr = MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_");CHKERRQ(ierr); 583*53022affSStefano Zampini } 584*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->A);CHKERRQ(ierr); 585*53022affSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus);CHKERRQ(ierr); 586*53022affSStefano Zampini if (ish2opus) { 587*53022affSStefano Zampini ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 588*53022affSStefano Zampini pch2opus->A = A; 589*53022affSStefano Zampini } else { 590*53022affSStefano Zampini const char *prefix; 591*53022affSStefano Zampini ierr = MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A);CHKERRQ(ierr); 592*53022affSStefano Zampini /* XXX */ 593*53022affSStefano Zampini ierr = MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 594*53022affSStefano Zampini ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 595*53022affSStefano Zampini ierr = MatSetOptionsPrefix(pch2opus->A,prefix);CHKERRQ(ierr); 596*53022affSStefano Zampini ierr = MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_");CHKERRQ(ierr); 597*53022affSStefano Zampini ierr = MatSetFromOptions(pch2opus->A);CHKERRQ(ierr); 598*53022affSStefano Zampini ierr = MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 599*53022affSStefano Zampini ierr = MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 600*53022affSStefano Zampini /* XXX */ 601*53022affSStefano Zampini ierr = MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 602*53022affSStefano Zampini } 603*53022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU) 604*53022affSStefano Zampini pch2opus->boundtocpu = pch2opus->A->boundtocpu; 605*53022affSStefano Zampini #endif 606*53022affSStefano Zampini ierr = MatBindToCPU(pch2opus->T,pch2opus->boundtocpu);CHKERRQ(ierr); 607*53022affSStefano Zampini if (pch2opus->M) { /* see if we can reuse M as initial guess */ 608*53022affSStefano Zampini PetscReal reuse; 609*53022affSStefano Zampini 610*53022affSStefano Zampini ierr = MatBindToCPU(pch2opus->M,pch2opus->boundtocpu);CHKERRQ(ierr); 611*53022affSStefano Zampini ierr = MatNorm(pch2opus->T,norm,&reuse);CHKERRQ(ierr); 612*53022affSStefano Zampini if (reuse >= 1.0) { 613*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->M);CHKERRQ(ierr); 614*53022affSStefano Zampini } 615*53022affSStefano Zampini } 616*53022affSStefano Zampini if (!pch2opus->M) { 617*53022affSStefano Zampini const char *prefix; 618*53022affSStefano Zampini ierr = MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M);CHKERRQ(ierr); 619*53022affSStefano Zampini ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 620*53022affSStefano Zampini ierr = MatSetOptionsPrefix(pch2opus->M,prefix);CHKERRQ(ierr); 621*53022affSStefano Zampini ierr = MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_");CHKERRQ(ierr); 622*53022affSStefano Zampini ierr = MatSetFromOptions(pch2opus->M);CHKERRQ(ierr); 623*53022affSStefano Zampini ierr = PCH2OpusSetUpInit(pc);CHKERRQ(ierr); 624*53022affSStefano Zampini ierr = MatScale(pch2opus->M,pch2opus->s0);CHKERRQ(ierr); 625*53022affSStefano Zampini } 626*53022affSStefano Zampini /* A and M have the same h2 matrix structure, save on reordering routines */ 627*53022affSStefano Zampini ierr = MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE);CHKERRQ(ierr); 628*53022affSStefano Zampini ierr = MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE);CHKERRQ(ierr); 629*53022affSStefano Zampini if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) { 630*53022affSStefano Zampini ierr = MatNorm(pch2opus->T,norm,&initerr);CHKERRQ(ierr); 631*53022affSStefano Zampini pch2opus->testMA = PETSC_TRUE; 632*53022affSStefano Zampini ierr = MatNorm(pch2opus->T,norm,&initerrMA);CHKERRQ(ierr); 633*53022affSStefano Zampini pch2opus->testMA = PETSC_FALSE; 634*53022affSStefano Zampini } 635*53022affSStefano Zampini if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR; 636*53022affSStefano Zampini err = initerr; 637*53022affSStefano Zampini errMA = initerrMA; 638*53022affSStefano Zampini if (pch2opus->monitor) { 639*53022affSStefano Zampini ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr));CHKERRQ(ierr); 640*53022affSStefano Zampini ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));CHKERRQ(ierr); 641*53022affSStefano Zampini } 642*53022affSStefano Zampini if (initerr > pch2opus->atol && !pc->failedreason) { 643*53022affSStefano Zampini PetscInt i; 644*53022affSStefano Zampini 645*53022affSStefano Zampini ierr = PCH2OpusSetUpSampler_Private(pc);CHKERRQ(ierr); 646*53022affSStefano Zampini for (i = 0; i < pch2opus->maxits; i++) { 647*53022affSStefano Zampini Mat M; 648*53022affSStefano Zampini const char* prefix; 649*53022affSStefano Zampini 650*53022affSStefano Zampini ierr = MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M);CHKERRQ(ierr); 651*53022affSStefano Zampini ierr = MatGetOptionsPrefix(pch2opus->M,&prefix);CHKERRQ(ierr); 652*53022affSStefano Zampini ierr = MatSetOptionsPrefix(M,prefix);CHKERRQ(ierr); 653*53022affSStefano Zampini ierr = MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 654*53022affSStefano Zampini ierr = MatSetFromOptions(M);CHKERRQ(ierr); 655*53022affSStefano Zampini ierr = MatH2OpusSetNativeMult(M,PETSC_TRUE);CHKERRQ(ierr); 656*53022affSStefano Zampini ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 657*53022affSStefano Zampini ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 658*53022affSStefano Zampini 659*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->M);CHKERRQ(ierr); 660*53022affSStefano Zampini pch2opus->M = M; 661*53022affSStefano Zampini if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) { 662*53022affSStefano Zampini ierr = MatNorm(pch2opus->T,norm,&err);CHKERRQ(ierr); 663*53022affSStefano Zampini pch2opus->testMA = PETSC_TRUE; 664*53022affSStefano Zampini ierr = MatNorm(pch2opus->T,norm,&errMA);CHKERRQ(ierr); 665*53022affSStefano Zampini pch2opus->testMA = PETSC_FALSE; 666*53022affSStefano Zampini } 667*53022affSStefano Zampini if (pch2opus->monitor) { 668*53022affSStefano Zampini ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr));CHKERRQ(ierr); 669*53022affSStefano Zampini ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));CHKERRQ(ierr); 670*53022affSStefano Zampini } 671*53022affSStefano Zampini if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR; 672*53022affSStefano Zampini if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break; 673*53022affSStefano Zampini } 674*53022affSStefano Zampini } 675*53022affSStefano Zampini /* cleanup setup workspace */ 676*53022affSStefano Zampini ierr = MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE);CHKERRQ(ierr); 677*53022affSStefano Zampini ierr = MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE);CHKERRQ(ierr); 678*53022affSStefano Zampini ierr = VecDestroy(&pch2opus->wns[0]);CHKERRQ(ierr); 679*53022affSStefano Zampini ierr = VecDestroy(&pch2opus->wns[1]);CHKERRQ(ierr); 680*53022affSStefano Zampini ierr = VecDestroy(&pch2opus->wns[2]);CHKERRQ(ierr); 681*53022affSStefano Zampini ierr = VecDestroy(&pch2opus->wns[3]);CHKERRQ(ierr); 682*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr); 683*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr); 684*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[2]);CHKERRQ(ierr); 685*53022affSStefano Zampini ierr = MatDestroy(&pch2opus->wnsmat[3]);CHKERRQ(ierr); 686*53022affSStefano Zampini PetscFunctionReturn(0); 687*53022affSStefano Zampini } 688*53022affSStefano Zampini 689*53022affSStefano Zampini static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer) 690*53022affSStefano Zampini { 691*53022affSStefano Zampini PetscErrorCode ierr; 692*53022affSStefano Zampini PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 693*53022affSStefano Zampini PetscBool isascii; 694*53022affSStefano Zampini 695*53022affSStefano Zampini PetscFunctionBegin; 696*53022affSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr); 697*53022affSStefano Zampini if (isascii) { 698*53022affSStefano Zampini if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) { 699*53022affSStefano Zampini ierr = PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n");CHKERRQ(ierr); 700*53022affSStefano Zampini ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 701*53022affSStefano Zampini ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr); 702*53022affSStefano Zampini ierr = MatView(pch2opus->A,viewer);CHKERRQ(ierr); 703*53022affSStefano Zampini ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 704*53022affSStefano Zampini ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 705*53022affSStefano Zampini } 706*53022affSStefano Zampini if (pch2opus->M) { 707*53022affSStefano Zampini ierr = PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n");CHKERRQ(ierr); 708*53022affSStefano Zampini ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 709*53022affSStefano Zampini ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr); 710*53022affSStefano Zampini ierr = MatView(pch2opus->M,viewer);CHKERRQ(ierr); 711*53022affSStefano Zampini ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 712*53022affSStefano Zampini ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 713*53022affSStefano Zampini } 714*53022affSStefano Zampini ierr = PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0);CHKERRQ(ierr); 715*53022affSStefano Zampini } 716*53022affSStefano Zampini PetscFunctionReturn(0); 717*53022affSStefano Zampini } 718*53022affSStefano Zampini 719*53022affSStefano Zampini PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc) 720*53022affSStefano Zampini { 721*53022affSStefano Zampini PetscErrorCode ierr; 722*53022affSStefano Zampini PC_H2OPUS *pch2opus; 723*53022affSStefano Zampini 724*53022affSStefano Zampini PetscFunctionBegin; 725*53022affSStefano Zampini ierr = PetscNewLog(pc,&pch2opus);CHKERRQ(ierr); 726*53022affSStefano Zampini pc->data = (void*)pch2opus; 727*53022affSStefano Zampini 728*53022affSStefano Zampini pch2opus->atol = 1.e-2; 729*53022affSStefano Zampini pch2opus->rtol = 1.e-6; 730*53022affSStefano Zampini pch2opus->maxits = 50; 731*53022affSStefano Zampini pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */ 732*53022affSStefano Zampini pch2opus->normtype = NORM_2; 733*53022affSStefano Zampini 734*53022affSStefano Zampini /* these are needed when we are sampling the pmat */ 735*53022affSStefano Zampini pch2opus->eta = PETSC_DECIDE; 736*53022affSStefano Zampini pch2opus->leafsize = PETSC_DECIDE; 737*53022affSStefano Zampini pch2opus->max_rank = PETSC_DECIDE; 738*53022affSStefano Zampini pch2opus->bs = PETSC_DECIDE; 739*53022affSStefano Zampini pch2opus->mrtol = PETSC_DECIDE; 740*53022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU) 741*53022affSStefano Zampini pch2opus->boundtocpu = PETSC_FALSE; 742*53022affSStefano Zampini #else 743*53022affSStefano Zampini pch2opus->boundtocpu = PETSC_TRUE; 744*53022affSStefano Zampini #endif 745*53022affSStefano Zampini pc->ops->destroy = PCDestroy_H2OPUS; 746*53022affSStefano Zampini pc->ops->setup = PCSetUp_H2OPUS; 747*53022affSStefano Zampini pc->ops->apply = PCApply_H2OPUS; 748*53022affSStefano Zampini pc->ops->matapply = PCApplyMat_H2OPUS; 749*53022affSStefano Zampini pc->ops->applytranspose = PCApplyTranspose_H2OPUS; 750*53022affSStefano Zampini pc->ops->reset = PCReset_H2OPUS; 751*53022affSStefano Zampini pc->ops->setfromoptions = PCSetFromOptions_H2OPUS; 752*53022affSStefano Zampini pc->ops->view = PCView_H2OPUS; 753*53022affSStefano Zampini 754*53022affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS);CHKERRQ(ierr); 755*53022affSStefano Zampini PetscFunctionReturn(0); 756*53022affSStefano Zampini } 757