xref: /petsc/src/ksp/pc/impls/h2opus/pch2opus.c (revision 53022affac82b2fcec7b6432d0d3b2c8aa0487f8)
1*53022affSStefano Zampini #include <petsc/private/pcimpl.h>
2*53022affSStefano Zampini #include <petsc/private/matimpl.h>
3*53022affSStefano Zampini #include <h2opusconf.h>
4*53022affSStefano Zampini 
5*53022affSStefano Zampini /* Use GPU only if H2OPUS is configured for GPU */
6*53022affSStefano Zampini #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
7*53022affSStefano Zampini #define PETSC_H2OPUS_USE_GPU
8*53022affSStefano Zampini #endif
9*53022affSStefano Zampini 
10*53022affSStefano Zampini typedef struct {
11*53022affSStefano Zampini   Mat         A;
12*53022affSStefano Zampini   Mat         M;
13*53022affSStefano Zampini   PetscScalar s0;
14*53022affSStefano Zampini 
15*53022affSStefano Zampini   /* sampler for Newton-Schultz */
16*53022affSStefano Zampini   Mat      S;
17*53022affSStefano Zampini   PetscInt hyperorder;
18*53022affSStefano Zampini   Vec      wns[4];
19*53022affSStefano Zampini   Mat      wnsmat[4];
20*53022affSStefano Zampini 
21*53022affSStefano Zampini   /* convergence testing */
22*53022affSStefano Zampini   Mat       T;
23*53022affSStefano Zampini   Vec       w;
24*53022affSStefano Zampini   PetscBool testMA;
25*53022affSStefano Zampini 
26*53022affSStefano Zampini   /* Support for PCSetCoordinates */
27*53022affSStefano Zampini   PetscInt  sdim;
28*53022affSStefano Zampini   PetscInt  nlocc;
29*53022affSStefano Zampini   PetscReal *coords;
30*53022affSStefano Zampini 
31*53022affSStefano Zampini   /* Newton-Schultz customization */
32*53022affSStefano Zampini   PetscInt  maxits;
33*53022affSStefano Zampini   PetscReal rtol,atol;
34*53022affSStefano Zampini   PetscBool monitor;
35*53022affSStefano Zampini   PetscBool useapproximatenorms;
36*53022affSStefano Zampini   NormType  normtype;
37*53022affSStefano Zampini 
38*53022affSStefano Zampini   /* Used when pmat != MATH2OPUS */
39*53022affSStefano Zampini   PetscReal eta;
40*53022affSStefano Zampini   PetscInt  leafsize;
41*53022affSStefano Zampini   PetscInt  max_rank;
42*53022affSStefano Zampini   PetscInt  bs;
43*53022affSStefano Zampini   PetscReal mrtol;
44*53022affSStefano Zampini 
45*53022affSStefano Zampini   PetscBool boundtocpu;
46*53022affSStefano Zampini } PC_H2OPUS;
47*53022affSStefano Zampini 
48*53022affSStefano Zampini PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*);
49*53022affSStefano Zampini 
50*53022affSStefano Zampini static PetscErrorCode PCReset_H2OPUS(PC pc)
51*53022affSStefano Zampini {
52*53022affSStefano Zampini   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
53*53022affSStefano Zampini   PetscErrorCode ierr;
54*53022affSStefano Zampini 
55*53022affSStefano Zampini   PetscFunctionBegin;
56*53022affSStefano Zampini   pch2opus->sdim  = 0;
57*53022affSStefano Zampini   pch2opus->nlocc = 0;
58*53022affSStefano Zampini   ierr = PetscFree(pch2opus->coords);CHKERRQ(ierr);
59*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->A);CHKERRQ(ierr);
60*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->M);CHKERRQ(ierr);
61*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->T);CHKERRQ(ierr);
62*53022affSStefano Zampini   ierr = VecDestroy(&pch2opus->w);CHKERRQ(ierr);
63*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->S);CHKERRQ(ierr);
64*53022affSStefano Zampini   ierr = VecDestroy(&pch2opus->wns[0]);CHKERRQ(ierr);
65*53022affSStefano Zampini   ierr = VecDestroy(&pch2opus->wns[1]);CHKERRQ(ierr);
66*53022affSStefano Zampini   ierr = VecDestroy(&pch2opus->wns[2]);CHKERRQ(ierr);
67*53022affSStefano Zampini   ierr = VecDestroy(&pch2opus->wns[3]);CHKERRQ(ierr);
68*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr);
69*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr);
70*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->wnsmat[2]);CHKERRQ(ierr);
71*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->wnsmat[3]);CHKERRQ(ierr);
72*53022affSStefano Zampini   PetscFunctionReturn(0);
73*53022affSStefano Zampini }
74*53022affSStefano Zampini 
75*53022affSStefano Zampini static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
76*53022affSStefano Zampini {
77*53022affSStefano Zampini   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
78*53022affSStefano Zampini   PetscBool      reset = PETSC_TRUE;
79*53022affSStefano Zampini   PetscErrorCode ierr;
80*53022affSStefano Zampini 
81*53022affSStefano Zampini   PetscFunctionBegin;
82*53022affSStefano Zampini   if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
83*53022affSStefano Zampini     ierr  = PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset);CHKERRQ(ierr);
84*53022affSStefano Zampini     reset = (PetscBool)!reset;
85*53022affSStefano Zampini   }
86*53022affSStefano Zampini   ierr = MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
87*53022affSStefano Zampini   if (reset) {
88*53022affSStefano Zampini     ierr = PCReset_H2OPUS(pc);CHKERRQ(ierr);
89*53022affSStefano Zampini     ierr = PetscMalloc1(sdim*nlocc,&pch2opus->coords);CHKERRQ(ierr);
90*53022affSStefano Zampini     ierr = PetscArraycpy(pch2opus->coords,coords,sdim*nlocc);CHKERRQ(ierr);
91*53022affSStefano Zampini     pch2opus->sdim  = sdim;
92*53022affSStefano Zampini     pch2opus->nlocc = nlocc;
93*53022affSStefano Zampini   }
94*53022affSStefano Zampini   PetscFunctionReturn(0);
95*53022affSStefano Zampini }
96*53022affSStefano Zampini 
97*53022affSStefano Zampini static PetscErrorCode PCDestroy_H2OPUS(PC pc)
98*53022affSStefano Zampini {
99*53022affSStefano Zampini   PetscErrorCode ierr;
100*53022affSStefano Zampini 
101*53022affSStefano Zampini   PetscFunctionBegin;
102*53022affSStefano Zampini   ierr = PCReset_H2OPUS(pc);CHKERRQ(ierr);
103*53022affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL);CHKERRQ(ierr);
104*53022affSStefano Zampini   ierr = PetscFree(pc->data);CHKERRQ(ierr);
105*53022affSStefano Zampini   PetscFunctionReturn(0);
106*53022affSStefano Zampini }
107*53022affSStefano Zampini 
108*53022affSStefano Zampini static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc)
109*53022affSStefano Zampini {
110*53022affSStefano Zampini   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
111*53022affSStefano Zampini   PetscErrorCode ierr;
112*53022affSStefano Zampini 
113*53022affSStefano Zampini   PetscFunctionBegin;
114*53022affSStefano Zampini   ierr = PetscOptionsHead(PetscOptionsObject,"H2OPUS options");CHKERRQ(ierr);
115*53022affSStefano Zampini   ierr = PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL);CHKERRQ(ierr);
116*53022affSStefano Zampini   ierr = PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL);CHKERRQ(ierr);
117*53022affSStefano Zampini   ierr = PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL);CHKERRQ(ierr);
118*53022affSStefano Zampini   ierr = PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL);CHKERRQ(ierr);
119*53022affSStefano Zampini   ierr = PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL);CHKERRQ(ierr);
120*53022affSStefano Zampini   ierr = PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL);CHKERRQ(ierr);
121*53022affSStefano Zampini   ierr = PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL);CHKERRQ(ierr);
122*53022affSStefano Zampini   ierr = PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL);CHKERRQ(ierr);
123*53022affSStefano Zampini   ierr = PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL);CHKERRQ(ierr);
124*53022affSStefano Zampini   ierr = PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL);CHKERRQ(ierr);
125*53022affSStefano Zampini   ierr = PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL);CHKERRQ(ierr);
126*53022affSStefano Zampini   ierr = PetscOptionsTail();CHKERRQ(ierr);
127*53022affSStefano Zampini   PetscFunctionReturn(0);
128*53022affSStefano Zampini }
129*53022affSStefano Zampini 
130*53022affSStefano Zampini typedef struct {
131*53022affSStefano Zampini   Mat A;
132*53022affSStefano Zampini   Mat M;
133*53022affSStefano Zampini   Vec w;
134*53022affSStefano Zampini } AAtCtx;
135*53022affSStefano Zampini 
136*53022affSStefano Zampini static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
137*53022affSStefano Zampini {
138*53022affSStefano Zampini   AAtCtx         *aat;
139*53022affSStefano Zampini   PetscErrorCode ierr;
140*53022affSStefano Zampini 
141*53022affSStefano Zampini   PetscFunctionBegin;
142*53022affSStefano Zampini   ierr = MatShellGetContext(A,(void*)&aat);CHKERRQ(ierr);
143*53022affSStefano Zampini   /* ierr = MatMultTranspose(aat->M,x,aat->w);CHKERRQ(ierr); */
144*53022affSStefano Zampini   ierr = MatMultTranspose(aat->A,x,aat->w);CHKERRQ(ierr);
145*53022affSStefano Zampini   ierr = MatMult(aat->A,aat->w,y);CHKERRQ(ierr);
146*53022affSStefano Zampini   PetscFunctionReturn(0);
147*53022affSStefano Zampini }
148*53022affSStefano Zampini 
149*53022affSStefano Zampini static PetscErrorCode PCH2OpusSetUpInit(PC pc)
150*53022affSStefano Zampini {
151*53022affSStefano Zampini   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
152*53022affSStefano Zampini   Mat            A = pc->useAmat ? pc->mat : pc->pmat, AAt;
153*53022affSStefano Zampini   PetscInt       M,m;
154*53022affSStefano Zampini   VecType        vtype;
155*53022affSStefano Zampini   PetscReal      n;
156*53022affSStefano Zampini   AAtCtx         aat;
157*53022affSStefano Zampini   PetscErrorCode ierr;
158*53022affSStefano Zampini 
159*53022affSStefano Zampini   PetscFunctionBegin;
160*53022affSStefano Zampini   aat.A = A;
161*53022affSStefano Zampini   aat.M = pch2opus->M; /* unused so far */
162*53022affSStefano Zampini   ierr = MatCreateVecs(A,NULL,&aat.w);CHKERRQ(ierr);
163*53022affSStefano Zampini   ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
164*53022affSStefano Zampini   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
165*53022affSStefano Zampini   ierr = MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt);CHKERRQ(ierr);
166*53022affSStefano Zampini   ierr = MatBindToCPU(AAt,pch2opus->boundtocpu);CHKERRQ(ierr);
167*53022affSStefano Zampini   ierr = MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt);CHKERRQ(ierr);
168*53022affSStefano Zampini   ierr = MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt);CHKERRQ(ierr);
169*53022affSStefano Zampini   ierr = MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);CHKERRQ(ierr);
170*53022affSStefano Zampini   ierr = MatGetVecType(A,&vtype);CHKERRQ(ierr);
171*53022affSStefano Zampini   ierr = MatShellSetVecType(AAt,vtype);CHKERRQ(ierr);
172*53022affSStefano Zampini   ierr = MatNorm(AAt,NORM_1,&n);CHKERRQ(ierr);
173*53022affSStefano Zampini   pch2opus->s0 = 1./n;
174*53022affSStefano Zampini   ierr = VecDestroy(&aat.w);CHKERRQ(ierr);
175*53022affSStefano Zampini   ierr = MatDestroy(&AAt);CHKERRQ(ierr);
176*53022affSStefano Zampini   PetscFunctionReturn(0);
177*53022affSStefano Zampini }
178*53022affSStefano Zampini 
179*53022affSStefano Zampini static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
180*53022affSStefano Zampini {
181*53022affSStefano Zampini   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
182*53022affSStefano Zampini   PetscErrorCode ierr;
183*53022affSStefano Zampini 
184*53022affSStefano Zampini   PetscFunctionBegin;
185*53022affSStefano Zampini   if (t) {
186*53022affSStefano Zampini     ierr = MatMultTranspose(pch2opus->M,x,y);CHKERRQ(ierr);
187*53022affSStefano Zampini   } else {
188*53022affSStefano Zampini     ierr = MatMult(pch2opus->M,x,y);CHKERRQ(ierr);
189*53022affSStefano Zampini   }
190*53022affSStefano Zampini   PetscFunctionReturn(0);
191*53022affSStefano Zampini }
192*53022affSStefano Zampini 
193*53022affSStefano Zampini static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
194*53022affSStefano Zampini {
195*53022affSStefano Zampini   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
196*53022affSStefano Zampini   PetscErrorCode ierr;
197*53022affSStefano Zampini 
198*53022affSStefano Zampini   PetscFunctionBegin;
199*53022affSStefano Zampini   if (t) {
200*53022affSStefano Zampini     ierr = MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
201*53022affSStefano Zampini   } else {
202*53022affSStefano Zampini     ierr = MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
203*53022affSStefano Zampini   }
204*53022affSStefano Zampini   PetscFunctionReturn(0);
205*53022affSStefano Zampini }
206*53022affSStefano Zampini 
207*53022affSStefano Zampini static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
208*53022affSStefano Zampini {
209*53022affSStefano Zampini   PetscErrorCode ierr;
210*53022affSStefano Zampini 
211*53022affSStefano Zampini   PetscFunctionBegin;
212*53022affSStefano Zampini   ierr = PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE);CHKERRQ(ierr);
213*53022affSStefano Zampini   PetscFunctionReturn(0);
214*53022affSStefano Zampini }
215*53022affSStefano Zampini 
216*53022affSStefano Zampini static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
217*53022affSStefano Zampini {
218*53022affSStefano Zampini   PetscErrorCode ierr;
219*53022affSStefano Zampini 
220*53022affSStefano Zampini   PetscFunctionBegin;
221*53022affSStefano Zampini   ierr = PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE);CHKERRQ(ierr);
222*53022affSStefano Zampini   PetscFunctionReturn(0);
223*53022affSStefano Zampini }
224*53022affSStefano Zampini 
225*53022affSStefano Zampini static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
226*53022affSStefano Zampini {
227*53022affSStefano Zampini   PetscErrorCode ierr;
228*53022affSStefano Zampini 
229*53022affSStefano Zampini   PetscFunctionBegin;
230*53022affSStefano Zampini   ierr = PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE);CHKERRQ(ierr);
231*53022affSStefano Zampini   PetscFunctionReturn(0);
232*53022affSStefano Zampini }
233*53022affSStefano Zampini 
234*53022affSStefano Zampini static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
235*53022affSStefano Zampini {
236*53022affSStefano Zampini   PetscErrorCode ierr;
237*53022affSStefano Zampini 
238*53022affSStefano Zampini   PetscFunctionBegin;
239*53022affSStefano Zampini   ierr = PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE);CHKERRQ(ierr);
240*53022affSStefano Zampini   PetscFunctionReturn(0);
241*53022affSStefano Zampini }
242*53022affSStefano Zampini 
243*53022affSStefano Zampini /* used to test the norm of (M^-1 A - I) */
244*53022affSStefano Zampini static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
245*53022affSStefano Zampini {
246*53022affSStefano Zampini   PC             pc;
247*53022affSStefano Zampini   Mat            A;
248*53022affSStefano Zampini   PC_H2OPUS      *pch2opus;
249*53022affSStefano Zampini   PetscErrorCode ierr;
250*53022affSStefano Zampini   PetscBool      sideleft = PETSC_TRUE;
251*53022affSStefano Zampini 
252*53022affSStefano Zampini   PetscFunctionBegin;
253*53022affSStefano Zampini   ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr);
254*53022affSStefano Zampini   pch2opus = (PC_H2OPUS*)pc->data;
255*53022affSStefano Zampini   if (!pch2opus->w) {
256*53022affSStefano Zampini     ierr = MatCreateVecs(pch2opus->M,&pch2opus->w,NULL);CHKERRQ(ierr);
257*53022affSStefano Zampini   }
258*53022affSStefano Zampini   A = pch2opus->A;
259*53022affSStefano Zampini   ierr = VecBindToCPU(pch2opus->w,pch2opus->boundtocpu);CHKERRQ(ierr);
260*53022affSStefano Zampini   if (t) {
261*53022affSStefano Zampini     if (sideleft) {
262*53022affSStefano Zampini       ierr = PCApplyTranspose_H2OPUS(pc,x,pch2opus->w);CHKERRQ(ierr);
263*53022affSStefano Zampini       ierr = MatMultTranspose(A,pch2opus->w,y);CHKERRQ(ierr);
264*53022affSStefano Zampini     } else {
265*53022affSStefano Zampini       ierr = MatMultTranspose(A,x,pch2opus->w);CHKERRQ(ierr);
266*53022affSStefano Zampini       ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->w,y);CHKERRQ(ierr);
267*53022affSStefano Zampini     }
268*53022affSStefano Zampini   } else {
269*53022affSStefano Zampini     if (sideleft) {
270*53022affSStefano Zampini       ierr = MatMult(A,x,pch2opus->w);CHKERRQ(ierr);
271*53022affSStefano Zampini       ierr = PCApply_H2OPUS(pc,pch2opus->w,y);CHKERRQ(ierr);
272*53022affSStefano Zampini     } else {
273*53022affSStefano Zampini       ierr = PCApply_H2OPUS(pc,x,pch2opus->w);CHKERRQ(ierr);
274*53022affSStefano Zampini       ierr = MatMult(A,pch2opus->w,y);CHKERRQ(ierr);
275*53022affSStefano Zampini     }
276*53022affSStefano Zampini   }
277*53022affSStefano Zampini   if (!pch2opus->testMA) {
278*53022affSStefano Zampini     ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr);
279*53022affSStefano Zampini   }
280*53022affSStefano Zampini   PetscFunctionReturn(0);
281*53022affSStefano Zampini }
282*53022affSStefano Zampini 
283*53022affSStefano Zampini static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
284*53022affSStefano Zampini {
285*53022affSStefano Zampini   PetscErrorCode ierr;
286*53022affSStefano Zampini 
287*53022affSStefano Zampini   PetscFunctionBegin;
288*53022affSStefano Zampini   ierr = MatMultKernel_MAmI(A,x,y,PETSC_FALSE);CHKERRQ(ierr);
289*53022affSStefano Zampini   PetscFunctionReturn(0);
290*53022affSStefano Zampini }
291*53022affSStefano Zampini 
292*53022affSStefano Zampini static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
293*53022affSStefano Zampini {
294*53022affSStefano Zampini   PetscErrorCode ierr;
295*53022affSStefano Zampini 
296*53022affSStefano Zampini   PetscFunctionBegin;
297*53022affSStefano Zampini   ierr = MatMultKernel_MAmI(A,x,y,PETSC_TRUE);CHKERRQ(ierr);
298*53022affSStefano Zampini   PetscFunctionReturn(0);
299*53022affSStefano Zampini }
300*53022affSStefano Zampini 
301*53022affSStefano Zampini /* HyperPower kernel:
302*53022affSStefano Zampini Y = R = x
303*53022affSStefano Zampini for i = 1 . . . l - 1 do
304*53022affSStefano Zampini   R = (I - A * Xk) * R
305*53022affSStefano Zampini   Y = Y + R
306*53022affSStefano Zampini Y = Xk * Y
307*53022affSStefano Zampini */
308*53022affSStefano Zampini static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
309*53022affSStefano Zampini {
310*53022affSStefano Zampini   PC             pc;
311*53022affSStefano Zampini   Mat            A;
312*53022affSStefano Zampini   PC_H2OPUS      *pch2opus;
313*53022affSStefano Zampini   PetscInt       i;
314*53022affSStefano Zampini   PetscErrorCode ierr;
315*53022affSStefano Zampini 
316*53022affSStefano Zampini   PetscFunctionBegin;
317*53022affSStefano Zampini   ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr);
318*53022affSStefano Zampini   pch2opus = (PC_H2OPUS*)pc->data;
319*53022affSStefano Zampini   A = pch2opus->A;
320*53022affSStefano Zampini   ierr = MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);CHKERRQ(ierr);
321*53022affSStefano Zampini   ierr = MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3]);CHKERRQ(ierr);
322*53022affSStefano Zampini   ierr = VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);CHKERRQ(ierr);
323*53022affSStefano Zampini   ierr = VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);CHKERRQ(ierr);
324*53022affSStefano Zampini   ierr = VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu);CHKERRQ(ierr);
325*53022affSStefano Zampini   ierr = VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu);CHKERRQ(ierr);
326*53022affSStefano Zampini   ierr = VecCopy(x,pch2opus->wns[0]);CHKERRQ(ierr);
327*53022affSStefano Zampini   ierr = VecCopy(x,pch2opus->wns[3]);CHKERRQ(ierr);
328*53022affSStefano Zampini   if (t) {
329*53022affSStefano Zampini     for (i=0;i<pch2opus->hyperorder-1;i++) {
330*53022affSStefano Zampini       ierr = MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1]);CHKERRQ(ierr);
331*53022affSStefano Zampini       ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2]);CHKERRQ(ierr);
332*53022affSStefano Zampini       ierr = VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);CHKERRQ(ierr);
333*53022affSStefano Zampini       ierr = VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);CHKERRQ(ierr);
334*53022affSStefano Zampini     }
335*53022affSStefano Zampini     ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y);CHKERRQ(ierr);
336*53022affSStefano Zampini   } else {
337*53022affSStefano Zampini     for (i=0;i<pch2opus->hyperorder-1;i++) {
338*53022affSStefano Zampini       ierr = PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);CHKERRQ(ierr);
339*53022affSStefano Zampini       ierr = MatMult(A,pch2opus->wns[1],pch2opus->wns[2]);CHKERRQ(ierr);
340*53022affSStefano Zampini       ierr = VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);CHKERRQ(ierr);
341*53022affSStefano Zampini       ierr = VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);CHKERRQ(ierr);
342*53022affSStefano Zampini     }
343*53022affSStefano Zampini     ierr = PCApply_H2OPUS(pc,pch2opus->wns[3],y);CHKERRQ(ierr);
344*53022affSStefano Zampini   }
345*53022affSStefano Zampini   PetscFunctionReturn(0);
346*53022affSStefano Zampini }
347*53022affSStefano Zampini 
348*53022affSStefano Zampini static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
349*53022affSStefano Zampini {
350*53022affSStefano Zampini   PetscErrorCode ierr;
351*53022affSStefano Zampini 
352*53022affSStefano Zampini   PetscFunctionBegin;
353*53022affSStefano Zampini   ierr = MatMultKernel_Hyper(M,x,y,PETSC_FALSE);CHKERRQ(ierr);
354*53022affSStefano Zampini   PetscFunctionReturn(0);
355*53022affSStefano Zampini }
356*53022affSStefano Zampini 
357*53022affSStefano Zampini static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
358*53022affSStefano Zampini {
359*53022affSStefano Zampini   PetscErrorCode ierr;
360*53022affSStefano Zampini 
361*53022affSStefano Zampini   PetscFunctionBegin;
362*53022affSStefano Zampini   ierr = MatMultKernel_Hyper(M,x,y,PETSC_TRUE);CHKERRQ(ierr);
363*53022affSStefano Zampini   PetscFunctionReturn(0);
364*53022affSStefano Zampini }
365*53022affSStefano Zampini 
366*53022affSStefano Zampini /* Hyper power kernel, MatMat version */
367*53022affSStefano Zampini static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
368*53022affSStefano Zampini {
369*53022affSStefano Zampini   PC             pc;
370*53022affSStefano Zampini   Mat            A;
371*53022affSStefano Zampini   PC_H2OPUS      *pch2opus;
372*53022affSStefano Zampini   PetscInt       i;
373*53022affSStefano Zampini   PetscErrorCode ierr;
374*53022affSStefano Zampini 
375*53022affSStefano Zampini   PetscFunctionBegin;
376*53022affSStefano Zampini   ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr);
377*53022affSStefano Zampini   pch2opus = (PC_H2OPUS*)pc->data;
378*53022affSStefano Zampini   A = pch2opus->A;
379*53022affSStefano Zampini   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
380*53022affSStefano Zampini     ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr);
381*53022affSStefano Zampini     ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr);
382*53022affSStefano Zampini   }
383*53022affSStefano Zampini   if (!pch2opus->wnsmat[0]) {
384*53022affSStefano Zampini     ierr = MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);CHKERRQ(ierr);
385*53022affSStefano Zampini     ierr = MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);CHKERRQ(ierr);
386*53022affSStefano Zampini   }
387*53022affSStefano Zampini   if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
388*53022affSStefano Zampini     ierr = MatDestroy(&pch2opus->wnsmat[2]);CHKERRQ(ierr);
389*53022affSStefano Zampini     ierr = MatDestroy(&pch2opus->wnsmat[3]);CHKERRQ(ierr);
390*53022affSStefano Zampini   }
391*53022affSStefano Zampini   if (!pch2opus->wnsmat[2]) {
392*53022affSStefano Zampini     ierr = MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2]);CHKERRQ(ierr);
393*53022affSStefano Zampini     ierr = MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3]);CHKERRQ(ierr);
394*53022affSStefano Zampini   }
395*53022affSStefano Zampini   ierr = MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr);
396*53022affSStefano Zampini   ierr = MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN);CHKERRQ(ierr);
397*53022affSStefano Zampini   if (t) {
398*53022affSStefano Zampini     for (i=0;i<pch2opus->hyperorder-1;i++) {
399*53022affSStefano Zampini       ierr = MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);CHKERRQ(ierr);
400*53022affSStefano Zampini       ierr = PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2]);CHKERRQ(ierr);
401*53022affSStefano Zampini       ierr = MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);CHKERRQ(ierr);
402*53022affSStefano Zampini       ierr = MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr);
403*53022affSStefano Zampini     }
404*53022affSStefano Zampini     ierr = PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);CHKERRQ(ierr);
405*53022affSStefano Zampini   } else {
406*53022affSStefano Zampini     for (i=0;i<pch2opus->hyperorder-1;i++) {
407*53022affSStefano Zampini       ierr = PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);CHKERRQ(ierr);
408*53022affSStefano Zampini       ierr = MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2]);CHKERRQ(ierr);
409*53022affSStefano Zampini       ierr = MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);CHKERRQ(ierr);
410*53022affSStefano Zampini       ierr = MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr);
411*53022affSStefano Zampini     }
412*53022affSStefano Zampini     ierr = PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);CHKERRQ(ierr);
413*53022affSStefano Zampini   }
414*53022affSStefano Zampini   PetscFunctionReturn(0);
415*53022affSStefano Zampini }
416*53022affSStefano Zampini 
417*53022affSStefano Zampini static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx)
418*53022affSStefano Zampini {
419*53022affSStefano Zampini   PetscErrorCode ierr;
420*53022affSStefano Zampini 
421*53022affSStefano Zampini   PetscFunctionBegin;
422*53022affSStefano Zampini   ierr = MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE);CHKERRQ(ierr);
423*53022affSStefano Zampini   PetscFunctionReturn(0);
424*53022affSStefano Zampini }
425*53022affSStefano Zampini 
426*53022affSStefano Zampini /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
427*53022affSStefano Zampini static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
428*53022affSStefano Zampini {
429*53022affSStefano Zampini   PC             pc;
430*53022affSStefano Zampini   Mat            A;
431*53022affSStefano Zampini   PC_H2OPUS      *pch2opus;
432*53022affSStefano Zampini   PetscErrorCode ierr;
433*53022affSStefano Zampini 
434*53022affSStefano Zampini   PetscFunctionBegin;
435*53022affSStefano Zampini   ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr);
436*53022affSStefano Zampini   pch2opus = (PC_H2OPUS*)pc->data;
437*53022affSStefano Zampini   A = pch2opus->A;
438*53022affSStefano Zampini   ierr = MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);CHKERRQ(ierr);
439*53022affSStefano Zampini   ierr = VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);CHKERRQ(ierr);
440*53022affSStefano Zampini   ierr = VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);CHKERRQ(ierr);
441*53022affSStefano Zampini   if (t) {
442*53022affSStefano Zampini     ierr = PCApplyTranspose_H2OPUS(pc,x,y);CHKERRQ(ierr);
443*53022affSStefano Zampini     ierr = MatMultTranspose(A,y,pch2opus->wns[1]);CHKERRQ(ierr);
444*53022affSStefano Zampini     ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0]);CHKERRQ(ierr);
445*53022affSStefano Zampini     ierr = VecAXPBY(y,-1.,2.,pch2opus->wns[0]);CHKERRQ(ierr);
446*53022affSStefano Zampini   } else {
447*53022affSStefano Zampini     ierr = PCApply_H2OPUS(pc,x,y);CHKERRQ(ierr);
448*53022affSStefano Zampini     ierr = MatMult(A,y,pch2opus->wns[0]);CHKERRQ(ierr);
449*53022affSStefano Zampini     ierr = PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);CHKERRQ(ierr);
450*53022affSStefano Zampini     ierr = VecAXPBY(y,-1.,2.,pch2opus->wns[1]);CHKERRQ(ierr);
451*53022affSStefano Zampini   }
452*53022affSStefano Zampini   PetscFunctionReturn(0);
453*53022affSStefano Zampini }
454*53022affSStefano Zampini 
455*53022affSStefano Zampini static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
456*53022affSStefano Zampini {
457*53022affSStefano Zampini   PetscErrorCode ierr;
458*53022affSStefano Zampini 
459*53022affSStefano Zampini   PetscFunctionBegin;
460*53022affSStefano Zampini   ierr = MatMultKernel_NS(M,x,y,PETSC_FALSE);CHKERRQ(ierr);
461*53022affSStefano Zampini   PetscFunctionReturn(0);
462*53022affSStefano Zampini }
463*53022affSStefano Zampini 
464*53022affSStefano Zampini static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
465*53022affSStefano Zampini {
466*53022affSStefano Zampini   PetscErrorCode ierr;
467*53022affSStefano Zampini 
468*53022affSStefano Zampini   PetscFunctionBegin;
469*53022affSStefano Zampini   ierr = MatMultKernel_NS(M,x,y,PETSC_TRUE);CHKERRQ(ierr);
470*53022affSStefano Zampini   PetscFunctionReturn(0);
471*53022affSStefano Zampini }
472*53022affSStefano Zampini 
473*53022affSStefano Zampini /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
474*53022affSStefano Zampini static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
475*53022affSStefano Zampini {
476*53022affSStefano Zampini   PC             pc;
477*53022affSStefano Zampini   Mat            A;
478*53022affSStefano Zampini   PC_H2OPUS      *pch2opus;
479*53022affSStefano Zampini   PetscErrorCode ierr;
480*53022affSStefano Zampini 
481*53022affSStefano Zampini   PetscFunctionBegin;
482*53022affSStefano Zampini   ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr);
483*53022affSStefano Zampini   pch2opus = (PC_H2OPUS*)pc->data;
484*53022affSStefano Zampini   A = pch2opus->A;
485*53022affSStefano Zampini   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
486*53022affSStefano Zampini     ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr);
487*53022affSStefano Zampini     ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr);
488*53022affSStefano Zampini   }
489*53022affSStefano Zampini   if (!pch2opus->wnsmat[0]) {
490*53022affSStefano Zampini     ierr = MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);CHKERRQ(ierr);
491*53022affSStefano Zampini     ierr = MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);CHKERRQ(ierr);
492*53022affSStefano Zampini   }
493*53022affSStefano Zampini   if (t) {
494*53022affSStefano Zampini     ierr = PCApplyTransposeMat_H2OPUS(pc,X,Y);CHKERRQ(ierr);
495*53022affSStefano Zampini     ierr = MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);CHKERRQ(ierr);
496*53022affSStefano Zampini     ierr = PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0]);CHKERRQ(ierr);
497*53022affSStefano Zampini     ierr = MatScale(Y,2.);CHKERRQ(ierr);
498*53022affSStefano Zampini     ierr = MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr);
499*53022affSStefano Zampini   } else {
500*53022affSStefano Zampini     ierr = PCApplyMat_H2OPUS(pc,X,Y);CHKERRQ(ierr);
501*53022affSStefano Zampini     ierr = MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0]);CHKERRQ(ierr);
502*53022affSStefano Zampini     ierr = PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);CHKERRQ(ierr);
503*53022affSStefano Zampini     ierr = MatScale(Y,2.);CHKERRQ(ierr);
504*53022affSStefano Zampini     ierr = MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN);CHKERRQ(ierr);
505*53022affSStefano Zampini   }
506*53022affSStefano Zampini   PetscFunctionReturn(0);
507*53022affSStefano Zampini }
508*53022affSStefano Zampini 
509*53022affSStefano Zampini static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
510*53022affSStefano Zampini {
511*53022affSStefano Zampini   PetscErrorCode ierr;
512*53022affSStefano Zampini 
513*53022affSStefano Zampini   PetscFunctionBegin;
514*53022affSStefano Zampini   ierr = MatMatMultKernel_NS(M,X,Y,PETSC_FALSE);CHKERRQ(ierr);
515*53022affSStefano Zampini   PetscFunctionReturn(0);
516*53022affSStefano Zampini }
517*53022affSStefano Zampini 
518*53022affSStefano Zampini static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
519*53022affSStefano Zampini {
520*53022affSStefano Zampini   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
521*53022affSStefano Zampini   Mat            A = pc->useAmat ? pc->mat : pc->pmat;
522*53022affSStefano Zampini   PetscErrorCode ierr;
523*53022affSStefano Zampini 
524*53022affSStefano Zampini   PetscFunctionBegin;
525*53022affSStefano Zampini   if (!pch2opus->S) {
526*53022affSStefano Zampini     PetscInt M,N,m,n;
527*53022affSStefano Zampini 
528*53022affSStefano Zampini     ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
529*53022affSStefano Zampini     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
530*53022affSStefano Zampini     ierr = MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S);CHKERRQ(ierr);
531*53022affSStefano Zampini     ierr = MatSetBlockSizesFromMats(pch2opus->S,A,A);CHKERRQ(ierr);
532*53022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU)
533*53022affSStefano Zampini     ierr = MatShellSetVecType(pch2opus->S,VECCUDA);CHKERRQ(ierr);
534*53022affSStefano Zampini #endif
535*53022affSStefano Zampini   }
536*53022affSStefano Zampini   if (pch2opus->hyperorder >= 2) {
537*53022affSStefano Zampini     ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper);CHKERRQ(ierr);
538*53022affSStefano Zampini     ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper);CHKERRQ(ierr);
539*53022affSStefano Zampini     ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE);CHKERRQ(ierr);
540*53022affSStefano Zampini     ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA);CHKERRQ(ierr);
541*53022affSStefano Zampini   } else {
542*53022affSStefano Zampini     ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS);CHKERRQ(ierr);
543*53022affSStefano Zampini     ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS);CHKERRQ(ierr);
544*53022affSStefano Zampini     ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE);CHKERRQ(ierr);
545*53022affSStefano Zampini     ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA);CHKERRQ(ierr);
546*53022affSStefano Zampini   }
547*53022affSStefano Zampini   ierr = MatPropagateSymmetryOptions(A,pch2opus->S);CHKERRQ(ierr);
548*53022affSStefano Zampini   ierr = MatBindToCPU(pch2opus->S,pch2opus->boundtocpu);CHKERRQ(ierr);
549*53022affSStefano Zampini   /* XXX */
550*53022affSStefano Zampini   ierr = MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
551*53022affSStefano Zampini   PetscFunctionReturn(0);
552*53022affSStefano Zampini }
553*53022affSStefano Zampini 
554*53022affSStefano Zampini static PetscErrorCode PCSetUp_H2OPUS(PC pc)
555*53022affSStefano Zampini {
556*53022affSStefano Zampini   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
557*53022affSStefano Zampini   Mat            A = pc->useAmat ? pc->mat : pc->pmat;
558*53022affSStefano Zampini   NormType       norm = pch2opus->normtype;
559*53022affSStefano Zampini   PetscReal      initerr = 0.0,err;
560*53022affSStefano Zampini   PetscReal      initerrMA = 0.0,errMA;
561*53022affSStefano Zampini   PetscBool      ish2opus;
562*53022affSStefano Zampini   PetscErrorCode ierr;
563*53022affSStefano Zampini 
564*53022affSStefano Zampini   PetscFunctionBegin;
565*53022affSStefano Zampini   if (!pch2opus->T) {
566*53022affSStefano Zampini     PetscInt    M,N,m,n;
567*53022affSStefano Zampini     const char *prefix;
568*53022affSStefano Zampini 
569*53022affSStefano Zampini     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
570*53022affSStefano Zampini     ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
571*53022affSStefano Zampini     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
572*53022affSStefano Zampini     ierr = MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T);CHKERRQ(ierr);
573*53022affSStefano Zampini     ierr = MatSetBlockSizesFromMats(pch2opus->T,A,A);CHKERRQ(ierr);
574*53022affSStefano Zampini     ierr = MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI);CHKERRQ(ierr);
575*53022affSStefano Zampini     ierr = MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI);CHKERRQ(ierr);
576*53022affSStefano Zampini     ierr = MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);CHKERRQ(ierr);
577*53022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU)
578*53022affSStefano Zampini     ierr = MatShellSetVecType(pch2opus->T,VECCUDA);CHKERRQ(ierr);
579*53022affSStefano Zampini #endif
580*53022affSStefano Zampini     ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T);CHKERRQ(ierr);
581*53022affSStefano Zampini     ierr = MatSetOptionsPrefix(pch2opus->T,prefix);CHKERRQ(ierr);
582*53022affSStefano Zampini     ierr = MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_");CHKERRQ(ierr);
583*53022affSStefano Zampini   }
584*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->A);CHKERRQ(ierr);
585*53022affSStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus);CHKERRQ(ierr);
586*53022affSStefano Zampini   if (ish2opus) {
587*53022affSStefano Zampini     ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
588*53022affSStefano Zampini     pch2opus->A = A;
589*53022affSStefano Zampini   } else {
590*53022affSStefano Zampini     const char *prefix;
591*53022affSStefano Zampini     ierr = MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A);CHKERRQ(ierr);
592*53022affSStefano Zampini     /* XXX */
593*53022affSStefano Zampini     ierr = MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
594*53022affSStefano Zampini     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
595*53022affSStefano Zampini     ierr = MatSetOptionsPrefix(pch2opus->A,prefix);CHKERRQ(ierr);
596*53022affSStefano Zampini     ierr = MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_");CHKERRQ(ierr);
597*53022affSStefano Zampini     ierr = MatSetFromOptions(pch2opus->A);CHKERRQ(ierr);
598*53022affSStefano Zampini     ierr = MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
599*53022affSStefano Zampini     ierr = MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
600*53022affSStefano Zampini     /* XXX */
601*53022affSStefano Zampini     ierr = MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
602*53022affSStefano Zampini   }
603*53022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU)
604*53022affSStefano Zampini   pch2opus->boundtocpu = pch2opus->A->boundtocpu;
605*53022affSStefano Zampini #endif
606*53022affSStefano Zampini   ierr = MatBindToCPU(pch2opus->T,pch2opus->boundtocpu);CHKERRQ(ierr);
607*53022affSStefano Zampini   if (pch2opus->M) { /* see if we can reuse M as initial guess */
608*53022affSStefano Zampini     PetscReal reuse;
609*53022affSStefano Zampini 
610*53022affSStefano Zampini     ierr = MatBindToCPU(pch2opus->M,pch2opus->boundtocpu);CHKERRQ(ierr);
611*53022affSStefano Zampini     ierr = MatNorm(pch2opus->T,norm,&reuse);CHKERRQ(ierr);
612*53022affSStefano Zampini     if (reuse >= 1.0) {
613*53022affSStefano Zampini       ierr = MatDestroy(&pch2opus->M);CHKERRQ(ierr);
614*53022affSStefano Zampini     }
615*53022affSStefano Zampini   }
616*53022affSStefano Zampini   if (!pch2opus->M) {
617*53022affSStefano Zampini     const char *prefix;
618*53022affSStefano Zampini     ierr = MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M);CHKERRQ(ierr);
619*53022affSStefano Zampini     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
620*53022affSStefano Zampini     ierr = MatSetOptionsPrefix(pch2opus->M,prefix);CHKERRQ(ierr);
621*53022affSStefano Zampini     ierr = MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_");CHKERRQ(ierr);
622*53022affSStefano Zampini     ierr = MatSetFromOptions(pch2opus->M);CHKERRQ(ierr);
623*53022affSStefano Zampini     ierr = PCH2OpusSetUpInit(pc);CHKERRQ(ierr);
624*53022affSStefano Zampini     ierr = MatScale(pch2opus->M,pch2opus->s0);CHKERRQ(ierr);
625*53022affSStefano Zampini   }
626*53022affSStefano Zampini   /* A and M have the same h2 matrix structure, save on reordering routines */
627*53022affSStefano Zampini   ierr = MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE);CHKERRQ(ierr);
628*53022affSStefano Zampini   ierr = MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE);CHKERRQ(ierr);
629*53022affSStefano Zampini   if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
630*53022affSStefano Zampini     ierr = MatNorm(pch2opus->T,norm,&initerr);CHKERRQ(ierr);
631*53022affSStefano Zampini     pch2opus->testMA = PETSC_TRUE;
632*53022affSStefano Zampini     ierr = MatNorm(pch2opus->T,norm,&initerrMA);CHKERRQ(ierr);
633*53022affSStefano Zampini     pch2opus->testMA = PETSC_FALSE;
634*53022affSStefano Zampini   }
635*53022affSStefano Zampini   if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
636*53022affSStefano Zampini   err   = initerr;
637*53022affSStefano Zampini   errMA = initerrMA;
638*53022affSStefano Zampini   if (pch2opus->monitor) {
639*53022affSStefano Zampini     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr));CHKERRQ(ierr);
640*53022affSStefano Zampini     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A||     NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));CHKERRQ(ierr);
641*53022affSStefano Zampini   }
642*53022affSStefano Zampini   if (initerr > pch2opus->atol && !pc->failedreason) {
643*53022affSStefano Zampini     PetscInt i;
644*53022affSStefano Zampini 
645*53022affSStefano Zampini     ierr = PCH2OpusSetUpSampler_Private(pc);CHKERRQ(ierr);
646*53022affSStefano Zampini     for (i = 0; i < pch2opus->maxits; i++) {
647*53022affSStefano Zampini       Mat         M;
648*53022affSStefano Zampini       const char* prefix;
649*53022affSStefano Zampini 
650*53022affSStefano Zampini       ierr = MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M);CHKERRQ(ierr);
651*53022affSStefano Zampini       ierr = MatGetOptionsPrefix(pch2opus->M,&prefix);CHKERRQ(ierr);
652*53022affSStefano Zampini       ierr = MatSetOptionsPrefix(M,prefix);CHKERRQ(ierr);
653*53022affSStefano Zampini       ierr = MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
654*53022affSStefano Zampini       ierr = MatSetFromOptions(M);CHKERRQ(ierr);
655*53022affSStefano Zampini       ierr = MatH2OpusSetNativeMult(M,PETSC_TRUE);CHKERRQ(ierr);
656*53022affSStefano Zampini       ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
657*53022affSStefano Zampini       ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
658*53022affSStefano Zampini 
659*53022affSStefano Zampini       ierr = MatDestroy(&pch2opus->M);CHKERRQ(ierr);
660*53022affSStefano Zampini       pch2opus->M = M;
661*53022affSStefano Zampini       if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
662*53022affSStefano Zampini         ierr = MatNorm(pch2opus->T,norm,&err);CHKERRQ(ierr);
663*53022affSStefano Zampini         pch2opus->testMA = PETSC_TRUE;
664*53022affSStefano Zampini         ierr = MatNorm(pch2opus->T,norm,&errMA);CHKERRQ(ierr);
665*53022affSStefano Zampini         pch2opus->testMA = PETSC_FALSE;
666*53022affSStefano Zampini       }
667*53022affSStefano Zampini       if (pch2opus->monitor) {
668*53022affSStefano Zampini         ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr));CHKERRQ(ierr);
669*53022affSStefano Zampini         ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A||     NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));CHKERRQ(ierr);
670*53022affSStefano Zampini       }
671*53022affSStefano Zampini       if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
672*53022affSStefano Zampini       if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break;
673*53022affSStefano Zampini     }
674*53022affSStefano Zampini   }
675*53022affSStefano Zampini   /* cleanup setup workspace */
676*53022affSStefano Zampini   ierr = MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE);CHKERRQ(ierr);
677*53022affSStefano Zampini   ierr = MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE);CHKERRQ(ierr);
678*53022affSStefano Zampini   ierr = VecDestroy(&pch2opus->wns[0]);CHKERRQ(ierr);
679*53022affSStefano Zampini   ierr = VecDestroy(&pch2opus->wns[1]);CHKERRQ(ierr);
680*53022affSStefano Zampini   ierr = VecDestroy(&pch2opus->wns[2]);CHKERRQ(ierr);
681*53022affSStefano Zampini   ierr = VecDestroy(&pch2opus->wns[3]);CHKERRQ(ierr);
682*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr);
683*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr);
684*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->wnsmat[2]);CHKERRQ(ierr);
685*53022affSStefano Zampini   ierr = MatDestroy(&pch2opus->wnsmat[3]);CHKERRQ(ierr);
686*53022affSStefano Zampini   PetscFunctionReturn(0);
687*53022affSStefano Zampini }
688*53022affSStefano Zampini 
689*53022affSStefano Zampini static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
690*53022affSStefano Zampini {
691*53022affSStefano Zampini   PetscErrorCode ierr;
692*53022affSStefano Zampini   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
693*53022affSStefano Zampini   PetscBool      isascii;
694*53022affSStefano Zampini 
695*53022affSStefano Zampini   PetscFunctionBegin;
696*53022affSStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
697*53022affSStefano Zampini   if (isascii) {
698*53022affSStefano Zampini     if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
699*53022affSStefano Zampini       ierr = PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n");CHKERRQ(ierr);
700*53022affSStefano Zampini       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
701*53022affSStefano Zampini       ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
702*53022affSStefano Zampini       ierr = MatView(pch2opus->A,viewer);CHKERRQ(ierr);
703*53022affSStefano Zampini       ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
704*53022affSStefano Zampini       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
705*53022affSStefano Zampini     }
706*53022affSStefano Zampini     if (pch2opus->M) {
707*53022affSStefano Zampini       ierr = PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n");CHKERRQ(ierr);
708*53022affSStefano Zampini       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
709*53022affSStefano Zampini       ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
710*53022affSStefano Zampini       ierr = MatView(pch2opus->M,viewer);CHKERRQ(ierr);
711*53022affSStefano Zampini       ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
712*53022affSStefano Zampini       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
713*53022affSStefano Zampini     }
714*53022affSStefano Zampini     ierr = PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0);CHKERRQ(ierr);
715*53022affSStefano Zampini   }
716*53022affSStefano Zampini   PetscFunctionReturn(0);
717*53022affSStefano Zampini }
718*53022affSStefano Zampini 
719*53022affSStefano Zampini PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
720*53022affSStefano Zampini {
721*53022affSStefano Zampini   PetscErrorCode ierr;
722*53022affSStefano Zampini   PC_H2OPUS      *pch2opus;
723*53022affSStefano Zampini 
724*53022affSStefano Zampini   PetscFunctionBegin;
725*53022affSStefano Zampini   ierr     = PetscNewLog(pc,&pch2opus);CHKERRQ(ierr);
726*53022affSStefano Zampini   pc->data = (void*)pch2opus;
727*53022affSStefano Zampini 
728*53022affSStefano Zampini   pch2opus->atol       = 1.e-2;
729*53022affSStefano Zampini   pch2opus->rtol       = 1.e-6;
730*53022affSStefano Zampini   pch2opus->maxits     = 50;
731*53022affSStefano Zampini   pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */
732*53022affSStefano Zampini   pch2opus->normtype   = NORM_2;
733*53022affSStefano Zampini 
734*53022affSStefano Zampini   /* these are needed when we are sampling the pmat */
735*53022affSStefano Zampini   pch2opus->eta        = PETSC_DECIDE;
736*53022affSStefano Zampini   pch2opus->leafsize   = PETSC_DECIDE;
737*53022affSStefano Zampini   pch2opus->max_rank   = PETSC_DECIDE;
738*53022affSStefano Zampini   pch2opus->bs         = PETSC_DECIDE;
739*53022affSStefano Zampini   pch2opus->mrtol      = PETSC_DECIDE;
740*53022affSStefano Zampini #if defined(PETSC_H2OPUS_USE_GPU)
741*53022affSStefano Zampini   pch2opus->boundtocpu = PETSC_FALSE;
742*53022affSStefano Zampini #else
743*53022affSStefano Zampini   pch2opus->boundtocpu = PETSC_TRUE;
744*53022affSStefano Zampini #endif
745*53022affSStefano Zampini   pc->ops->destroy        = PCDestroy_H2OPUS;
746*53022affSStefano Zampini   pc->ops->setup          = PCSetUp_H2OPUS;
747*53022affSStefano Zampini   pc->ops->apply          = PCApply_H2OPUS;
748*53022affSStefano Zampini   pc->ops->matapply       = PCApplyMat_H2OPUS;
749*53022affSStefano Zampini   pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
750*53022affSStefano Zampini   pc->ops->reset          = PCReset_H2OPUS;
751*53022affSStefano Zampini   pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
752*53022affSStefano Zampini   pc->ops->view           = PCView_H2OPUS;
753*53022affSStefano Zampini 
754*53022affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS);CHKERRQ(ierr);
755*53022affSStefano Zampini   PetscFunctionReturn(0);
756*53022affSStefano Zampini }
757