xref: /petsc/src/ksp/pc/impls/h2opus/pch2opus.c (revision 40badf4fbc550ac1f60bd080eaff6de6d55b946d)
1 #include <petsc/private/pcimpl.h>
2 #include <petsc/private/matimpl.h>
3 #include <h2opusconf.h>
4 
5 /* Use GPU only if H2OPUS is configured for GPU */
6 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
7 #define PETSC_H2OPUS_USE_GPU
8 #endif
9 
10 typedef struct {
11   Mat         A;
12   Mat         M;
13   PetscScalar s0;
14 
15   /* sampler for Newton-Schultz */
16   Mat      S;
17   PetscInt hyperorder;
18   Vec      wns[4];
19   Mat      wnsmat[4];
20 
21   /* convergence testing */
22   Mat       T;
23   Vec       w;
24   PetscBool testMA;
25 
26   /* Support for PCSetCoordinates */
27   PetscInt  sdim;
28   PetscInt  nlocc;
29   PetscReal *coords;
30 
31   /* Newton-Schultz customization */
32   PetscInt  maxits;
33   PetscReal rtol,atol;
34   PetscBool monitor;
35   PetscBool useapproximatenorms;
36   NormType  normtype;
37 
38   /* Used when pmat != MATH2OPUS */
39   PetscReal eta;
40   PetscInt  leafsize;
41   PetscInt  max_rank;
42   PetscInt  bs;
43   PetscReal mrtol;
44 
45   PetscBool boundtocpu;
46 } PC_H2OPUS;
47 
48 PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*);
49 
50 static PetscErrorCode PCReset_H2OPUS(PC pc)
51 {
52   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
53 
54   PetscFunctionBegin;
55   pch2opus->sdim  = 0;
56   pch2opus->nlocc = 0;
57   CHKERRQ(PetscFree(pch2opus->coords));
58   CHKERRQ(MatDestroy(&pch2opus->A));
59   CHKERRQ(MatDestroy(&pch2opus->M));
60   CHKERRQ(MatDestroy(&pch2opus->T));
61   CHKERRQ(VecDestroy(&pch2opus->w));
62   CHKERRQ(MatDestroy(&pch2opus->S));
63   CHKERRQ(VecDestroy(&pch2opus->wns[0]));
64   CHKERRQ(VecDestroy(&pch2opus->wns[1]));
65   CHKERRQ(VecDestroy(&pch2opus->wns[2]));
66   CHKERRQ(VecDestroy(&pch2opus->wns[3]));
67   CHKERRQ(MatDestroy(&pch2opus->wnsmat[0]));
68   CHKERRQ(MatDestroy(&pch2opus->wnsmat[1]));
69   CHKERRQ(MatDestroy(&pch2opus->wnsmat[2]));
70   CHKERRQ(MatDestroy(&pch2opus->wnsmat[3]));
71   PetscFunctionReturn(0);
72 }
73 
74 static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
75 {
76   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
77   PetscBool  reset    = PETSC_TRUE;
78 
79   PetscFunctionBegin;
80   if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
81     CHKERRQ(PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset));
82     reset = (PetscBool)!reset;
83   }
84   CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc)));
85   if (reset) {
86     CHKERRQ(PCReset_H2OPUS(pc));
87     CHKERRQ(PetscMalloc1(sdim*nlocc,&pch2opus->coords));
88     CHKERRQ(PetscArraycpy(pch2opus->coords,coords,sdim*nlocc));
89     pch2opus->sdim  = sdim;
90     pch2opus->nlocc = nlocc;
91   }
92   PetscFunctionReturn(0);
93 }
94 
95 static PetscErrorCode PCDestroy_H2OPUS(PC pc)
96 {
97   PetscFunctionBegin;
98   CHKERRQ(PCReset_H2OPUS(pc));
99   CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL));
100   CHKERRQ(PetscFree(pc->data));
101   PetscFunctionReturn(0);
102 }
103 
104 static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc)
105 {
106   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
107 
108   PetscFunctionBegin;
109   CHKERRQ(PetscOptionsHead(PetscOptionsObject,"H2OPUS options"));
110   CHKERRQ(PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL));
111   CHKERRQ(PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL));
112   CHKERRQ(PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL));
113   CHKERRQ(PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL));
114   CHKERRQ(PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL));
115   CHKERRQ(PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL));
116   CHKERRQ(PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL));
117   CHKERRQ(PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL));
118   CHKERRQ(PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL));
119   CHKERRQ(PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL));
120   CHKERRQ(PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL));
121   CHKERRQ(PetscOptionsTail());
122   PetscFunctionReturn(0);
123 }
124 
125 typedef struct {
126   Mat A;
127   Mat M;
128   Vec w;
129 } AAtCtx;
130 
131 static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
132 {
133   AAtCtx *aat;
134 
135   PetscFunctionBegin;
136   CHKERRQ(MatShellGetContext(A,(void*)&aat));
137   /* CHKERRQ(MatMultTranspose(aat->M,x,aat->w)); */
138   CHKERRQ(MatMultTranspose(aat->A,x,aat->w));
139   CHKERRQ(MatMult(aat->A,aat->w,y));
140   PetscFunctionReturn(0);
141 }
142 
143 static PetscErrorCode PCH2OpusSetUpInit(PC pc)
144 {
145   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
146   Mat        A        = pc->useAmat ? pc->mat : pc->pmat, AAt;
147   PetscInt   M,m;
148   VecType    vtype;
149   PetscReal  n;
150   AAtCtx     aat;
151 
152   PetscFunctionBegin;
153   aat.A = A;
154   aat.M = pch2opus->M; /* unused so far */
155   CHKERRQ(MatCreateVecs(A,NULL,&aat.w));
156   CHKERRQ(MatGetSize(A,&M,NULL));
157   CHKERRQ(MatGetLocalSize(A,&m,NULL));
158   CHKERRQ(MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt));
159   CHKERRQ(MatBindToCPU(AAt,pch2opus->boundtocpu));
160   CHKERRQ(MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt));
161   CHKERRQ(MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt));
162   CHKERRQ(MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS));
163   CHKERRQ(MatGetVecType(A,&vtype));
164   CHKERRQ(MatShellSetVecType(AAt,vtype));
165   CHKERRQ(MatNorm(AAt,NORM_1,&n));
166   pch2opus->s0 = 1./n;
167   CHKERRQ(VecDestroy(&aat.w));
168   CHKERRQ(MatDestroy(&AAt));
169   PetscFunctionReturn(0);
170 }
171 
172 static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
173 {
174   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
175 
176   PetscFunctionBegin;
177   if (t) CHKERRQ(MatMultTranspose(pch2opus->M,x,y));
178   else  CHKERRQ(MatMult(pch2opus->M,x,y));
179   PetscFunctionReturn(0);
180 }
181 
182 static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
183 {
184   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
185 
186   PetscFunctionBegin;
187   if (t) CHKERRQ(MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y));
188   else   CHKERRQ(MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y));
189   PetscFunctionReturn(0);
190 }
191 
192 static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
193 {
194   PetscFunctionBegin;
195   CHKERRQ(PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE));
196   PetscFunctionReturn(0);
197 }
198 
199 static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
200 {
201   PetscFunctionBegin;
202   CHKERRQ(PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE));
203   PetscFunctionReturn(0);
204 }
205 
206 static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
207 {
208   PetscFunctionBegin;
209   CHKERRQ(PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE));
210   PetscFunctionReturn(0);
211 }
212 
213 static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
214 {
215   PetscFunctionBegin;
216   CHKERRQ(PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE));
217   PetscFunctionReturn(0);
218 }
219 
220 /* used to test the norm of (M^-1 A - I) */
221 static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
222 {
223   PC         pc;
224   Mat        A;
225   PC_H2OPUS *pch2opus;
226   PetscBool  sideleft = PETSC_TRUE;
227 
228   PetscFunctionBegin;
229   CHKERRQ(MatShellGetContext(M,(void*)&pc));
230   pch2opus = (PC_H2OPUS*)pc->data;
231   if (!pch2opus->w) CHKERRQ(MatCreateVecs(pch2opus->M,&pch2opus->w,NULL));
232   A = pch2opus->A;
233   CHKERRQ(VecBindToCPU(pch2opus->w,pch2opus->boundtocpu));
234   if (t) {
235     if (sideleft) {
236       CHKERRQ(PCApplyTranspose_H2OPUS(pc,x,pch2opus->w));
237       CHKERRQ(MatMultTranspose(A,pch2opus->w,y));
238     } else {
239       CHKERRQ(MatMultTranspose(A,x,pch2opus->w));
240       CHKERRQ(PCApplyTranspose_H2OPUS(pc,pch2opus->w,y));
241     }
242   } else {
243     if (sideleft) {
244       CHKERRQ(MatMult(A,x,pch2opus->w));
245       CHKERRQ(PCApply_H2OPUS(pc,pch2opus->w,y));
246     } else {
247       CHKERRQ(PCApply_H2OPUS(pc,x,pch2opus->w));
248       CHKERRQ(MatMult(A,pch2opus->w,y));
249     }
250   }
251   if (!pch2opus->testMA) CHKERRQ(VecAXPY(y,-1.0,x));
252   PetscFunctionReturn(0);
253 }
254 
255 static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
256 {
257   PetscFunctionBegin;
258   CHKERRQ(MatMultKernel_MAmI(A,x,y,PETSC_FALSE));
259   PetscFunctionReturn(0);
260 }
261 
262 static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
263 {
264   PetscFunctionBegin;
265   CHKERRQ(MatMultKernel_MAmI(A,x,y,PETSC_TRUE));
266   PetscFunctionReturn(0);
267 }
268 
269 /* HyperPower kernel:
270 Y = R = x
271 for i = 1 . . . l - 1 do
272   R = (I - A * Xk) * R
273   Y = Y + R
274 Y = Xk * Y
275 */
276 static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
277 {
278   PC         pc;
279   Mat        A;
280   PC_H2OPUS *pch2opus;
281 
282   PetscFunctionBegin;
283   CHKERRQ(MatShellGetContext(M,(void*)&pc));
284   pch2opus = (PC_H2OPUS*)pc->data;
285   A = pch2opus->A;
286   CHKERRQ(MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
287   CHKERRQ(MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3]));
288   CHKERRQ(VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu));
289   CHKERRQ(VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu));
290   CHKERRQ(VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu));
291   CHKERRQ(VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu));
292   CHKERRQ(VecCopy(x,pch2opus->wns[0]));
293   CHKERRQ(VecCopy(x,pch2opus->wns[3]));
294   if (t) {
295     for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) {
296       CHKERRQ(MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1]));
297       CHKERRQ(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2]));
298       CHKERRQ(VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]));
299       CHKERRQ(VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]));
300     }
301     CHKERRQ(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y));
302   } else {
303     for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) {
304       CHKERRQ(PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]));
305       CHKERRQ(MatMult(A,pch2opus->wns[1],pch2opus->wns[2]));
306       CHKERRQ(VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]));
307       CHKERRQ(VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]));
308     }
309     CHKERRQ(PCApply_H2OPUS(pc,pch2opus->wns[3],y));
310   }
311   PetscFunctionReturn(0);
312 }
313 
314 static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
315 {
316   PetscFunctionBegin;
317   CHKERRQ(MatMultKernel_Hyper(M,x,y,PETSC_FALSE));
318   PetscFunctionReturn(0);
319 }
320 
321 static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
322 {
323   PetscFunctionBegin;
324   CHKERRQ(MatMultKernel_Hyper(M,x,y,PETSC_TRUE));
325   PetscFunctionReturn(0);
326 }
327 
328 /* Hyper power kernel, MatMat version */
329 static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
330 {
331   PC         pc;
332   Mat        A;
333   PC_H2OPUS *pch2opus;
334   PetscInt   i;
335 
336   PetscFunctionBegin;
337   CHKERRQ(MatShellGetContext(M,(void*)&pc));
338   pch2opus = (PC_H2OPUS*)pc->data;
339   A = pch2opus->A;
340   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
341     CHKERRQ(MatDestroy(&pch2opus->wnsmat[0]));
342     CHKERRQ(MatDestroy(&pch2opus->wnsmat[1]));
343   }
344   if (!pch2opus->wnsmat[0]) {
345     CHKERRQ(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]));
346     CHKERRQ(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]));
347   }
348   if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
349     CHKERRQ(MatDestroy(&pch2opus->wnsmat[2]));
350     CHKERRQ(MatDestroy(&pch2opus->wnsmat[3]));
351   }
352   if (!pch2opus->wnsmat[2]) {
353     CHKERRQ(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2]));
354     CHKERRQ(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3]));
355   }
356   CHKERRQ(MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN));
357   CHKERRQ(MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN));
358   if (t) {
359     for (i=0;i<pch2opus->hyperorder-1;i++) {
360       CHKERRQ(MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]));
361       CHKERRQ(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2]));
362       CHKERRQ(MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN));
363       CHKERRQ(MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN));
364     }
365     CHKERRQ(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y));
366   } else {
367     for (i=0;i<pch2opus->hyperorder-1;i++) {
368       CHKERRQ(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]));
369       CHKERRQ(MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2]));
370       CHKERRQ(MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN));
371       CHKERRQ(MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN));
372     }
373     CHKERRQ(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y));
374   }
375   PetscFunctionReturn(0);
376 }
377 
378 static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx)
379 {
380   PetscFunctionBegin;
381   CHKERRQ(MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE));
382   PetscFunctionReturn(0);
383 }
384 
385 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
386 static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
387 {
388   PC         pc;
389   Mat        A;
390   PC_H2OPUS *pch2opus;
391 
392   PetscFunctionBegin;
393   CHKERRQ(MatShellGetContext(M,(void*)&pc));
394   pch2opus = (PC_H2OPUS*)pc->data;
395   A = pch2opus->A;
396   CHKERRQ(MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
397   CHKERRQ(VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu));
398   CHKERRQ(VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu));
399   if (t) {
400     CHKERRQ(PCApplyTranspose_H2OPUS(pc,x,y));
401     CHKERRQ(MatMultTranspose(A,y,pch2opus->wns[1]));
402     CHKERRQ(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0]));
403     CHKERRQ(VecAXPBY(y,-1.,2.,pch2opus->wns[0]));
404   } else {
405     CHKERRQ(PCApply_H2OPUS(pc,x,y));
406     CHKERRQ(MatMult(A,y,pch2opus->wns[0]));
407     CHKERRQ(PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]));
408     CHKERRQ(VecAXPBY(y,-1.,2.,pch2opus->wns[1]));
409   }
410   PetscFunctionReturn(0);
411 }
412 
413 static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
414 {
415   PetscFunctionBegin;
416   CHKERRQ(MatMultKernel_NS(M,x,y,PETSC_FALSE));
417   PetscFunctionReturn(0);
418 }
419 
420 static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
421 {
422   PetscFunctionBegin;
423   CHKERRQ(MatMultKernel_NS(M,x,y,PETSC_TRUE));
424   PetscFunctionReturn(0);
425 }
426 
427 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
428 static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
429 {
430   PC         pc;
431   Mat        A;
432   PC_H2OPUS *pch2opus;
433 
434   PetscFunctionBegin;
435   CHKERRQ(MatShellGetContext(M,(void*)&pc));
436   pch2opus = (PC_H2OPUS*)pc->data;
437   A = pch2opus->A;
438   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
439     CHKERRQ(MatDestroy(&pch2opus->wnsmat[0]));
440     CHKERRQ(MatDestroy(&pch2opus->wnsmat[1]));
441   }
442   if (!pch2opus->wnsmat[0]) {
443     CHKERRQ(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]));
444     CHKERRQ(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]));
445   }
446   if (t) {
447     CHKERRQ(PCApplyTransposeMat_H2OPUS(pc,X,Y));
448     CHKERRQ(MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]));
449     CHKERRQ(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0]));
450     CHKERRQ(MatScale(Y,2.));
451     CHKERRQ(MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN));
452   } else {
453     CHKERRQ(PCApplyMat_H2OPUS(pc,X,Y));
454     CHKERRQ(MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0]));
455     CHKERRQ(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]));
456     CHKERRQ(MatScale(Y,2.));
457     CHKERRQ(MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN));
458   }
459   PetscFunctionReturn(0);
460 }
461 
462 static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
463 {
464   PetscFunctionBegin;
465   CHKERRQ(MatMatMultKernel_NS(M,X,Y,PETSC_FALSE));
466   PetscFunctionReturn(0);
467 }
468 
469 static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
470 {
471   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
472   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;
473 
474   PetscFunctionBegin;
475   if (!pch2opus->S) {
476     PetscInt M,N,m,n;
477 
478     CHKERRQ(MatGetSize(A,&M,&N));
479     CHKERRQ(MatGetLocalSize(A,&m,&n));
480     CHKERRQ(MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S));
481     CHKERRQ(MatSetBlockSizesFromMats(pch2opus->S,A,A));
482 #if defined(PETSC_H2OPUS_USE_GPU)
483     CHKERRQ(MatShellSetVecType(pch2opus->S,VECCUDA));
484 #endif
485   }
486   if (pch2opus->hyperorder >= 2) {
487     CHKERRQ(MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper));
488     CHKERRQ(MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper));
489     CHKERRQ(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE));
490     CHKERRQ(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA));
491   } else {
492     CHKERRQ(MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS));
493     CHKERRQ(MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS));
494     CHKERRQ(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE));
495     CHKERRQ(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA));
496   }
497   CHKERRQ(MatPropagateSymmetryOptions(A,pch2opus->S));
498   CHKERRQ(MatBindToCPU(pch2opus->S,pch2opus->boundtocpu));
499   /* XXX */
500   CHKERRQ(MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE));
501   PetscFunctionReturn(0);
502 }
503 
504 static PetscErrorCode PCSetUp_H2OPUS(PC pc)
505 {
506   PC_H2OPUS *pch2opus  = (PC_H2OPUS*)pc->data;
507   Mat        A         = pc->useAmat ? pc->mat : pc->pmat;
508   NormType   norm      = pch2opus->normtype;
509   PetscReal  initerr   = 0.0,err;
510   PetscReal  initerrMA = 0.0,errMA;
511   PetscBool  ish2opus;
512 
513   PetscFunctionBegin;
514   if (!pch2opus->T) {
515     PetscInt    M,N,m,n;
516     const char *prefix;
517 
518     CHKERRQ(PCGetOptionsPrefix(pc,&prefix));
519     CHKERRQ(MatGetSize(A,&M,&N));
520     CHKERRQ(MatGetLocalSize(A,&m,&n));
521     CHKERRQ(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T));
522     CHKERRQ(MatSetBlockSizesFromMats(pch2opus->T,A,A));
523     CHKERRQ(MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI));
524     CHKERRQ(MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI));
525     CHKERRQ(MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS));
526 #if defined(PETSC_H2OPUS_USE_GPU)
527     CHKERRQ(MatShellSetVecType(pch2opus->T,VECCUDA));
528 #endif
529     CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T));
530     CHKERRQ(MatSetOptionsPrefix(pch2opus->T,prefix));
531     CHKERRQ(MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_"));
532   }
533   CHKERRQ(MatDestroy(&pch2opus->A));
534   CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus));
535   if (ish2opus) {
536     CHKERRQ(PetscObjectReference((PetscObject)A));
537     pch2opus->A = A;
538   } else {
539     const char *prefix;
540     CHKERRQ(MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A));
541     /* XXX */
542     CHKERRQ(MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE));
543     CHKERRQ(PCGetOptionsPrefix(pc,&prefix));
544     CHKERRQ(MatSetOptionsPrefix(pch2opus->A,prefix));
545     CHKERRQ(MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_"));
546     CHKERRQ(MatSetFromOptions(pch2opus->A));
547     CHKERRQ(MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY));
548     CHKERRQ(MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY));
549     /* XXX */
550     CHKERRQ(MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE));
551   }
552 #if defined(PETSC_H2OPUS_USE_GPU)
553   pch2opus->boundtocpu = pch2opus->A->boundtocpu;
554 #endif
555   CHKERRQ(MatBindToCPU(pch2opus->T,pch2opus->boundtocpu));
556   if (pch2opus->M) { /* see if we can reuse M as initial guess */
557     PetscReal reuse;
558 
559     CHKERRQ(MatBindToCPU(pch2opus->M,pch2opus->boundtocpu));
560     CHKERRQ(MatNorm(pch2opus->T,norm,&reuse));
561     if (reuse >= 1.0) CHKERRQ(MatDestroy(&pch2opus->M));
562   }
563   if (!pch2opus->M) {
564     const char *prefix;
565     CHKERRQ(MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M));
566     CHKERRQ(PCGetOptionsPrefix(pc,&prefix));
567     CHKERRQ(MatSetOptionsPrefix(pch2opus->M,prefix));
568     CHKERRQ(MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_"));
569     CHKERRQ(MatSetFromOptions(pch2opus->M));
570     CHKERRQ(PCH2OpusSetUpInit(pc));
571     CHKERRQ(MatScale(pch2opus->M,pch2opus->s0));
572   }
573   /* A and M have the same h2 matrix structure, save on reordering routines */
574   CHKERRQ(MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE));
575   CHKERRQ(MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE));
576   if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
577     CHKERRQ(MatNorm(pch2opus->T,norm,&initerr));
578     pch2opus->testMA = PETSC_TRUE;
579     CHKERRQ(MatNorm(pch2opus->T,norm,&initerrMA));
580     pch2opus->testMA = PETSC_FALSE;
581   }
582   if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
583   err   = initerr;
584   errMA = initerrMA;
585   if (pch2opus->monitor) {
586     CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr)));
587     CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A||     NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA)));
588   }
589   if (initerr > pch2opus->atol && !pc->failedreason) {
590     PetscInt i;
591 
592     CHKERRQ(PCH2OpusSetUpSampler_Private(pc));
593     for (i = 0; i < pch2opus->maxits; i++) {
594       Mat         M;
595       const char* prefix;
596 
597       CHKERRQ(MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M));
598       CHKERRQ(MatGetOptionsPrefix(pch2opus->M,&prefix));
599       CHKERRQ(MatSetOptionsPrefix(M,prefix));
600       CHKERRQ(MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE));
601       CHKERRQ(MatSetFromOptions(M));
602       CHKERRQ(MatH2OpusSetNativeMult(M,PETSC_TRUE));
603       CHKERRQ(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
604       CHKERRQ(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
605 
606       CHKERRQ(MatDestroy(&pch2opus->M));
607       pch2opus->M = M;
608       if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
609         CHKERRQ(MatNorm(pch2opus->T,norm,&err));
610         pch2opus->testMA = PETSC_TRUE;
611         CHKERRQ(MatNorm(pch2opus->T,norm,&errMA));
612         pch2opus->testMA = PETSC_FALSE;
613       }
614       if (pch2opus->monitor) {
615         CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr)));
616         CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A||     NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA)));
617       }
618       if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
619       if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break;
620     }
621   }
622   /* cleanup setup workspace */
623   CHKERRQ(MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE));
624   CHKERRQ(MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE));
625   CHKERRQ(VecDestroy(&pch2opus->wns[0]));
626   CHKERRQ(VecDestroy(&pch2opus->wns[1]));
627   CHKERRQ(VecDestroy(&pch2opus->wns[2]));
628   CHKERRQ(VecDestroy(&pch2opus->wns[3]));
629   CHKERRQ(MatDestroy(&pch2opus->wnsmat[0]));
630   CHKERRQ(MatDestroy(&pch2opus->wnsmat[1]));
631   CHKERRQ(MatDestroy(&pch2opus->wnsmat[2]));
632   CHKERRQ(MatDestroy(&pch2opus->wnsmat[3]));
633   PetscFunctionReturn(0);
634 }
635 
636 static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
637 {
638   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
639   PetscBool  isascii;
640 
641   PetscFunctionBegin;
642   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
643   if (isascii) {
644     if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
645       CHKERRQ(PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n"));
646       CHKERRQ(PetscViewerASCIIPushTab(viewer));
647       CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL));
648       CHKERRQ(MatView(pch2opus->A,viewer));
649       CHKERRQ(PetscViewerPopFormat(viewer));
650       CHKERRQ(PetscViewerASCIIPopTab(viewer));
651     }
652     if (pch2opus->M) {
653       CHKERRQ(PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n"));
654       CHKERRQ(PetscViewerASCIIPushTab(viewer));
655       CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL));
656       CHKERRQ(MatView(pch2opus->M,viewer));
657       CHKERRQ(PetscViewerPopFormat(viewer));
658       CHKERRQ(PetscViewerASCIIPopTab(viewer));
659     }
660     CHKERRQ(PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0));
661   }
662   PetscFunctionReturn(0);
663 }
664 
665 PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
666 {
667   PC_H2OPUS *pch2opus;
668 
669   PetscFunctionBegin;
670   CHKERRQ(PetscNewLog(pc,&pch2opus));
671   pc->data = (void*)pch2opus;
672 
673   pch2opus->atol       = 1.e-2;
674   pch2opus->rtol       = 1.e-6;
675   pch2opus->maxits     = 50;
676   pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */
677   pch2opus->normtype   = NORM_2;
678 
679   /* these are needed when we are sampling the pmat */
680   pch2opus->eta        = PETSC_DECIDE;
681   pch2opus->leafsize   = PETSC_DECIDE;
682   pch2opus->max_rank   = PETSC_DECIDE;
683   pch2opus->bs         = PETSC_DECIDE;
684   pch2opus->mrtol      = PETSC_DECIDE;
685 #if defined(PETSC_H2OPUS_USE_GPU)
686   pch2opus->boundtocpu = PETSC_FALSE;
687 #else
688   pch2opus->boundtocpu = PETSC_TRUE;
689 #endif
690   pc->ops->destroy        = PCDestroy_H2OPUS;
691   pc->ops->setup          = PCSetUp_H2OPUS;
692   pc->ops->apply          = PCApply_H2OPUS;
693   pc->ops->matapply       = PCApplyMat_H2OPUS;
694   pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
695   pc->ops->reset          = PCReset_H2OPUS;
696   pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
697   pc->ops->view           = PCView_H2OPUS;
698 
699   CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS));
700   PetscFunctionReturn(0);
701 }
702