xref: /petsc/src/ksp/pc/impls/h2opus/pch2opus.c (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0)
1 #include <petsc/private/pcimpl.h>
2 #include <petsc/private/matimpl.h>
3 #include <h2opusconf.h>
4 
5 /* Use GPU only if H2OPUS is configured for GPU */
6 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
7   #define PETSC_H2OPUS_USE_GPU
8 #endif
9 
10 typedef struct {
11   Mat         A;
12   Mat         M;
13   PetscScalar s0;
14 
15   /* sampler for Newton-Schultz */
16   Mat      S;
17   PetscInt hyperorder;
18   Vec      wns[4];
19   Mat      wnsmat[4];
20 
21   /* convergence testing */
22   Mat T;
23   Vec w;
24 
25   /* Support for PCSetCoordinates */
26   PetscInt   sdim;
27   PetscInt   nlocc;
28   PetscReal *coords;
29 
30   /* Newton-Schultz customization */
31   PetscInt  maxits;
32   PetscReal rtol, atol;
33   PetscBool monitor;
34   PetscBool useapproximatenorms;
35   NormType  normtype;
36 
37   /* Used when pmat != MATH2OPUS */
38   PetscReal eta;
39   PetscInt  leafsize;
40   PetscInt  max_rank;
41   PetscInt  bs;
42   PetscReal mrtol;
43 
44   /* CPU/GPU */
45   PetscBool forcecpu;
46   PetscBool boundtocpu;
47 } PC_H2OPUS;
48 
49 PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *);
50 
51 static PetscErrorCode PCReset_H2OPUS(PC pc)
52 {
53   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
54 
55   PetscFunctionBegin;
56   pch2opus->sdim  = 0;
57   pch2opus->nlocc = 0;
58   PetscCall(PetscFree(pch2opus->coords));
59   PetscCall(MatDestroy(&pch2opus->A));
60   PetscCall(MatDestroy(&pch2opus->M));
61   PetscCall(MatDestroy(&pch2opus->T));
62   PetscCall(VecDestroy(&pch2opus->w));
63   PetscCall(MatDestroy(&pch2opus->S));
64   PetscCall(VecDestroy(&pch2opus->wns[0]));
65   PetscCall(VecDestroy(&pch2opus->wns[1]));
66   PetscCall(VecDestroy(&pch2opus->wns[2]));
67   PetscCall(VecDestroy(&pch2opus->wns[3]));
68   PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
69   PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
70   PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
71   PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
72   PetscFunctionReturn(PETSC_SUCCESS);
73 }
74 
75 static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
76 {
77   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
78   PetscBool  reset    = PETSC_TRUE;
79 
80   PetscFunctionBegin;
81   if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
82     PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset));
83     reset = (PetscBool)!reset;
84   }
85   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
86   if (reset) {
87     PetscCall(PCReset_H2OPUS(pc));
88     PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords));
89     PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc));
90     pch2opus->sdim  = sdim;
91     pch2opus->nlocc = nlocc;
92   }
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 static PetscErrorCode PCDestroy_H2OPUS(PC pc)
97 {
98   PetscFunctionBegin;
99   PetscCall(PCReset_H2OPUS(pc));
100   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
101   PetscCall(PetscFree(pc->data));
102   PetscFunctionReturn(PETSC_SUCCESS);
103 }
104 
105 static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems *PetscOptionsObject)
106 {
107   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
108 
109   PetscFunctionBegin;
110   PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options");
111   PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL));
112   PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL));
113   PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL));
114   PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL));
115   PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL));
116   PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL));
117   PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL));
118   PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL));
119   PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL));
120   PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL));
121   PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL));
122   PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL));
123   PetscOptionsHeadEnd();
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 typedef struct {
128   Mat A;
129   Mat M;
130   Vec w;
131 } AAtCtx;
132 
133 static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
134 {
135   AAtCtx *aat;
136 
137   PetscFunctionBegin;
138   PetscCall(MatShellGetContext(A, (void *)&aat));
139   /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */
140   PetscCall(MatMultTranspose(aat->A, x, aat->w));
141   PetscCall(MatMult(aat->A, aat->w, y));
142   PetscFunctionReturn(PETSC_SUCCESS);
143 }
144 
145 static PetscErrorCode PCH2OpusSetUpInit(PC pc)
146 {
147   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
148   Mat        A        = pc->useAmat ? pc->mat : pc->pmat, AAt;
149   PetscInt   M, m;
150   VecType    vtype;
151   PetscReal  n;
152   AAtCtx     aat;
153 
154   PetscFunctionBegin;
155   aat.A = A;
156   aat.M = pch2opus->M; /* unused so far */
157   PetscCall(MatCreateVecs(A, NULL, &aat.w));
158   PetscCall(MatGetSize(A, &M, NULL));
159   PetscCall(MatGetLocalSize(A, &m, NULL));
160   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt));
161   PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu));
162   PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (void (*)(void))MatMult_AAt));
163   PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMult_AAt));
164   PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS));
165   PetscCall(MatGetVecType(A, &vtype));
166   PetscCall(MatShellSetVecType(AAt, vtype));
167   PetscCall(MatNorm(AAt, NORM_1, &n));
168   pch2opus->s0 = 1. / n;
169   PetscCall(VecDestroy(&aat.w));
170   PetscCall(MatDestroy(&AAt));
171   PetscFunctionReturn(PETSC_SUCCESS);
172 }
173 
174 static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
175 {
176   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
177 
178   PetscFunctionBegin;
179   if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y));
180   else PetscCall(MatMult(pch2opus->M, x, y));
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
185 {
186   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
187 
188   PetscFunctionBegin;
189   if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
190   else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
195 {
196   PetscFunctionBegin;
197   PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE));
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
200 
201 static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
202 {
203   PetscFunctionBegin;
204   PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE));
205   PetscFunctionReturn(PETSC_SUCCESS);
206 }
207 
208 static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
209 {
210   PetscFunctionBegin;
211   PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE));
212   PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 
215 static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
216 {
217   PetscFunctionBegin;
218   PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE));
219   PetscFunctionReturn(PETSC_SUCCESS);
220 }
221 
222 /* used to test the norm of (M^-1 A - I) */
223 static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
224 {
225   PC         pc;
226   Mat        A;
227   PC_H2OPUS *pch2opus;
228   PetscBool  sideleft = PETSC_TRUE;
229 
230   PetscFunctionBegin;
231   PetscCall(MatShellGetContext(M, (void *)&pc));
232   pch2opus = (PC_H2OPUS *)pc->data;
233   if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL));
234   A = pch2opus->A;
235   PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu));
236   if (t) {
237     if (sideleft) {
238       PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w));
239       PetscCall(MatMultTranspose(A, pch2opus->w, y));
240     } else {
241       PetscCall(MatMultTranspose(A, x, pch2opus->w));
242       PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y));
243     }
244   } else {
245     if (sideleft) {
246       PetscCall(MatMult(A, x, pch2opus->w));
247       PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y));
248     } else {
249       PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w));
250       PetscCall(MatMult(A, pch2opus->w, y));
251     }
252   }
253   PetscCall(VecAXPY(y, -1.0, x));
254   PetscFunctionReturn(PETSC_SUCCESS);
255 }
256 
257 static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
258 {
259   PetscFunctionBegin;
260   PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE));
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
265 {
266   PetscFunctionBegin;
267   PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE));
268   PetscFunctionReturn(PETSC_SUCCESS);
269 }
270 
271 /* HyperPower kernel:
272 Y = R = x
273 for i = 1 . . . l - 1 do
274   R = (I - A * Xk) * R
275   Y = Y + R
276 Y = Xk * Y
277 */
278 static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
279 {
280   PC         pc;
281   Mat        A;
282   PC_H2OPUS *pch2opus;
283 
284   PetscFunctionBegin;
285   PetscCall(MatShellGetContext(M, (void *)&pc));
286   pch2opus = (PC_H2OPUS *)pc->data;
287   A        = pch2opus->A;
288   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
289   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3]));
290   PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
291   PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
292   PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu));
293   PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu));
294   PetscCall(VecCopy(x, pch2opus->wns[0]));
295   PetscCall(VecCopy(x, pch2opus->wns[3]));
296   if (t) {
297     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
298       PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1]));
299       PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2]));
300       PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
301       PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
302     }
303     PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y));
304   } else {
305     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
306       PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
307       PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2]));
308       PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
309       PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
310     }
311     PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y));
312   }
313   PetscFunctionReturn(PETSC_SUCCESS);
314 }
315 
316 static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
317 {
318   PetscFunctionBegin;
319   PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE));
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
323 static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
324 {
325   PetscFunctionBegin;
326   PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE));
327   PetscFunctionReturn(PETSC_SUCCESS);
328 }
329 
330 /* Hyper power kernel, MatMat version */
331 static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
332 {
333   PC         pc;
334   Mat        A;
335   PC_H2OPUS *pch2opus;
336   PetscInt   i;
337 
338   PetscFunctionBegin;
339   PetscCall(MatShellGetContext(M, (void *)&pc));
340   pch2opus = (PC_H2OPUS *)pc->data;
341   A        = pch2opus->A;
342   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
343     PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
344     PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
345   }
346   if (!pch2opus->wnsmat[0]) {
347     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
348     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
349   }
350   if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
351     PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
352     PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
353   }
354   if (!pch2opus->wnsmat[2]) {
355     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2]));
356     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3]));
357   }
358   PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
359   PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN));
360   if (t) {
361     for (i = 0; i < pch2opus->hyperorder - 1; i++) {
362       PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1]));
363       PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2]));
364       PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
365       PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
366     }
367     PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
368   } else {
369     for (i = 0; i < pch2opus->hyperorder - 1; i++) {
370       PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
371       PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[2]));
372       PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
373       PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
374     }
375     PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
376   }
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
380 static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, void *ctx)
381 {
382   PetscFunctionBegin;
383   PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE));
384   PetscFunctionReturn(PETSC_SUCCESS);
385 }
386 
387 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
388 static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
389 {
390   PC         pc;
391   Mat        A;
392   PC_H2OPUS *pch2opus;
393 
394   PetscFunctionBegin;
395   PetscCall(MatShellGetContext(M, (void *)&pc));
396   pch2opus = (PC_H2OPUS *)pc->data;
397   A        = pch2opus->A;
398   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
399   PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
400   PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
401   if (t) {
402     PetscCall(PCApplyTranspose_H2OPUS(pc, x, y));
403     PetscCall(MatMultTranspose(A, y, pch2opus->wns[1]));
404     PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0]));
405     PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0]));
406   } else {
407     PetscCall(PCApply_H2OPUS(pc, x, y));
408     PetscCall(MatMult(A, y, pch2opus->wns[0]));
409     PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
410     PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1]));
411   }
412   PetscFunctionReturn(PETSC_SUCCESS);
413 }
414 
415 static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
416 {
417   PetscFunctionBegin;
418   PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE));
419   PetscFunctionReturn(PETSC_SUCCESS);
420 }
421 
422 static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
423 {
424   PetscFunctionBegin;
425   PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE));
426   PetscFunctionReturn(PETSC_SUCCESS);
427 }
428 
429 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
430 static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
431 {
432   PC         pc;
433   Mat        A;
434   PC_H2OPUS *pch2opus;
435 
436   PetscFunctionBegin;
437   PetscCall(MatShellGetContext(M, (void *)&pc));
438   pch2opus = (PC_H2OPUS *)pc->data;
439   A        = pch2opus->A;
440   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
441     PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
442     PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
443   }
444   if (!pch2opus->wnsmat[0]) {
445     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
446     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
447   }
448   if (t) {
449     PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y));
450     PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1]));
451     PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0]));
452     PetscCall(MatScale(Y, 2.));
453     PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
454   } else {
455     PetscCall(PCApplyMat_H2OPUS(pc, X, Y));
456     PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[0]));
457     PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
458     PetscCall(MatScale(Y, 2.));
459     PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN));
460   }
461   PetscFunctionReturn(PETSC_SUCCESS);
462 }
463 
464 static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
465 {
466   PetscFunctionBegin;
467   PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE));
468   PetscFunctionReturn(PETSC_SUCCESS);
469 }
470 
471 static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
472 {
473   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
474   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;
475 
476   PetscFunctionBegin;
477   if (!pch2opus->S) {
478     PetscInt M, N, m, n;
479 
480     PetscCall(MatGetSize(A, &M, &N));
481     PetscCall(MatGetLocalSize(A, &m, &n));
482     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S));
483     PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A));
484 #if defined(PETSC_H2OPUS_USE_GPU)
485     PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA));
486 #endif
487   }
488   if (pch2opus->hyperorder >= 2) {
489     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_Hyper));
490     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Hyper));
491     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE));
492     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA));
493   } else {
494     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_NS));
495     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_NS));
496     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE));
497     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA));
498   }
499   PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S));
500   PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu));
501   /* XXX */
502   PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE));
503   PetscFunctionReturn(PETSC_SUCCESS);
504 }
505 
506 static PetscErrorCode PCSetUp_H2OPUS(PC pc)
507 {
508   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
509   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;
510   NormType   norm     = pch2opus->normtype;
511   PetscReal  initerr  = 0.0, err;
512   PetscBool  ish2opus;
513 
514   PetscFunctionBegin;
515   if (!pch2opus->T) {
516     PetscInt    M, N, m, n;
517     const char *prefix;
518 
519     PetscCall(PCGetOptionsPrefix(pc, &prefix));
520     PetscCall(MatGetSize(A, &M, &N));
521     PetscCall(MatGetLocalSize(A, &m, &n));
522     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T));
523     PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A));
524     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (void (*)(void))MatMult_MAmI));
525     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_MAmI));
526     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS));
527 #if defined(PETSC_H2OPUS_USE_GPU)
528     PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA));
529 #endif
530     PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix));
531     PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_"));
532   }
533   PetscCall(MatDestroy(&pch2opus->A));
534   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus));
535   if (ish2opus) {
536     PetscCall(PetscObjectReference((PetscObject)A));
537     pch2opus->A = A;
538   } else {
539     const char *prefix;
540     PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A));
541     /* XXX */
542     PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
543     PetscCall(PCGetOptionsPrefix(pc, &prefix));
544     PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix));
545     PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_"));
546     PetscCall(MatSetFromOptions(pch2opus->A));
547     PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY));
548     PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY));
549     /* XXX */
550     PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
551 
552     /* always perform construction on the GPU unless forcecpu is true */
553     PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu));
554   }
555 #if defined(PETSC_H2OPUS_USE_GPU)
556   pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu;
557 #endif
558   PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu));
559   if (pch2opus->M) { /* see if we can reuse M as initial guess */
560     PetscReal reuse;
561 
562     PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu));
563     PetscCall(MatNorm(pch2opus->T, norm, &reuse));
564     if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M));
565   }
566   if (!pch2opus->M) {
567     const char *prefix;
568     PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M));
569     PetscCall(PCGetOptionsPrefix(pc, &prefix));
570     PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix));
571     PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_"));
572     PetscCall(MatSetFromOptions(pch2opus->M));
573     PetscCall(PCH2OpusSetUpInit(pc));
574     PetscCall(MatScale(pch2opus->M, pch2opus->s0));
575   }
576   /* A and M have the same h2 matrix structure, save on reordering routines */
577   PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE));
578   PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE));
579   if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr));
580   if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
581   err = initerr;
582   if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr)));
583   if (initerr > pch2opus->atol && !pc->failedreason) {
584     PetscInt i;
585 
586     PetscCall(PCH2OpusSetUpSampler_Private(pc));
587     for (i = 0; i < pch2opus->maxits; i++) {
588       Mat         M;
589       const char *prefix;
590 
591       PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M));
592       PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix));
593       PetscCall(MatSetOptionsPrefix(M, prefix));
594       PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE));
595       PetscCall(MatSetFromOptions(M));
596       PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE));
597       PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
598       PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
599       PetscCall(MatDestroy(&pch2opus->M));
600       pch2opus->M = M;
601       if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err));
602       if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr)));
603       if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
604       if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break;
605     }
606   }
607   /* cleanup setup workspace */
608   PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE));
609   PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE));
610   PetscCall(VecDestroy(&pch2opus->wns[0]));
611   PetscCall(VecDestroy(&pch2opus->wns[1]));
612   PetscCall(VecDestroy(&pch2opus->wns[2]));
613   PetscCall(VecDestroy(&pch2opus->wns[3]));
614   PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
615   PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
616   PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
617   PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
618   PetscFunctionReturn(PETSC_SUCCESS);
619 }
620 
621 static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
622 {
623   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
624   PetscBool  isascii;
625 
626   PetscFunctionBegin;
627   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
628   if (isascii) {
629     if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
630       PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n"));
631       PetscCall(PetscViewerASCIIPushTab(viewer));
632       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
633       PetscCall(MatView(pch2opus->A, viewer));
634       PetscCall(PetscViewerPopFormat(viewer));
635       PetscCall(PetscViewerASCIIPopTab(viewer));
636     }
637     if (pch2opus->M) {
638       PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n"));
639       PetscCall(PetscViewerASCIIPushTab(viewer));
640       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
641       PetscCall(MatView(pch2opus->M, viewer));
642       PetscCall(PetscViewerPopFormat(viewer));
643       PetscCall(PetscViewerASCIIPopTab(viewer));
644     }
645     PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0));
646   }
647   PetscFunctionReturn(PETSC_SUCCESS);
648 }
649 
650 /*MC
651      PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package.
652 
653    Options Database Keys:
654 +   -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()`
655 .   -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz
656 .   -pc_h2opus_monitor - monitor Newton-Schultz convergence
657 .   -pc_h2opus_atol - absolute tolerance
658 .   -pc_h2opus_rtol - relative tolerance
659 .   -pc_h2opus_norm_type - normtype
660 .   -pc_h2opus_hyperorder - Hyper power order of sampling
661 .   -pc_h2opus_leafsize - leaf size when constructed from kernel
662 .   -pc_h2opus_eta - admissibility condition tolerance
663 .   -pc_h2opus_maxrank - maximum rank when constructed from matvecs
664 .   -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs
665 .   -pc_h2opus_mrtol - relative tolerance for construction from sampling
666 -   -pc_h2opus_forcecpu - force construction of preconditioner on CPU
667 
668    Level: intermediate
669 
670 .seealso: `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()`
671 M*/
672 
673 PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
674 {
675   PC_H2OPUS *pch2opus;
676 
677   PetscFunctionBegin;
678   PetscCall(PetscNew(&pch2opus));
679   pc->data = (void *)pch2opus;
680 
681   pch2opus->atol       = 1.e-2;
682   pch2opus->rtol       = 1.e-6;
683   pch2opus->maxits     = 50;
684   pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */
685   pch2opus->normtype   = NORM_2;
686 
687   /* these are needed when we are sampling the pmat */
688   pch2opus->eta      = PETSC_DECIDE;
689   pch2opus->leafsize = PETSC_DECIDE;
690   pch2opus->max_rank = PETSC_DECIDE;
691   pch2opus->bs       = PETSC_DECIDE;
692   pch2opus->mrtol    = PETSC_DECIDE;
693 #if defined(PETSC_H2OPUS_USE_GPU)
694   pch2opus->boundtocpu = PETSC_FALSE;
695 #else
696   pch2opus->boundtocpu = PETSC_TRUE;
697 #endif
698   pc->ops->destroy        = PCDestroy_H2OPUS;
699   pc->ops->setup          = PCSetUp_H2OPUS;
700   pc->ops->apply          = PCApply_H2OPUS;
701   pc->ops->matapply       = PCApplyMat_H2OPUS;
702   pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
703   pc->ops->reset          = PCReset_H2OPUS;
704   pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
705   pc->ops->view           = PCView_H2OPUS;
706 
707   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS));
708   PetscFunctionReturn(PETSC_SUCCESS);
709 }
710