xref: /petsc/src/ksp/pc/impls/hpddm/pchpddm.cxx (revision 2fc912f79d312ddfbc78ec0610cc035ba4dda55e)
1 #include <petsc/private/sfimpl.h>
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/matimpl.h>
4 #include <petsc/private/petschpddm.h> /*I "petscpc.h" I*/
5 #include <petsc/private/pcimpl.h>
6 #include <petsc/private/dmimpl.h> /* this must be included after petschpddm.h so that DM_MAX_WORK_VECTORS is not defined  */
7                                   /* otherwise, it is assumed that one is compiling libhpddm_petsc => circular dependency */
8 
9 static PetscErrorCode (*loadedSym)(HPDDM::Schwarz<PetscScalar> *const, IS, Mat, Mat, Mat, std::vector<Vec>, PC_HPDDM_Level **const) = nullptr;
10 
11 static PetscBool PCHPDDMPackageInitialized = PETSC_FALSE;
12 
13 PetscLogEvent PC_HPDDM_Strc;
14 PetscLogEvent PC_HPDDM_PtAP;
15 PetscLogEvent PC_HPDDM_PtBP;
16 PetscLogEvent PC_HPDDM_Next;
17 PetscLogEvent PC_HPDDM_SetUp[PETSC_PCHPDDM_MAXLEVELS];
18 PetscLogEvent PC_HPDDM_Solve[PETSC_PCHPDDM_MAXLEVELS];
19 
20 const char *const PCHPDDMCoarseCorrectionTypes[] = {"DEFLATED", "ADDITIVE", "BALANCED", "NONE", "PCHPDDMCoarseCorrectionType", "PC_HPDDM_COARSE_CORRECTION_", nullptr};
21 const char *const PCHPDDMSchurPreTypes[]         = {"LEAST_SQUARES", "GENEO", "PCHPDDMSchurPreType", "PC_HPDDM_SCHUR_PRE", nullptr};
22 
23 static PetscErrorCode PCReset_HPDDM(PC pc)
24 {
25   PC_HPDDM *data = (PC_HPDDM *)pc->data;
26 
27   PetscFunctionBegin;
28   if (data->levels) {
29     for (PetscInt i = 0; i < PETSC_PCHPDDM_MAXLEVELS && data->levels[i]; ++i) {
30       PetscCall(KSPDestroy(&data->levels[i]->ksp));
31       PetscCall(PCDestroy(&data->levels[i]->pc));
32       PetscCall(PetscFree(data->levels[i]));
33     }
34     PetscCall(PetscFree(data->levels));
35   }
36   PetscCall(ISDestroy(&data->is));
37   PetscCall(MatDestroy(&data->aux));
38   PetscCall(MatDestroy(&data->B));
39   PetscCall(VecDestroy(&data->normal));
40   data->correction = PC_HPDDM_COARSE_CORRECTION_DEFLATED;
41   data->Neumann    = PETSC_BOOL3_UNKNOWN;
42   data->deflation  = PETSC_FALSE;
43   data->setup      = nullptr;
44   data->setup_ctx  = nullptr;
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 static PetscErrorCode PCDestroy_HPDDM(PC pc)
49 {
50   PC_HPDDM *data = (PC_HPDDM *)pc->data;
51 
52   PetscFunctionBegin;
53   PetscCall(PCReset_HPDDM(pc));
54   PetscCall(PetscFree(data));
55   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, nullptr));
56   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetAuxiliaryMat_C", nullptr));
57   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMHasNeumannMat_C", nullptr));
58   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetRHSMat_C", nullptr));
59   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetCoarseCorrectionType_C", nullptr));
60   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetCoarseCorrectionType_C", nullptr));
61   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetSTShareSubKSP_C", nullptr));
62   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetSTShareSubKSP_C", nullptr));
63   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetDeflationMat_C", nullptr));
64   PetscCall(PetscObjectCompose((PetscObject)pc, "_PCHPDDM_Schur", nullptr));
65   PetscFunctionReturn(PETSC_SUCCESS);
66 }
67 
68 static inline PetscErrorCode PCHPDDMSetAuxiliaryMat_Private(PC pc, IS is, Mat A, PetscBool deflation)
69 {
70   PC_HPDDM                   *data = (PC_HPDDM *)pc->data;
71   PCHPDDMCoarseCorrectionType type = data->correction;
72 
73   PetscFunctionBegin;
74   PetscValidLogicalCollectiveBool(pc, deflation, 4);
75   if (is && A) {
76     PetscInt m[2];
77 
78     PetscCall(ISGetLocalSize(is, m));
79     PetscCall(MatGetLocalSize(A, m + 1, nullptr));
80     PetscCheck(m[0] == m[1], PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Inconsistent IS and Mat sizes (%" PetscInt_FMT " v. %" PetscInt_FMT ")", m[0], m[1]);
81   }
82   if (is) {
83     PetscCall(PetscObjectReference((PetscObject)is));
84     if (data->is) { /* new overlap definition resets the PC */
85       PetscCall(PCReset_HPDDM(pc));
86       pc->setfromoptionscalled = 0;
87       pc->setupcalled          = PETSC_FALSE;
88       data->correction         = type;
89     }
90     PetscCall(ISDestroy(&data->is));
91     data->is = is;
92   }
93   if (A) {
94     PetscCall(PetscObjectReference((PetscObject)A));
95     PetscCall(MatDestroy(&data->aux));
96     data->aux = A;
97   }
98   data->deflation = deflation;
99   PetscFunctionReturn(PETSC_SUCCESS);
100 }
101 
102 static inline PetscErrorCode PCHPDDMSplittingMatNormal_Private(Mat A, IS *is, Mat *splitting[])
103 {
104   Mat *sub;
105   IS   zero;
106 
107   PetscFunctionBegin;
108   PetscCall(MatSetOption(A, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
109   PetscCall(MatCreateSubMatrices(A, 1, is + 2, is, MAT_INITIAL_MATRIX, splitting));
110   PetscCall(MatCreateSubMatrices(**splitting, 1, is + 2, is + 1, MAT_INITIAL_MATRIX, &sub));
111   PetscCall(MatFindZeroRows(*sub, &zero));
112   PetscCall(MatDestroySubMatrices(1, &sub));
113   PetscCall(MatSetOption(**splitting, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
114   PetscCall(MatZeroRowsIS(**splitting, zero, 0.0, nullptr, nullptr));
115   PetscCall(ISDestroy(&zero));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
118 
119 static inline PetscErrorCode PCHPDDMSetAuxiliaryMatNormal_Private(PC pc, Mat A, Mat N, Mat *B, const char *pcpre, Vec *diagonal = nullptr, Mat B01 = nullptr)
120 {
121   PC_HPDDM *data         = (PC_HPDDM *)pc->data;
122   Mat      *splitting[2] = {}, aux;
123   Vec       d;
124   IS        is[3];
125   PetscReal norm;
126   PetscBool flg;
127   char      type[256] = {}; /* same size as in src/ksp/pc/interface/pcset.c */
128 
129   PetscFunctionBegin;
130   if (!B01) PetscCall(MatConvert(N, MATAIJ, MAT_INITIAL_MATRIX, B));
131   else PetscCall(MatTransposeMatMult(B01, A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, B));
132   PetscCall(MatEliminateZeros(*B, PETSC_TRUE));
133   PetscCall(ISCreateStride(PETSC_COMM_SELF, A->cmap->n, A->cmap->rstart, 1, is));
134   PetscCall(MatIncreaseOverlap(*B, 1, is, 1));
135   PetscCall(ISCreateStride(PETSC_COMM_SELF, A->cmap->n, A->cmap->rstart, 1, is + 2));
136   PetscCall(ISEmbed(is[0], is[2], PETSC_TRUE, is + 1));
137   PetscCall(ISDestroy(is + 2));
138   PetscCall(ISCreateStride(PETSC_COMM_SELF, A->rmap->N, 0, 1, is + 2));
139   PetscCall(PCHPDDMSplittingMatNormal_Private(A, is, &splitting[0]));
140   if (B01) {
141     PetscCall(PCHPDDMSplittingMatNormal_Private(B01, is, &splitting[1]));
142     PetscCall(MatDestroy(&B01));
143   }
144   PetscCall(ISDestroy(is + 2));
145   PetscCall(ISDestroy(is + 1));
146   PetscCall(PetscOptionsGetString(((PetscObject)pc)->options, pcpre, "-pc_hpddm_levels_1_sub_pc_type", type, sizeof(type), nullptr));
147   PetscCall(PetscStrcmp(type, PCQR, &flg));
148   if (!flg) {
149     Mat conjugate = *splitting[splitting[1] ? 1 : 0];
150 
151     if (PetscDefined(USE_COMPLEX) && !splitting[1]) {
152       PetscCall(MatDuplicate(*splitting[0], MAT_COPY_VALUES, &conjugate));
153       PetscCall(MatConjugate(conjugate));
154     }
155     PetscCall(MatTransposeMatMult(conjugate, *splitting[0], MAT_INITIAL_MATRIX, PETSC_DETERMINE, &aux));
156     if (PetscDefined(USE_COMPLEX) && !splitting[1]) PetscCall(MatDestroy(&conjugate));
157     else if (splitting[1]) PetscCall(MatDestroySubMatrices(1, &splitting[1]));
158     PetscCall(MatNorm(aux, NORM_FROBENIUS, &norm));
159     PetscCall(MatSetOption(aux, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
160     if (diagonal) {
161       PetscReal norm;
162 
163       PetscCall(VecScale(*diagonal, -1.0));
164       PetscCall(VecNorm(*diagonal, NORM_INFINITY, &norm));
165       if (norm > PETSC_SMALL) {
166         PetscSF  scatter;
167         PetscInt n;
168 
169         PetscCall(ISGetLocalSize(*is, &n));
170         PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pc), n, PETSC_DECIDE, &d));
171         PetscCall(VecScatterCreate(*diagonal, *is, d, nullptr, &scatter));
172         PetscCall(VecScatterBegin(scatter, *diagonal, d, INSERT_VALUES, SCATTER_FORWARD));
173         PetscCall(VecScatterEnd(scatter, *diagonal, d, INSERT_VALUES, SCATTER_FORWARD));
174         PetscCall(PetscSFDestroy(&scatter));
175         PetscCall(MatDiagonalSet(aux, d, ADD_VALUES));
176         PetscCall(VecDestroy(&d));
177       } else PetscCall(VecDestroy(diagonal));
178     }
179     if (!diagonal) PetscCall(MatShift(aux, PETSC_SMALL * norm));
180     PetscCall(MatEliminateZeros(aux, PETSC_TRUE));
181   } else {
182     PetscBool flg;
183 
184     PetscCheck(!splitting[1], PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot use PCQR when A01 != A10^T");
185     if (diagonal) {
186       PetscCall(VecNorm(*diagonal, NORM_INFINITY, &norm));
187       PetscCheck(norm < PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Nonzero diagonal A11 block");
188       PetscCall(VecDestroy(diagonal));
189     }
190     PetscCall(PetscObjectTypeCompare((PetscObject)N, MATNORMAL, &flg));
191     if (flg) PetscCall(MatCreateNormal(*splitting[0], &aux));
192     else PetscCall(MatCreateNormalHermitian(*splitting[0], &aux));
193   }
194   PetscCall(MatDestroySubMatrices(1, &splitting[0]));
195   PetscCall(PCHPDDMSetAuxiliaryMat(pc, *is, aux, nullptr, nullptr));
196   data->Neumann = PETSC_BOOL3_TRUE;
197   PetscCall(ISDestroy(is));
198   PetscCall(MatDestroy(&aux));
199   PetscFunctionReturn(PETSC_SUCCESS);
200 }
201 
202 static PetscErrorCode PCHPDDMSetAuxiliaryMat_HPDDM(PC pc, IS is, Mat A, PetscErrorCode (*setup)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *), void *setup_ctx)
203 {
204   PC_HPDDM *data = (PC_HPDDM *)pc->data;
205 
206   PetscFunctionBegin;
207   PetscCall(PCHPDDMSetAuxiliaryMat_Private(pc, is, A, PETSC_FALSE));
208   if (setup) {
209     data->setup     = setup;
210     data->setup_ctx = setup_ctx;
211   }
212   PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 
215 /*@C
216   PCHPDDMSetAuxiliaryMat - Sets the auxiliary matrix used by `PCHPDDM` for the concurrent GenEO problems at the finest level.
217 
218   Input Parameters:
219 + pc    - preconditioner context
220 . is    - index set of the local auxiliary, e.g., Neumann, matrix
221 . A     - auxiliary sequential matrix
222 . setup - function for generating the auxiliary matrix entries, may be `NULL`
223 - ctx   - context for `setup`, may be `NULL`
224 
225   Calling sequence of `setup`:
226 + J   - matrix whose values are to be set
227 . t   - time
228 . X   - linearization point
229 . X_t - time-derivative of the linearization point
230 . s   - step
231 . ovl - index set of the local auxiliary, e.g., Neumann, matrix
232 - ctx - context for `setup`, may be `NULL`
233 
234   Level: intermediate
235 
236   Note:
237   As an example, in a finite element context with nonoverlapping subdomains plus (overlapping) ghost elements, this could be the unassembled (Neumann)
238   local overlapping operator. As opposed to the assembled (Dirichlet) local overlapping operator obtained by summing neighborhood contributions
239   at the interface of ghost elements.
240 
241   Fortran Notes:
242   Only `PETSC_NULL_FUNCTION` is supported for `setup` and `ctx` is never accessed
243 
244 .seealso: [](ch_ksp), `PCHPDDM`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCHPDDMSetRHSMat()`, `MATIS`
245 @*/
246 PetscErrorCode PCHPDDMSetAuxiliaryMat(PC pc, IS is, Mat A, PetscErrorCode (*setup)(Mat J, PetscReal t, Vec X, Vec X_t, PetscReal s, IS ovl, PetscCtx ctx), PetscCtx ctx)
247 {
248   PetscFunctionBegin;
249   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
250   if (is) PetscValidHeaderSpecific(is, IS_CLASSID, 2);
251   if (A) PetscValidHeaderSpecific(A, MAT_CLASSID, 3);
252   PetscTryMethod(pc, "PCHPDDMSetAuxiliaryMat_C", (PC, IS, Mat, PetscErrorCode (*)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *), void *), (pc, is, A, setup, ctx));
253   PetscFunctionReturn(PETSC_SUCCESS);
254 }
255 
256 static PetscErrorCode PCHPDDMHasNeumannMat_HPDDM(PC pc, PetscBool has)
257 {
258   PC_HPDDM *data = (PC_HPDDM *)pc->data;
259 
260   PetscFunctionBegin;
261   data->Neumann = PetscBoolToBool3(has);
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 /*@
266   PCHPDDMHasNeumannMat - Informs `PCHPDDM` that the `Mat` passed to `PCHPDDMSetAuxiliaryMat()` is the local Neumann matrix.
267 
268   Input Parameters:
269 + pc  - preconditioner context
270 - has - Boolean value
271 
272   Level: intermediate
273 
274   Notes:
275   This may be used to bypass a call to `MatCreateSubMatrices()` and to `MatConvert()` for `MATSBAIJ` matrices.
276 
277   If a function is composed with DMCreateNeumannOverlap_C implementation is available in the `DM` attached to the Pmat, or the Amat, or the `PC`, the flag is internally set to `PETSC_TRUE`. Its default value is otherwise `PETSC_FALSE`.
278 
279 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMSetAuxiliaryMat()`
280 @*/
281 PetscErrorCode PCHPDDMHasNeumannMat(PC pc, PetscBool has)
282 {
283   PetscFunctionBegin;
284   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
285   PetscTryMethod(pc, "PCHPDDMHasNeumannMat_C", (PC, PetscBool), (pc, has));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode PCHPDDMSetRHSMat_HPDDM(PC pc, Mat B)
290 {
291   PC_HPDDM *data = (PC_HPDDM *)pc->data;
292 
293   PetscFunctionBegin;
294   PetscCall(PetscObjectReference((PetscObject)B));
295   PetscCall(MatDestroy(&data->B));
296   data->B = B;
297   PetscFunctionReturn(PETSC_SUCCESS);
298 }
299 
300 /*@
301   PCHPDDMSetRHSMat - Sets the right-hand side matrix used by `PCHPDDM` for the concurrent GenEO problems at the finest level.
302 
303   Input Parameters:
304 + pc - preconditioner context
305 - B  - right-hand side sequential matrix
306 
307   Level: advanced
308 
309   Note:
310   Must be used in conjunction with `PCHPDDMSetAuxiliaryMat`(N), so that Nv = lambda Bv is solved using `EPSSetOperators`(N, B).
311   It is assumed that N and `B` are provided using the same numbering. This provides a means to try more advanced methods such as GenEO-II or H-GenEO.
312 
313 .seealso: [](ch_ksp), `PCHPDDMSetAuxiliaryMat()`, `PCHPDDM`
314 @*/
315 PetscErrorCode PCHPDDMSetRHSMat(PC pc, Mat B)
316 {
317   PetscFunctionBegin;
318   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
319   if (B) {
320     PetscValidHeaderSpecific(B, MAT_CLASSID, 2);
321     PetscTryMethod(pc, "PCHPDDMSetRHSMat_C", (PC, Mat), (pc, B));
322   }
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 static PetscErrorCode PCSetFromOptions_HPDDM(PC pc, PetscOptionItems PetscOptionsObject)
327 {
328   PC_HPDDM                   *data   = (PC_HPDDM *)pc->data;
329   PC_HPDDM_Level            **levels = data->levels;
330   char                        prefix[256], deprecated[256];
331   int                         i = 1;
332   PetscMPIInt                 size, previous;
333   PetscInt                    n, overlap = 1;
334   PCHPDDMCoarseCorrectionType type;
335   PetscBool                   flg = PETSC_TRUE, set;
336 
337   PetscFunctionBegin;
338   if (!data->levels) {
339     PetscCall(PetscCalloc1(PETSC_PCHPDDM_MAXLEVELS, &levels));
340     data->levels = levels;
341   }
342   PetscOptionsHeadBegin(PetscOptionsObject, "PCHPDDM options");
343   PetscCall(PetscOptionsBoundedInt("-pc_hpddm_harmonic_overlap", "Overlap prior to computing local harmonic extensions", "PCHPDDM", overlap, &overlap, &set, 1));
344   if (!set) overlap = -1;
345   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
346   previous = size;
347   while (i < PETSC_PCHPDDM_MAXLEVELS) {
348     PetscInt p = 1;
349 
350     if (!data->levels[i - 1]) PetscCall(PetscNew(data->levels + i - 1));
351     data->levels[i - 1]->parent = data;
352     /* if the previous level has a single process, it is not possible to coarsen further */
353     if (previous == 1 || !flg) break;
354     data->levels[i - 1]->nu        = 0;
355     data->levels[i - 1]->threshold = -1.0;
356     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_nev", i));
357     PetscCall(PetscOptionsBoundedInt(prefix, "Local number of deflation vectors computed by SLEPc", "EPSSetDimensions", data->levels[i - 1]->nu, &data->levels[i - 1]->nu, nullptr, 0));
358     PetscCall(PetscSNPrintf(deprecated, sizeof(deprecated), "-pc_hpddm_levels_%d_eps_threshold", i));
359     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_threshold_absolute", i));
360     PetscCall(PetscOptionsDeprecated(deprecated, prefix, "3.24", nullptr));
361     PetscCall(PetscOptionsReal(prefix, "Local absolute threshold for selecting deflation vectors returned by SLEPc", "PCHPDDM", data->levels[i - 1]->threshold, &data->levels[i - 1]->threshold, nullptr));
362     if (i == 1) {
363       PetscCheck(overlap == -1 || PetscAbsReal(data->levels[i - 1]->threshold + static_cast<PetscReal>(1.0)) < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply both -pc_hpddm_levels_1_eps_threshold_absolute and -pc_hpddm_harmonic_overlap");
364       if (overlap != -1) {
365         PetscInt  nsv    = 0;
366         PetscBool set[2] = {PETSC_FALSE, PETSC_FALSE};
367 
368         PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_svd_nsv", i));
369         PetscCall(PetscOptionsBoundedInt(prefix, "Local number of deflation vectors computed by SLEPc", "SVDSetDimensions", nsv, &nsv, nullptr, 0));
370         PetscCheck(data->levels[0]->nu == 0 || nsv == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply both -pc_hpddm_levels_1_eps_nev and -pc_hpddm_levels_1_svd_nsv");
371         if (data->levels[0]->nu == 0) { /* -eps_nev has not been used, so nu is 0 */
372           data->levels[0]->nu = nsv;    /* nu may still be 0 if -svd_nsv has not been used */
373           PetscCall(PetscSNPrintf(deprecated, sizeof(deprecated), "-pc_hpddm_levels_%d_svd_relative_threshold", i));
374           PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_svd_threshold_relative", i));
375           PetscCall(PetscOptionsDeprecated(deprecated, prefix, "3.24", nullptr));
376           PetscCall(PetscOptionsReal(prefix, "Local relative threshold for selecting deflation vectors returned by SLEPc", "PCHPDDM", data->levels[0]->threshold, &data->levels[0]->threshold, set)); /* cache whether this option has been used or not to error out in case of exclusive options being used simultaneously later on */
377         }
378         if (data->levels[0]->nu == 0 || nsv == 0) { /* if neither -eps_nev nor -svd_nsv has been used */
379           PetscCall(PetscSNPrintf(deprecated, sizeof(deprecated), "-pc_hpddm_levels_%d_eps_relative_threshold", i));
380           PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_threshold_relative", i));
381           PetscCall(PetscOptionsDeprecated(deprecated, prefix, "3.24", nullptr));
382           PetscCall(PetscOptionsReal(prefix, "Local relative threshold for selecting deflation vectors returned by SLEPc", "PCHPDDM", data->levels[0]->threshold, &data->levels[0]->threshold, set + 1));
383           PetscCheck(!set[0] || !set[1], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply both -pc_hpddm_levels_1_eps_threshold_relative and -pc_hpddm_levels_1_svd_threshold_relative");
384         }
385         PetscCheck(data->levels[0]->nu || PetscAbsReal(data->levels[i - 1]->threshold + static_cast<PetscReal>(1.0)) > PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Need to supply at least one of 1) -pc_hpddm_levels_1_eps_nev, 2) -pc_hpddm_levels_1_svd_nsv, 3) -pc_hpddm_levels_1_eps_threshold_relative, 4) -pc_hpddm_levels_1_svd_threshold_relative (for nonsymmetric matrices, only option 2 and option 4 are appropriate)");
386       }
387       PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_1_st_share_sub_ksp"));
388       PetscCall(PetscOptionsBool(prefix, "Shared KSP between SLEPc ST and the fine-level subdomain solver", "PCHPDDMSetSTShareSubKSP", PETSC_FALSE, &data->share, nullptr));
389     }
390     /* if there is no prescribed coarsening, just break out of the loop */
391     if (data->levels[i - 1]->threshold <= PetscReal() && data->levels[i - 1]->nu <= 0 && !(data->deflation && i == 1)) break;
392     else {
393       ++i;
394       PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_nev", i));
395       PetscCall(PetscOptionsHasName(PetscOptionsObject->options, PetscOptionsObject->prefix, prefix, &flg));
396       if (!flg) {
397         PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_threshold_absolute", i));
398         PetscCall(PetscOptionsHasName(PetscOptionsObject->options, PetscOptionsObject->prefix, prefix, &flg));
399       }
400       if (flg) {
401         /* if there are coarsening options for the next level, then register it  */
402         /* otherwise, don't to avoid having both options levels_N_p and coarse_p */
403         PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_p", i));
404         PetscCall(PetscOptionsRangeInt(prefix, "Number of processes used to assemble the coarse operator at this level", "PCHPDDM", p, &p, &flg, 1, PetscMax(1, previous / 2)));
405         previous = p;
406       }
407     }
408   }
409   data->N = i;
410   n       = 1;
411   if (i > 1) {
412     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_coarse_p"));
413     PetscCall(PetscOptionsRangeInt(prefix, "Number of processes used to assemble the coarsest operator", "PCHPDDM", n, &n, nullptr, 1, PetscMax(1, previous / 2)));
414 #if PetscDefined(HAVE_MUMPS)
415     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "pc_hpddm_coarse_"));
416     PetscCall(PetscOptionsHasName(PetscOptionsObject->options, prefix, "-mat_mumps_use_omp_threads", &flg));
417     if (flg) {
418       char type[64]; /* same size as in src/ksp/pc/impls/factor/factimpl.c */
419 
420       PetscCall(PetscStrncpy(type, n > 1 && PetscDefined(HAVE_MUMPS) ? MATSOLVERMUMPS : MATSOLVERPETSC, sizeof(type))); /* default solver for a MatMPIAIJ or a MatSeqAIJ */
421       PetscCall(PetscOptionsGetString(PetscOptionsObject->options, prefix, "-pc_factor_mat_solver_type", type, sizeof(type), nullptr));
422       PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
423       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-%smat_mumps_use_omp_threads and -%spc_factor_mat_solver_type != %s", prefix, prefix, MATSOLVERMUMPS);
424       size = n;
425       n    = -1;
426       PetscCall(PetscOptionsGetInt(PetscOptionsObject->options, prefix, "-mat_mumps_use_omp_threads", &n, nullptr));
427       PetscCheck(n >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Need to specify a positive integer for -%smat_mumps_use_omp_threads", prefix);
428       PetscCheck(n * size <= previous, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%d MPI process%s x %d OpenMP thread%s greater than %d available MPI process%s for the coarsest operator", (int)size, size > 1 ? "es" : "", (int)n, n > 1 ? "s" : "", (int)previous, previous > 1 ? "es" : "");
429     }
430 #endif
431     PetscCall(PetscOptionsEnum("-pc_hpddm_coarse_correction", "Type of coarse correction applied each iteration", "PCHPDDMSetCoarseCorrectionType", PCHPDDMCoarseCorrectionTypes, (PetscEnum)data->correction, (PetscEnum *)&type, &flg));
432     if (flg) PetscCall(PCHPDDMSetCoarseCorrectionType(pc, type));
433     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_has_neumann"));
434     PetscCall(PetscOptionsBool(prefix, "Is the auxiliary Mat the local Neumann matrix?", "PCHPDDMHasNeumannMat", PetscBool3ToBool(data->Neumann), &flg, &set));
435     if (set) data->Neumann = PetscBoolToBool3(flg);
436     data->log_separate = PETSC_FALSE;
437     if (PetscDefined(USE_LOG)) {
438       PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_log_separate"));
439       PetscCall(PetscOptionsBool(prefix, "Log events level by level instead of inside PCSetUp()/KSPSolve()", nullptr, data->log_separate, &data->log_separate, nullptr));
440     }
441   }
442   PetscOptionsHeadEnd();
443   while (i < PETSC_PCHPDDM_MAXLEVELS && data->levels[i]) PetscCall(PetscFree(data->levels[i++]));
444   PetscFunctionReturn(PETSC_SUCCESS);
445 }
446 
447 template <bool transpose>
448 static PetscErrorCode PCApply_HPDDM(PC pc, Vec x, Vec y)
449 {
450   PC_HPDDM *data = (PC_HPDDM *)pc->data;
451 
452   PetscFunctionBegin;
453   PetscCall(PetscCitationsRegister(HPDDMCitation, &HPDDMCite));
454   PetscCheck(data->levels[0]->ksp, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No KSP attached to PCHPDDM");
455   if (data->log_separate) PetscCall(PetscLogEventBegin(PC_HPDDM_Solve[0], data->levels[0]->ksp, nullptr, nullptr, nullptr)); /* coarser-level events are directly triggered in HPDDM */
456   if (!transpose) PetscCall(KSPSolve(data->levels[0]->ksp, x, y));
457   else PetscCall(KSPSolveTranspose(data->levels[0]->ksp, x, y));
458   if (data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_Solve[0], data->levels[0]->ksp, nullptr, nullptr, nullptr));
459   PetscFunctionReturn(PETSC_SUCCESS);
460 }
461 
462 template <bool transpose>
463 static PetscErrorCode PCMatApply_HPDDM(PC pc, Mat X, Mat Y)
464 {
465   PC_HPDDM *data = (PC_HPDDM *)pc->data;
466 
467   PetscFunctionBegin;
468   PetscCall(PetscCitationsRegister(HPDDMCitation, &HPDDMCite));
469   PetscCheck(data->levels[0]->ksp, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No KSP attached to PCHPDDM");
470   if (!transpose) PetscCall(KSPMatSolve(data->levels[0]->ksp, X, Y));
471   else PetscCall(KSPMatSolveTranspose(data->levels[0]->ksp, X, Y));
472   PetscFunctionReturn(PETSC_SUCCESS);
473 }
474 
475 /*@
476   PCHPDDMGetComplexities - Computes the grid and operator complexities.
477 
478   Collective
479 
480   Input Parameter:
481 . pc - preconditioner context
482 
483   Output Parameters:
484 + gc - grid complexity $ \sum_i m_i / m_1 $
485 - oc - operator complexity $ \sum_i nnz_i / nnz_1 $
486 
487   Level: advanced
488 
489 .seealso: [](ch_ksp), `PCMGGetGridComplexity()`, `PCHPDDM`, `PCHYPRE`, `PCGAMG`
490 @*/
491 PetscErrorCode PCHPDDMGetComplexities(PC pc, PetscReal *gc, PetscReal *oc)
492 {
493   PC_HPDDM      *data = (PC_HPDDM *)pc->data;
494   MatInfo        info;
495   PetscLogDouble accumulate[2]{}, nnz1 = 1.0, m1 = 1.0;
496 
497   PetscFunctionBegin;
498   if (gc) {
499     PetscAssertPointer(gc, 2);
500     *gc = 0;
501   }
502   if (oc) {
503     PetscAssertPointer(oc, 3);
504     *oc = 0;
505   }
506   for (PetscInt n = 0; n < data->N; ++n) {
507     if (data->levels[n]->ksp) {
508       Mat       P, A = nullptr;
509       PetscInt  m;
510       PetscBool flg = PETSC_FALSE;
511 
512       PetscCall(KSPGetOperators(data->levels[n]->ksp, nullptr, &P));
513       PetscCall(MatGetSize(P, &m, nullptr));
514       accumulate[0] += m;
515       if (n == 0) {
516         PetscCall(PetscObjectTypeCompareAny((PetscObject)P, &flg, MATNORMAL, MATNORMALHERMITIAN, ""));
517         if (flg) {
518           PetscCall(MatConvert(P, MATAIJ, MAT_INITIAL_MATRIX, &A));
519           P = A;
520         } else {
521           PetscCall(PetscObjectTypeCompare((PetscObject)P, MATSCHURCOMPLEMENT, &flg));
522           PetscCall(PetscObjectReference((PetscObject)P));
523         }
524       }
525       if (!A && flg) accumulate[1] += m * m; /* assumption that a MATSCHURCOMPLEMENT is dense if stored explicitly */
526       else if (P->ops->getinfo) {
527         PetscCall(MatGetInfo(P, MAT_GLOBAL_SUM, &info));
528         accumulate[1] += info.nz_used;
529       }
530       if (n == 0) {
531         m1 = m;
532         if (!A && flg) nnz1 = m * m;
533         else if (P->ops->getinfo) nnz1 = info.nz_used;
534         PetscCall(MatDestroy(&P));
535       }
536     }
537   }
538   /* only process #0 has access to the full hierarchy by construction, so broadcast to ensure consistent outputs */
539   PetscCallMPI(MPI_Bcast(accumulate, 2, MPIU_PETSCLOGDOUBLE, 0, PetscObjectComm((PetscObject)pc)));
540   if (gc) *gc = static_cast<PetscReal>(accumulate[0] / m1);
541   if (oc) *oc = static_cast<PetscReal>(accumulate[1] / nnz1);
542   PetscFunctionReturn(PETSC_SUCCESS);
543 }
544 
545 static PetscErrorCode PCView_HPDDM(PC pc, PetscViewer viewer)
546 {
547   PC_HPDDM         *data = (PC_HPDDM *)pc->data;
548   PetscViewer       subviewer;
549   PetscViewerFormat format;
550   PetscSubcomm      subcomm;
551   PetscReal         oc, gc;
552   PetscInt          tabs;
553   PetscMPIInt       size, color, rank;
554   PetscBool         flg;
555   const char       *name;
556 
557   PetscFunctionBegin;
558   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &flg));
559   if (flg) {
560     PetscCall(PetscViewerASCIIPrintf(viewer, "level%s: %" PetscInt_FMT "\n", data->N > 1 ? "s" : "", data->N));
561     PetscCall(PCHPDDMGetComplexities(pc, &gc, &oc));
562     if (data->N > 1) {
563       if (!data->deflation) {
564         PetscCall(PetscViewerASCIIPrintf(viewer, "Neumann matrix attached? %s\n", PetscBools[PetscBool3ToBool(data->Neumann)]));
565         PetscCall(PetscViewerASCIIPrintf(viewer, "shared subdomain KSP between SLEPc and PETSc? %s\n", PetscBools[data->share]));
566       } else PetscCall(PetscViewerASCIIPrintf(viewer, "user-supplied deflation matrix\n"));
567       PetscCall(PetscViewerASCIIPrintf(viewer, "coarse correction: %s\n", PCHPDDMCoarseCorrectionTypes[data->correction]));
568       PetscCall(PetscViewerASCIIPrintf(viewer, "on process #0, value%s (+ threshold%s if available) for selecting deflation vectors:", data->N > 2 ? "s" : "", data->N > 2 ? "s" : ""));
569       PetscCall(PetscViewerASCIIGetTab(viewer, &tabs));
570       PetscCall(PetscViewerASCIISetTab(viewer, 0));
571       for (PetscInt i = 1; i < data->N; ++i) {
572         PetscCall(PetscViewerASCIIPrintf(viewer, " %" PetscInt_FMT, data->levels[i - 1]->nu));
573         if (data->levels[i - 1]->threshold > static_cast<PetscReal>(-0.1)) PetscCall(PetscViewerASCIIPrintf(viewer, " (%g)", (double)data->levels[i - 1]->threshold));
574       }
575       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
576       PetscCall(PetscViewerASCIISetTab(viewer, tabs));
577     }
578     PetscCall(PetscViewerASCIIPrintf(viewer, "grid and operator complexities: %g %g\n", (double)gc, (double)oc));
579     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
580     if (data->levels[0]->ksp) {
581       PetscCall(KSPView(data->levels[0]->ksp, viewer));
582       if (data->levels[0]->pc) PetscCall(PCView(data->levels[0]->pc, viewer));
583       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
584       for (PetscInt i = 1; i < data->N; ++i) {
585         if (data->levels[i]->ksp) color = 1;
586         else color = 0;
587         PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)pc), &subcomm));
588         PetscCall(PetscSubcommSetNumber(subcomm, PetscMin(size, 2)));
589         PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
590         PetscCall(PetscViewerASCIIPushTab(viewer));
591         PetscCall(PetscViewerGetSubViewer(viewer, PetscSubcommChild(subcomm), &subviewer));
592         if (color == 1) {
593           PetscCall(KSPView(data->levels[i]->ksp, subviewer));
594           if (data->levels[i]->pc) PetscCall(PCView(data->levels[i]->pc, subviewer));
595           PetscCall(PetscViewerFlush(subviewer));
596         }
597         PetscCall(PetscViewerRestoreSubViewer(viewer, PetscSubcommChild(subcomm), &subviewer));
598         PetscCall(PetscViewerASCIIPopTab(viewer));
599         PetscCall(PetscSubcommDestroy(&subcomm));
600       }
601     }
602     PetscCall(PetscViewerGetFormat(viewer, &format));
603     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
604       PetscCall(PetscViewerFileGetName(viewer, &name));
605       if (name) {
606         Mat             aux[2];
607         IS              is;
608         const PetscInt *indices;
609         PetscInt        m, n, sizes[5] = {pc->mat->rmap->n, pc->mat->cmap->n, pc->mat->rmap->N, pc->mat->cmap->N, 0};
610         char           *tmp;
611         std::string     prefix, suffix;
612         size_t          pos;
613 
614         PetscCall(PetscStrstr(name, ".", &tmp));
615         if (tmp) {
616           pos    = std::distance(const_cast<char *>(name), tmp);
617           prefix = std::string(name, pos);
618           suffix = std::string(name + pos + 1);
619         } else prefix = name;
620         if (data->aux) {
621           PetscCall(MatGetSize(data->aux, &m, &n));
622           PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), aux));
623           PetscCall(MatSetSizes(aux[0], m, n, PETSC_DETERMINE, PETSC_DETERMINE));
624           PetscCall(PetscObjectBaseTypeCompare((PetscObject)data->aux, MATSEQAIJ, &flg));
625           if (flg) PetscCall(MatSetType(aux[0], MATMPIAIJ));
626           else {
627             PetscCall(PetscObjectBaseTypeCompare((PetscObject)data->aux, MATSEQBAIJ, &flg));
628             if (flg) PetscCall(MatSetType(aux[0], MATMPIBAIJ));
629             else {
630               PetscCall(PetscObjectBaseTypeCompare((PetscObject)data->aux, MATSEQSBAIJ, &flg));
631               PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "MatType of auxiliary Mat (%s) is not any of the following: MATSEQAIJ, MATSEQBAIJ, or MATSEQSBAIJ", ((PetscObject)data->aux)->type_name);
632               PetscCall(MatSetType(aux[0], MATMPISBAIJ));
633             }
634           }
635           PetscCall(MatSetBlockSizesFromMats(aux[0], data->aux, data->aux));
636           PetscCall(MatAssemblyBegin(aux[0], MAT_FINAL_ASSEMBLY));
637           PetscCall(MatAssemblyEnd(aux[0], MAT_FINAL_ASSEMBLY));
638           PetscCall(MatGetDiagonalBlock(aux[0], aux + 1));
639           PetscCall(MatCopy(data->aux, aux[1], DIFFERENT_NONZERO_PATTERN));
640           PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)pc), std::string(prefix + "_aux_" + std::to_string(size) + (tmp ? ("." + suffix) : "")).c_str(), FILE_MODE_WRITE, &subviewer));
641           PetscCall(MatView(aux[0], subviewer));
642           PetscCall(PetscViewerDestroy(&subviewer));
643           PetscCall(MatDestroy(aux));
644         }
645         if (data->is) {
646           PetscCall(ISGetIndices(data->is, &indices));
647           PetscCall(ISGetSize(data->is, sizes + 4));
648           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), sizes[4], indices, PETSC_USE_POINTER, &is));
649           PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)pc), std::string(prefix + "_is_" + std::to_string(size) + (tmp ? ("." + suffix) : "")).c_str(), FILE_MODE_WRITE, &subviewer));
650           PetscCall(ISView(is, subviewer));
651           PetscCall(PetscViewerDestroy(&subviewer));
652           PetscCall(ISDestroy(&is));
653           PetscCall(ISRestoreIndices(data->is, &indices));
654         }
655         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), PETSC_STATIC_ARRAY_LENGTH(sizes), sizes, PETSC_USE_POINTER, &is));
656         PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)pc), std::string(prefix + "_sizes_" + std::to_string(size) + (tmp ? ("." + suffix) : "")).c_str(), FILE_MODE_WRITE, &subviewer));
657         PetscCall(ISView(is, subviewer));
658         PetscCall(PetscViewerDestroy(&subviewer));
659         PetscCall(ISDestroy(&is));
660       }
661     }
662   }
663   PetscFunctionReturn(PETSC_SUCCESS);
664 }
665 
666 static PetscErrorCode PCPreSolve_HPDDM(PC pc, KSP ksp, Vec, Vec)
667 {
668   PC_HPDDM *data = (PC_HPDDM *)pc->data;
669   Mat       A;
670   PetscBool flg;
671 
672   PetscFunctionBegin;
673   if (ksp) {
674     PetscCall(PetscObjectTypeCompare((PetscObject)ksp, KSPLSQR, &flg));
675     if (flg && !data->normal) {
676       PetscCall(KSPGetOperators(ksp, &A, nullptr));
677       PetscCall(MatCreateVecs(A, nullptr, &data->normal)); /* temporary Vec used in PCApply_HPDDMShell() for coarse grid corrections */
678     } else if (!flg) {
679       PetscCall(PetscObjectTypeCompareAny((PetscObject)ksp, &flg, KSPCG, KSPGROPPCG, KSPPIPECG, KSPPIPECGRR, KSPPIPELCG, KSPPIPEPRCG, KSPPIPECG2, KSPSTCG, KSPFCG, KSPPIPEFCG, KSPMINRES, KSPNASH, KSPSYMMLQ, ""));
680       if (!flg) {
681         PetscCall(PetscObjectTypeCompare((PetscObject)ksp, KSPHPDDM, &flg));
682         if (flg) {
683           KSPHPDDMType type;
684 
685           PetscCall(KSPHPDDMGetType(ksp, &type));
686           flg = (type == KSP_HPDDM_TYPE_CG || type == KSP_HPDDM_TYPE_BCG || type == KSP_HPDDM_TYPE_BFBCG ? PETSC_TRUE : PETSC_FALSE);
687         }
688       }
689     }
690     if (flg) {
691       if (data->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED) {
692         PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, ((PetscObject)pc)->prefix, "-pc_hpddm_coarse_correction", &flg));
693         PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "PCHPDDMCoarseCorrectionType %s is known to be not symmetric, but KSPType %s requires a symmetric PC, if you insist on using this configuration, use the additional option -%spc_hpddm_coarse_correction %s, or alternatively, switch to a symmetric PCHPDDMCoarseCorrectionType such as %s",
694                    PCHPDDMCoarseCorrectionTypes[data->correction], ((PetscObject)ksp)->type_name, ((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "", PCHPDDMCoarseCorrectionTypes[data->correction], PCHPDDMCoarseCorrectionTypes[PC_HPDDM_COARSE_CORRECTION_BALANCED]);
695       }
696       for (PetscInt n = 0; n < data->N; ++n) {
697         if (data->levels[n]->pc) {
698           PetscCall(PetscObjectTypeCompare((PetscObject)data->levels[n]->pc, PCASM, &flg));
699           if (flg) {
700             PCASMType type;
701 
702             PetscCall(PCASMGetType(data->levels[n]->pc, &type));
703             if (type == PC_ASM_RESTRICT || type == PC_ASM_INTERPOLATE) {
704               PetscCall(PetscOptionsHasName(((PetscObject)data->levels[n]->pc)->options, ((PetscObject)data->levels[n]->pc)->prefix, "-pc_asm_type", &flg));
705               PetscCheck(flg, PetscObjectComm((PetscObject)data->levels[n]->pc), PETSC_ERR_ARG_INCOMP, "PCASMType %s is known to be not symmetric, but KSPType %s requires a symmetric PC, if you insist on using this configuration, use the additional option -%spc_asm_type %s, or alternatively, switch to a symmetric PCASMType such as %s", PCASMTypes[type],
706                          ((PetscObject)ksp)->type_name, ((PetscObject)data->levels[n]->pc)->prefix, PCASMTypes[type], PCASMTypes[PC_ASM_BASIC]);
707             }
708           }
709         }
710       }
711     }
712   }
713   PetscFunctionReturn(PETSC_SUCCESS);
714 }
715 
716 static PetscErrorCode PCSetUp_HPDDMShell(PC pc)
717 {
718   PC_HPDDM_Level *ctx;
719   Mat             A, P;
720   Vec             x;
721   const char     *pcpre;
722 
723   PetscFunctionBegin;
724   PetscCall(PCShellGetContext(pc, &ctx));
725   PetscCall(KSPGetOptionsPrefix(ctx->ksp, &pcpre));
726   PetscCall(KSPGetOperators(ctx->ksp, &A, &P));
727   /* smoother */
728   PetscCall(PCSetOptionsPrefix(ctx->pc, pcpre));
729   PetscCall(PCSetOperators(ctx->pc, A, P));
730   if (!ctx->v[0]) {
731     PetscCall(VecDuplicateVecs(ctx->D, 1, &ctx->v[0]));
732     if (!std::is_same<PetscScalar, PetscReal>::value) PetscCall(VecDestroy(&ctx->D));
733     PetscCall(MatCreateVecs(A, &x, nullptr));
734     PetscCall(VecDuplicateVecs(x, 2, &ctx->v[1]));
735     PetscCall(VecDestroy(&x));
736   }
737   std::fill_n(ctx->V, 3, nullptr);
738   PetscFunctionReturn(PETSC_SUCCESS);
739 }
740 
741 template <bool transpose = false, class Type = Vec, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
742 static inline PetscErrorCode PCHPDDMDeflate_Private(PC pc, Type x, Type y)
743 {
744   PC_HPDDM_Level *ctx;
745   PetscScalar    *out;
746 
747   PetscFunctionBegin;
748   PetscCall(PCShellGetContext(pc, &ctx));
749   /* going from PETSc to HPDDM numbering */
750   PetscCall(VecScatterBegin(ctx->scatter, x, ctx->v[0][0], INSERT_VALUES, SCATTER_FORWARD));
751   PetscCall(VecScatterEnd(ctx->scatter, x, ctx->v[0][0], INSERT_VALUES, SCATTER_FORWARD));
752   PetscCall(VecGetArrayWrite(ctx->v[0][0], &out));
753   PetscCallCXX(ctx->P->deflation<false, transpose>(nullptr, out, 1)); /* y = Q x */
754   PetscCall(VecRestoreArrayWrite(ctx->v[0][0], &out));
755   /* going from HPDDM to PETSc numbering */
756   PetscCall(VecScatterBegin(ctx->scatter, ctx->v[0][0], y, INSERT_VALUES, SCATTER_REVERSE));
757   PetscCall(VecScatterEnd(ctx->scatter, ctx->v[0][0], y, INSERT_VALUES, SCATTER_REVERSE));
758   PetscFunctionReturn(PETSC_SUCCESS);
759 }
760 
761 template <bool transpose = false, class Type = Mat, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
762 static inline PetscErrorCode PCHPDDMDeflate_Private(PC pc, Type X, Type Y)
763 {
764   PC_HPDDM_Level    *ctx;
765   PetscSF            strided;
766   const PetscScalar *in;
767   PetscScalar       *out, *tmp;
768   PetscInt           N, ld[2];
769   PetscMemType       type[2];
770 
771   PetscFunctionBegin;
772   PetscCall(PCShellGetContext(pc, &ctx));
773   PetscCall(MatGetSize(X, nullptr, &N));
774   PetscCall(PetscObjectQuery((PetscObject)ctx->scatter, "_HPDDM_StridedSF", (PetscObject *)&strided));
775   PetscCall(MatDenseGetLDA(X, ld));
776   PetscCall(MatDenseGetLDA(Y, ld + 1));
777   PetscCheck(ld[0] == ld[1], PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Leading dimension of input Mat different than the one of output Mat");
778   PetscCall(MatDenseGetLDA(ctx->V[0], ld + 1));
779   if (strided) {
780     PetscInt nroots[2], nleafs[2];
781 
782     PetscCall(PetscSFGetGraph(ctx->scatter, nroots, nleafs, nullptr, nullptr));
783     PetscCall(PetscSFGetGraph(strided, nroots + 1, nleafs + 1, nullptr, nullptr));
784     if (N * nroots[0] != nroots[1] || N * nleafs[0] != nleafs[1]) PetscCall(PetscSFDestroy(&strided));
785   }
786   if (!strided) {
787     PetscCall(PetscSFCreateStridedSF(ctx->scatter, N, ld[0], ld[1], &strided));
788     PetscCall(PetscObjectCompose((PetscObject)ctx->scatter, "_HPDDM_StridedSF", (PetscObject)strided));
789     PetscCall(PetscObjectDereference((PetscObject)strided));
790   }
791   PetscCall(MatDenseGetArrayReadAndMemType(X, &in, type));
792   PetscCall(MatDenseGetArrayWriteAndMemType(ctx->V[0], &tmp, type + 1));
793   /* going from PETSc to HPDDM numbering */
794   PetscCall(PetscSFBcastWithMemTypeBegin(strided, strided->vscat.unit, type[0], in, type[1], tmp, MPI_REPLACE));
795   PetscCall(PetscSFBcastEnd(strided, strided->vscat.unit, in, tmp, MPI_REPLACE));
796   PetscCall(MatDenseRestoreArrayReadAndMemType(X, &in));
797   PetscCallCXX(ctx->P->deflation<false, transpose>(nullptr, tmp, N)); /* Y = Q X */
798   PetscCall(MatDenseGetArrayWriteAndMemType(Y, &out, type + 1));
799   /* going from HPDDM to PETSc numbering */
800   PetscCall(PetscSFReduceWithMemTypeBegin(strided, strided->vscat.unit, type[0], tmp, type[1], out, MPI_REPLACE));
801   PetscCall(PetscSFReduceEnd(strided, strided->vscat.unit, tmp, out, MPI_REPLACE));
802   PetscCall(MatDenseRestoreArrayWriteAndMemType(Y, &out));
803   PetscCall(MatDenseRestoreArrayWriteAndMemType(ctx->V[0], &tmp));
804   PetscFunctionReturn(PETSC_SUCCESS);
805 }
806 
807 /*
808      PCApply_HPDDMShell - Applies a (2) deflated, (1) additive, (3) balanced, or (4) no coarse correction. In what follows, E = Z Pmat Z^T and Q = Z^T E^-1 Z.
809 
810 .vb
811    (1) y =                  Pmat^-1              x + Q x,
812    (2) y =                  Pmat^-1 (I - Amat Q) x + Q x (default),
813    (3) y = (I - Q^T Amat^T) Pmat^-1 (I - Amat Q) x + Q x,
814    (4) y =                  Pmat^-1              x      .
815 .ve
816 
817    Input Parameters:
818 +     pc - preconditioner context
819 -     x - input vector
820 
821    Output Parameter:
822 .     y - output vector
823 
824    Notes:
825      The options of Pmat^1 = pc(Pmat) are prefixed by -pc_hpddm_levels_1_pc_. Z is a tall-and-skiny matrix assembled by HPDDM. The number of processes on which (Z Pmat Z^T) is aggregated is set via -pc_hpddm_coarse_p.
826      The options of (Z Pmat Z^T)^-1 = ksp(Z Pmat Z^T) are prefixed by -pc_hpddm_coarse_ (`KSPPREONLY` and `PCCHOLESKY` by default), unless a multilevel correction is turned on, in which case, this function is called recursively at each level except the coarsest one.
827      (1) and (2) visit the "next" level (in terms of coarsening) once per application, while (3) visits it twice, so it is asymptotically twice costlier. (2) is not symmetric even if both Amat and Pmat are symmetric.
828 
829    Level: advanced
830 
831    Developer Note:
832    Since this is not an actual manual page the material below should be moved to an appropriate manual page with the appropriate context, i.e. explaining when it is used and how
833    to trigger it. Likely the manual page is `PCHPDDM`
834 
835 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMCoarseCorrectionType`
836 */
837 static PetscErrorCode PCApply_HPDDMShell(PC pc, Vec x, Vec y)
838 {
839   PC_HPDDM_Level *ctx;
840   Mat             A;
841 
842   PetscFunctionBegin;
843   PetscCall(PCShellGetContext(pc, &ctx));
844   PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
845   PetscCall(KSPGetOperators(ctx->ksp, &A, nullptr));
846   if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCApply(ctx->pc, x, y)); /* y = M^-1 x */
847   else {
848     PetscCall(PCHPDDMDeflate_Private(pc, x, y)); /* y = Q x */
849     if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED || ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
850       if (!ctx->parent->normal || ctx != ctx->parent->levels[0]) PetscCall(MatMult(A, y, ctx->v[1][0])); /* y = A Q x */
851       else {
852         /* KSPLSQR and finest level */
853         PetscCall(MatMult(A, y, ctx->parent->normal));                              /* y = A Q x                 */
854         PetscCall(MatMultHermitianTranspose(A, ctx->parent->normal, ctx->v[1][0])); /* y = A^T A Q x             */
855       }
856       PetscCall(VecWAXPY(ctx->v[1][1], -1.0, ctx->v[1][0], x)); /* y = (I - A Q) x                               */
857       PetscCall(PCApply(ctx->pc, ctx->v[1][1], ctx->v[1][0]));  /* y = M^-1 (I - A Q) x                          */
858       if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
859         if (!ctx->parent->normal || ctx != ctx->parent->levels[0]) PetscCall(MatMultHermitianTranspose(A, ctx->v[1][0], ctx->v[1][1])); /* z = A^T y */
860         else {
861           PetscCall(MatMult(A, ctx->v[1][0], ctx->parent->normal));
862           PetscCall(MatMultHermitianTranspose(A, ctx->parent->normal, ctx->v[1][1])); /* z = A^T A y             */
863         }
864         PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->v[1][1], ctx->v[1][1])); /* z = Q^T z                    */
865         PetscCall(VecAXPBYPCZ(y, -1.0, 1.0, 1.0, ctx->v[1][1], ctx->v[1][0]));   /* y = (I - Q^T A^T) y + Q x    */
866       } else PetscCall(VecAXPY(y, 1.0, ctx->v[1][0]));                           /* y = Q M^-1 (I - A Q) x + Q x */
867     } else {
868       PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
869       PetscCall(PCApply(ctx->pc, x, ctx->v[1][0]));
870       PetscCall(VecAXPY(y, 1.0, ctx->v[1][0])); /* y = M^-1 x + Q x */
871     }
872   }
873   PetscFunctionReturn(PETSC_SUCCESS);
874 }
875 
876 template <bool transpose>
877 static PetscErrorCode PCHPDDMMatApply_Private(PC_HPDDM_Level *ctx, Mat Y, PetscBool *reset)
878 {
879   Mat            A, *ptr;
880   PetscScalar   *array;
881   PetscInt       m, M, N, prev = 0;
882   PetscContainer container = nullptr;
883 
884   PetscFunctionBegin;
885   PetscCall(KSPGetOperators(ctx->ksp, &A, nullptr));
886   PetscCall(MatGetSize(Y, nullptr, &N));
887   PetscCall(PetscObjectQuery((PetscObject)A, "_HPDDM_MatProduct", (PetscObject *)&container));
888   if (container) { /* MatProduct container already attached */
889     PetscCall(PetscContainerGetPointer(container, &ptr));
890     if (ptr[1] != ctx->V[2]) /* Mat has changed or may have been set first in KSPHPDDM */
891       for (m = 0; m < 2; ++m) {
892         PetscCall(MatDestroy(ctx->V + m + 1));
893         ctx->V[m + 1] = ptr[m];
894         PetscCall(PetscObjectReference((PetscObject)ctx->V[m + 1]));
895       }
896   }
897   if (ctx->V[1]) PetscCall(MatGetSize(ctx->V[1], nullptr, &prev));
898   if (N != prev || !ctx->V[0]) {
899     PetscCall(MatDestroy(ctx->V));
900     PetscCall(VecGetLocalSize(ctx->v[0][0], &m));
901     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)Y), m, PETSC_DECIDE, PETSC_DECIDE, N, nullptr, ctx->V));
902     if (N != prev) {
903       PetscCall(MatDestroy(ctx->V + 1));
904       PetscCall(MatDestroy(ctx->V + 2));
905       PetscCall(MatGetLocalSize(Y, &m, nullptr));
906       PetscCall(MatGetSize(Y, &M, nullptr));
907       if (ctx->parent->correction != PC_HPDDM_COARSE_CORRECTION_BALANCED) PetscCall(MatDenseGetArrayWrite(ctx->V[0], &array));
908       else array = nullptr;
909       PetscCall(MatCreateDense(PetscObjectComm((PetscObject)Y), m, PETSC_DECIDE, M, N, array, ctx->V + 1));
910       if (ctx->parent->correction != PC_HPDDM_COARSE_CORRECTION_BALANCED) PetscCall(MatDenseRestoreArrayWrite(ctx->V[0], &array));
911       PetscCall(MatCreateDense(PetscObjectComm((PetscObject)Y), m, PETSC_DECIDE, M, N, nullptr, ctx->V + 2));
912       PetscCall(MatProductCreateWithMat(A, !transpose ? Y : ctx->V[2], nullptr, ctx->V[1]));
913       PetscCall(MatProductSetType(ctx->V[1], !transpose ? MATPRODUCT_AB : MATPRODUCT_AtB));
914       PetscCall(MatProductSetFromOptions(ctx->V[1]));
915       PetscCall(MatProductSymbolic(ctx->V[1]));
916       if (!container) PetscCall(PetscObjectContainerCompose((PetscObject)A, "_HPDDM_MatProduct", ctx->V + 1, nullptr)); /* no MatProduct container attached, create one to be queried in KSPHPDDM or at the next call to PCMatApply() */
917       else PetscCall(PetscContainerSetPointer(container, ctx->V + 1));                                                  /* need to compose B and D from MatProductCreateWithMat(A, B, NULL, D), which are stored in the contiguous array ctx->V */
918     }
919     if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
920       PetscCall(MatProductCreateWithMat(A, !transpose ? ctx->V[1] : Y, nullptr, ctx->V[2]));
921       PetscCall(MatProductSetType(ctx->V[2], !transpose ? MATPRODUCT_AtB : MATPRODUCT_AB));
922       PetscCall(MatProductSetFromOptions(ctx->V[2]));
923       PetscCall(MatProductSymbolic(ctx->V[2]));
924     }
925     PetscCallCXX(ctx->P->start(N));
926   }
927   if (N == prev || container) { /* when MatProduct container is attached, always need to MatProductReplaceMats() since KSPHPDDM may have replaced the Mat as well */
928     PetscCall(MatProductReplaceMats(nullptr, !transpose ? Y : ctx->V[2], nullptr, ctx->V[1]));
929     if (container && ctx->parent->correction != PC_HPDDM_COARSE_CORRECTION_BALANCED) {
930       PetscCall(MatDenseGetArrayWrite(ctx->V[0], &array));
931       PetscCall(MatDensePlaceArray(ctx->V[1], array));
932       PetscCall(MatDenseRestoreArrayWrite(ctx->V[0], &array));
933       *reset = PETSC_TRUE;
934     }
935   }
936   PetscFunctionReturn(PETSC_SUCCESS);
937 }
938 
939 /*
940      PCMatApply_HPDDMShell - Variant of PCApply_HPDDMShell() for blocks of vectors.
941 
942    Input Parameters:
943 +     pc - preconditioner context
944 -     X - block of input vectors
945 
946    Output Parameter:
947 .     Y - block of output vectors
948 
949    Level: advanced
950 
951 .seealso: [](ch_ksp), `PCHPDDM`, `PCApply_HPDDMShell()`, `PCHPDDMCoarseCorrectionType`
952 */
953 static PetscErrorCode PCMatApply_HPDDMShell(PC pc, Mat X, Mat Y)
954 {
955   PC_HPDDM_Level *ctx;
956   PetscBool       reset = PETSC_FALSE;
957 
958   PetscFunctionBegin;
959   PetscCall(PCShellGetContext(pc, &ctx));
960   PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
961   if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCMatApply(ctx->pc, X, Y));
962   else {
963     PetscCall(PCHPDDMMatApply_Private<false>(ctx, Y, &reset));
964     PetscCall(PCHPDDMDeflate_Private(pc, X, Y));
965     if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED || ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
966       PetscCall(MatProductNumeric(ctx->V[1]));
967       PetscCall(MatCopy(ctx->V[1], ctx->V[2], SAME_NONZERO_PATTERN));
968       PetscCall(MatAXPY(ctx->V[2], -1.0, X, SAME_NONZERO_PATTERN));
969       PetscCall(PCMatApply(ctx->pc, ctx->V[2], ctx->V[1]));
970       if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
971         PetscCall(MatProductNumeric(ctx->V[2]));
972         PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->V[2], ctx->V[2]));
973         PetscCall(MatAXPY(ctx->V[1], -1.0, ctx->V[2], SAME_NONZERO_PATTERN));
974       }
975       PetscCall(MatAXPY(Y, -1.0, ctx->V[1], SAME_NONZERO_PATTERN));
976     } else {
977       PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
978       PetscCall(PCMatApply(ctx->pc, X, ctx->V[1]));
979       PetscCall(MatAXPY(Y, 1.0, ctx->V[1], SAME_NONZERO_PATTERN));
980     }
981     if (reset) PetscCall(MatDenseResetArray(ctx->V[1]));
982   }
983   PetscFunctionReturn(PETSC_SUCCESS);
984 }
985 
986 /*
987      PCApplyTranspose_HPDDMShell - Applies the transpose of a (2) deflated, (1) additive, (3) balanced, or (4) no coarse correction. In what follows, E = Z Pmat Z^T and Q = Z^T E^-1 Z.
988 
989 .vb
990    (1) y =                  Pmat^-T              x + Q^T x,
991    (2) y = (I - Q^T Amat^T) Pmat^-T              x + Q^T x (default),
992    (3) y = (I - Q^T Amat^T) Pmat^-T (I - Amat Q) x + Q^T x,
993    (4) y =                  Pmat^-T              x        .
994 .ve
995 
996    Input Parameters:
997 +     pc - preconditioner context
998 -     x - input vector
999 
1000    Output Parameter:
1001 .     y - output vector
1002 
1003    Level: advanced
1004 
1005    Developer Note:
1006    Since this is not an actual manual page the material below should be moved to an appropriate manual page with the appropriate context, i.e. explaining when it is used and how
1007    to trigger it. Likely the manual page is `PCHPDDM`
1008 
1009 .seealso: [](ch_ksp), `PCHPDDM`, `PCApply_HPDDMShell()`, `PCHPDDMCoarseCorrectionType`
1010 */
1011 static PetscErrorCode PCApplyTranspose_HPDDMShell(PC pc, Vec x, Vec y)
1012 {
1013   PC_HPDDM_Level *ctx;
1014   Mat             A;
1015 
1016   PetscFunctionBegin;
1017   PetscCall(PCShellGetContext(pc, &ctx));
1018   PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
1019   PetscCheck(!ctx->parent->normal, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not implemented for the normal equations");
1020   PetscCall(KSPGetOperators(ctx->ksp, &A, nullptr));
1021   if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCApplyTranspose(ctx->pc, x, y)); /* y = M^-T x */
1022   else {
1023     PetscCall(PCHPDDMDeflate_Private<true>(pc, x, y)); /* y = Q^T x */
1024     if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED || ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
1025       if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
1026         /* TODO: checking whether Q^T = Q would make it possible to skip this coarse correction */
1027         PetscCall(PCHPDDMDeflate_Private(pc, x, ctx->v[1][1]));                /* y = Q x                     */
1028         PetscCall(MatMult(A, ctx->v[1][1], ctx->v[1][0]));                     /* y = A Q x                   */
1029         PetscCall(VecWAXPY(ctx->v[1][1], -1.0, ctx->v[1][0], x));              /* y = (I - A Q) x             */
1030         PetscCall(PCApplyTranspose(ctx->pc, ctx->v[1][1], ctx->v[1][0]));      /* y = M^-T (I - A Q) x        */
1031       } else PetscCall(PCApplyTranspose(ctx->pc, x, ctx->v[1][0]));            /* y = M^-T x                  */
1032       PetscCall(MatMultHermitianTranspose(A, ctx->v[1][0], ctx->v[1][1]));     /* z = A^T y                   */
1033       PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->v[1][1], ctx->v[1][1])); /* z = Q^T z                   */
1034       PetscCall(VecAXPBYPCZ(y, -1.0, 1.0, 1.0, ctx->v[1][1], ctx->v[1][0]));   /* y = (I - Q^T A^T) y + Q^T x */
1035     } else {
1036       PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
1037       PetscCall(PCApplyTranspose(ctx->pc, x, ctx->v[1][0]));
1038       PetscCall(VecAXPY(y, 1.0, ctx->v[1][0])); /* y = M^-T x + Q^T x */
1039     }
1040   }
1041   PetscFunctionReturn(PETSC_SUCCESS);
1042 }
1043 
1044 /*
1045      PCMatApplyTranspose_HPDDMShell - Variant of PCApplyTranspose_HPDDMShell() for blocks of vectors.
1046 
1047    Input Parameters:
1048 +     pc - preconditioner context
1049 -     X - block of input vectors
1050 
1051    Output Parameter:
1052 .     Y - block of output vectors
1053 
1054    Level: advanced
1055 
1056 .seealso: [](ch_ksp), `PCHPDDM`, `PCApplyTranspose_HPDDMShell()`, `PCHPDDMCoarseCorrectionType`
1057 */
1058 static PetscErrorCode PCMatApplyTranspose_HPDDMShell(PC pc, Mat X, Mat Y)
1059 {
1060   PC_HPDDM_Level *ctx;
1061   PetscScalar    *array;
1062   PetscBool       reset = PETSC_FALSE;
1063 
1064   PetscFunctionBegin;
1065   PetscCall(PCShellGetContext(pc, &ctx));
1066   PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
1067   if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCMatApplyTranspose(ctx->pc, X, Y));
1068   else if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
1069     /* similar code as in PCMatApply_HPDDMShell() with an extra call to PCHPDDMDeflate_Private<true>() */
1070     PetscCall(PCHPDDMMatApply_Private<false>(ctx, Y, &reset));
1071     PetscCall(PCHPDDMDeflate_Private(pc, X, Y));
1072     PetscCall(MatProductNumeric(ctx->V[1]));
1073     PetscCall(MatCopy(ctx->V[1], ctx->V[2], SAME_NONZERO_PATTERN));
1074     PetscCall(MatAXPY(ctx->V[2], -1.0, X, SAME_NONZERO_PATTERN));
1075     PetscCall(PCMatApplyTranspose(ctx->pc, ctx->V[2], ctx->V[1]));
1076     PetscCall(MatProductNumeric(ctx->V[2]));
1077     PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->V[2], ctx->V[2]));
1078     PetscCall(MatAXPY(ctx->V[1], -1.0, ctx->V[2], SAME_NONZERO_PATTERN));
1079     PetscCall(PCHPDDMDeflate_Private<true>(pc, X, Y)); /* TODO: checking whether Q^T = Q would make it possible to skip this coarse correction */
1080     PetscCall(MatAXPY(Y, -1.0, ctx->V[1], SAME_NONZERO_PATTERN));
1081   } else {
1082     PetscCall(PCHPDDMMatApply_Private<true>(ctx, Y, &reset));
1083     PetscCall(PCHPDDMDeflate_Private<true>(pc, X, Y));
1084     if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED) {
1085       PetscCall(PCMatApplyTranspose(ctx->pc, X, ctx->V[2]));
1086       PetscCall(MatAXPY(Y, 1.0, ctx->V[2], SAME_NONZERO_PATTERN));
1087       PetscCall(MatProductNumeric(ctx->V[1]));
1088       /* ctx->V[0] and ctx->V[1] memory regions overlap, so need to copy to ctx->V[2] and switch array */
1089       PetscCall(MatCopy(ctx->V[1], ctx->V[2], SAME_NONZERO_PATTERN));
1090       if (reset) PetscCall(MatDenseResetArray(ctx->V[1]));
1091       PetscCall(MatDenseGetArrayWrite(ctx->V[2], &array));
1092       PetscCall(MatDensePlaceArray(ctx->V[1], array));
1093       PetscCall(MatDenseRestoreArrayWrite(ctx->V[2], &array));
1094       reset = PETSC_TRUE;
1095       PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->V[1], ctx->V[1]));
1096       PetscCall(MatAXPY(Y, -1.0, ctx->V[1], SAME_NONZERO_PATTERN));
1097     } else {
1098       PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
1099       PetscCall(PCMatApplyTranspose(ctx->pc, X, ctx->V[1]));
1100       PetscCall(MatAXPY(Y, 1.0, ctx->V[1], SAME_NONZERO_PATTERN));
1101     }
1102     if (reset) PetscCall(MatDenseResetArray(ctx->V[1]));
1103   }
1104   PetscFunctionReturn(PETSC_SUCCESS);
1105 }
1106 
1107 static PetscErrorCode PCDestroy_HPDDMShell(PC pc)
1108 {
1109   PC_HPDDM_Level *ctx;
1110 
1111   PetscFunctionBegin;
1112   PetscCall(PCShellGetContext(pc, &ctx));
1113   PetscCall(HPDDM::Schwarz<PetscScalar>::destroy(ctx, PETSC_TRUE));
1114   PetscCall(VecDestroyVecs(1, &ctx->v[0]));
1115   PetscCall(VecDestroyVecs(2, &ctx->v[1]));
1116   PetscCall(PetscObjectCompose((PetscObject)ctx->pc->mat, "_HPDDM_MatProduct", nullptr));
1117   PetscCall(MatDestroy(ctx->V));
1118   PetscCall(MatDestroy(ctx->V + 1));
1119   PetscCall(MatDestroy(ctx->V + 2));
1120   PetscCall(VecDestroy(&ctx->D));
1121   PetscCall(PetscObjectCompose((PetscObject)ctx->scatter, "_HPDDM_StridedSF", nullptr));
1122   PetscCall(PetscSFDestroy(&ctx->scatter));
1123   PetscCall(PCDestroy(&ctx->pc));
1124   PetscFunctionReturn(PETSC_SUCCESS);
1125 }
1126 
1127 template <class Type, bool T = false, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
1128 static inline PetscErrorCode PCApply_Schur_Private(std::tuple<KSP, IS, Vec[2]> *p, PC factor, Type x, Type y)
1129 {
1130   PetscFunctionBegin;
1131   PetscCall(VecISCopy(std::get<2>(*p)[0], std::get<1>(*p), SCATTER_FORWARD, x));
1132   if (!T) PetscCall(PCApply(factor, std::get<2>(*p)[0], std::get<2>(*p)[1]));
1133   else PetscCall(PCApplyTranspose(factor, std::get<2>(*p)[0], std::get<2>(*p)[1]));
1134   PetscCall(VecISCopy(std::get<2>(*p)[1], std::get<1>(*p), SCATTER_REVERSE, y));
1135   PetscFunctionReturn(PETSC_SUCCESS);
1136 }
1137 
1138 template <class Type, bool = false, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
1139 static inline PetscErrorCode PCApply_Schur_Private(std::tuple<KSP, IS, Vec[2]> *p, PC factor, Type X, Type Y)
1140 {
1141   Mat B[2];
1142   Vec x, y;
1143 
1144   PetscFunctionBegin;
1145   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, factor->mat->rmap->n, X->cmap->n, nullptr, B));
1146   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, factor->mat->rmap->n, X->cmap->n, nullptr, B + 1));
1147   for (PetscInt i = 0; i < X->cmap->n; ++i) {
1148     PetscCall(MatDenseGetColumnVecRead(X, i, &x));
1149     PetscCall(MatDenseGetColumnVecWrite(B[0], i, &y));
1150     PetscCall(VecISCopy(y, std::get<1>(*p), SCATTER_FORWARD, x));
1151     PetscCall(MatDenseRestoreColumnVecWrite(B[0], i, &y));
1152     PetscCall(MatDenseRestoreColumnVecRead(X, i, &x));
1153   }
1154   PetscCall(PCMatApply(factor, B[0], B[1]));
1155   PetscCall(MatDestroy(B));
1156   for (PetscInt i = 0; i < X->cmap->n; ++i) {
1157     PetscCall(MatDenseGetColumnVecRead(B[1], i, &x));
1158     PetscCall(MatDenseGetColumnVecWrite(Y, i, &y));
1159     PetscCall(VecISCopy(x, std::get<1>(*p), SCATTER_REVERSE, y));
1160     PetscCall(MatDenseRestoreColumnVecWrite(Y, i, &y));
1161     PetscCall(MatDenseRestoreColumnVecRead(B[1], i, &x));
1162   }
1163   PetscCall(MatDestroy(B + 1));
1164   PetscFunctionReturn(PETSC_SUCCESS);
1165 }
1166 
1167 template <class Type = Vec, bool T = false>
1168 static PetscErrorCode PCApply_Schur(PC pc, Type x, Type y)
1169 {
1170   PC                           factor;
1171   Mat                          A;
1172   MatSolverType                type;
1173   PetscBool                    flg;
1174   std::tuple<KSP, IS, Vec[2]> *p;
1175 
1176   PetscFunctionBegin;
1177   PetscCall(PCShellGetContext(pc, &p));
1178   PetscCall(KSPGetPC(std::get<0>(*p), &factor));
1179   PetscCall(PCFactorGetMatSolverType(factor, &type));
1180   PetscCall(PCFactorGetMatrix(factor, &A));
1181   PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
1182   if (flg) {
1183     PetscCheck(PetscDefined(HAVE_MUMPS), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent MatSolverType");
1184     PetscCall(MatMumpsSetIcntl(A, 26, 0));
1185   } else {
1186     PetscCall(PetscStrcmp(type, MATSOLVERMKL_PARDISO, &flg));
1187     PetscCheck(flg && PetscDefined(HAVE_MKL_PARDISO), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent MatSolverType");
1188     flg = PETSC_FALSE;
1189 #if PetscDefined(HAVE_MKL_PARDISO)
1190     PetscCall(MatMkl_PardisoSetCntl(A, 70, 1));
1191 #endif
1192   }
1193   PetscCall(PCApply_Schur_Private<Type, T>(p, factor, x, y));
1194   if (flg) {
1195     PetscCall(MatMumpsSetIcntl(A, 26, -1));
1196   } else {
1197 #if PetscDefined(HAVE_MKL_PARDISO)
1198     PetscCall(MatMkl_PardisoSetCntl(A, 70, 0));
1199 #endif
1200   }
1201   PetscFunctionReturn(PETSC_SUCCESS);
1202 }
1203 
1204 static PetscErrorCode PCDestroy_Schur(PC pc)
1205 {
1206   std::tuple<KSP, IS, Vec[2]> *p;
1207 
1208   PetscFunctionBegin;
1209   PetscCall(PCShellGetContext(pc, &p));
1210   PetscCall(ISDestroy(&std::get<1>(*p)));
1211   PetscCall(VecDestroy(std::get<2>(*p)));
1212   PetscCall(VecDestroy(std::get<2>(*p) + 1));
1213   PetscCall(PetscFree(p));
1214   PetscFunctionReturn(PETSC_SUCCESS);
1215 }
1216 
1217 template <bool transpose>
1218 static PetscErrorCode PCHPDDMSolve_Private(const PC_HPDDM_Level *ctx, PetscScalar *rhs, const unsigned short &mu)
1219 {
1220   Mat      B, X;
1221   PetscInt n, N, j = 0;
1222 
1223   PetscFunctionBegin;
1224   PetscCall(KSPGetOperators(ctx->ksp, &B, nullptr));
1225   PetscCall(MatGetLocalSize(B, &n, nullptr));
1226   PetscCall(MatGetSize(B, &N, nullptr));
1227   if (ctx->parent->log_separate) {
1228     j = std::distance(ctx->parent->levels, std::find(ctx->parent->levels, ctx->parent->levels + ctx->parent->N, ctx));
1229     PetscCall(PetscLogEventBegin(PC_HPDDM_Solve[j], ctx->ksp, nullptr, nullptr, nullptr));
1230   }
1231   if (mu == 1) {
1232     if (!ctx->ksp->vec_rhs) {
1233       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)ctx->ksp), 1, n, N, nullptr, &ctx->ksp->vec_rhs));
1234       PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)ctx->ksp), n, N, &ctx->ksp->vec_sol));
1235     }
1236     PetscCall(VecPlaceArray(ctx->ksp->vec_rhs, rhs));
1237     if (!transpose) PetscCall(KSPSolve(ctx->ksp, nullptr, nullptr));
1238     else {
1239       PetscCall(VecConjugate(ctx->ksp->vec_rhs));
1240       PetscCall(KSPSolveTranspose(ctx->ksp, nullptr, nullptr)); /* TODO: missing KSPSolveHermitianTranspose() */
1241       PetscCall(VecConjugate(ctx->ksp->vec_sol));
1242     }
1243     PetscCall(VecCopy(ctx->ksp->vec_sol, ctx->ksp->vec_rhs));
1244     PetscCall(VecResetArray(ctx->ksp->vec_rhs));
1245   } else {
1246     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)ctx->ksp), n, PETSC_DECIDE, N, mu, rhs, &B));
1247     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)ctx->ksp), n, PETSC_DECIDE, N, mu, nullptr, &X));
1248     if (!transpose) PetscCall(KSPMatSolve(ctx->ksp, B, X));
1249     else {
1250       PetscCall(MatConjugate(B));
1251       PetscCall(KSPMatSolveTranspose(ctx->ksp, B, X)); /* TODO: missing KSPMatSolveHermitianTranspose() */
1252       PetscCall(MatConjugate(X));
1253     }
1254     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
1255     PetscCall(MatDestroy(&X));
1256     PetscCall(MatDestroy(&B));
1257   }
1258   if (ctx->parent->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_Solve[j], ctx->ksp, nullptr, nullptr, nullptr));
1259   PetscFunctionReturn(PETSC_SUCCESS);
1260 }
1261 
1262 static PetscErrorCode PCHPDDMSetUpNeumannOverlap_Private(PC pc)
1263 {
1264   PC_HPDDM *data = (PC_HPDDM *)pc->data;
1265 
1266   PetscFunctionBegin;
1267   if (data->setup) {
1268     Mat       P;
1269     Vec       x, xt = nullptr;
1270     PetscReal t = 0.0, s = 0.0;
1271 
1272     PetscCall(PCGetOperators(pc, nullptr, &P));
1273     PetscCall(PetscObjectQuery((PetscObject)P, "__SNES_latest_X", (PetscObject *)&x));
1274     PetscCallBack("PCHPDDM Neumann callback", (*data->setup)(data->aux, t, x, xt, s, data->is, data->setup_ctx));
1275   }
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
1279 static PetscErrorCode PCHPDDMCreateSubMatrices_Private(Mat mat, PetscInt n, const IS *, const IS *, MatReuse scall, Mat *submat[])
1280 {
1281   Mat       A;
1282   PetscBool flg;
1283 
1284   PetscFunctionBegin;
1285   PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatCreateSubMatrices() called to extract %" PetscInt_FMT " submatrices, which is different than 1", n);
1286   /* previously composed Mat */
1287   PetscCall(PetscObjectQuery((PetscObject)mat, "_PCHPDDM_SubMatrices", (PetscObject *)&A));
1288   PetscCheck(A, PETSC_COMM_SELF, PETSC_ERR_PLIB, "SubMatrices not found in Mat");
1289   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSCHURCOMPLEMENT, &flg)); /* MATSCHURCOMPLEMENT has neither a MatDuplicate() nor a MatCopy() implementation */
1290   if (scall == MAT_INITIAL_MATRIX) {
1291     PetscCall(PetscCalloc1(2, submat)); /* allocate an extra Mat to avoid errors in MatDestroySubMatrices_Dummy() */
1292     if (!flg) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, *submat));
1293   } else if (!flg) PetscCall(MatCopy(A, (*submat)[0], SAME_NONZERO_PATTERN));
1294   if (flg) {
1295     PetscCall(MatDestroy(*submat)); /* previously created Mat has to be destroyed */
1296     (*submat)[0] = A;
1297     PetscCall(PetscObjectReference((PetscObject)A));
1298   }
1299   PetscFunctionReturn(PETSC_SUCCESS);
1300 }
1301 
1302 static PetscErrorCode PCHPDDMCommunicationAvoidingPCASM_Private(PC pc, Mat C, PetscBool sorted)
1303 {
1304   PetscErrorCodeFn *op;
1305 
1306   PetscFunctionBegin;
1307   /* previously-composed Mat */
1308   PetscCall(PetscObjectCompose((PetscObject)pc->pmat, "_PCHPDDM_SubMatrices", (PetscObject)C));
1309   PetscCall(MatGetOperation(pc->pmat, MATOP_CREATE_SUBMATRICES, &op));
1310   /* trick suggested by Barry https://lists.mcs.anl.gov/pipermail/petsc-dev/2020-January/025491.html */
1311   PetscCall(MatSetOperation(pc->pmat, MATOP_CREATE_SUBMATRICES, (PetscErrorCodeFn *)PCHPDDMCreateSubMatrices_Private));
1312   if (sorted) PetscCall(PCASMSetSortIndices(pc, PETSC_FALSE)); /* everything is already sorted */
1313   PetscCall(PCSetFromOptions(pc));                             /* otherwise -pc_hpddm_levels_1_pc_asm_sub_mat_type is not used */
1314   PetscCall(PCSetUp(pc));
1315   /* reset MatCreateSubMatrices() */
1316   PetscCall(MatSetOperation(pc->pmat, MATOP_CREATE_SUBMATRICES, op));
1317   PetscCall(PetscObjectCompose((PetscObject)pc->pmat, "_PCHPDDM_SubMatrices", nullptr));
1318   PetscFunctionReturn(PETSC_SUCCESS);
1319 }
1320 
1321 static PetscErrorCode PCHPDDMPermute_Private(IS is, IS in_is, IS *out_is, Mat in_C, Mat *out_C, IS *p)
1322 {
1323   IS                           perm;
1324   const PetscInt              *ptr;
1325   PetscInt                    *concatenate, size, bs;
1326   std::map<PetscInt, PetscInt> order;
1327   PetscBool                    sorted;
1328 
1329   PetscFunctionBegin;
1330   PetscValidHeaderSpecific(is, IS_CLASSID, 1);
1331   PetscValidHeaderSpecific(in_C, MAT_CLASSID, 4);
1332   PetscCall(ISSorted(is, &sorted));
1333   if (!sorted) {
1334     PetscCall(ISGetLocalSize(is, &size));
1335     PetscCall(ISGetIndices(is, &ptr));
1336     PetscCall(ISGetBlockSize(is, &bs));
1337     /* MatCreateSubMatrices(), called by PCASM, follows the global numbering of Pmat */
1338     for (PetscInt n = 0; n < size; n += bs) order.insert(std::make_pair(ptr[n] / bs, n / bs));
1339     PetscCall(ISRestoreIndices(is, &ptr));
1340     size /= bs;
1341     if (out_C) {
1342       PetscCall(PetscMalloc1(size, &concatenate));
1343       for (const std::pair<const PetscInt, PetscInt> &i : order) *concatenate++ = i.second;
1344       concatenate -= size;
1345       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)in_C), bs, size, concatenate, PETSC_OWN_POINTER, &perm));
1346       PetscCall(ISSetPermutation(perm));
1347       /* permute user-provided Mat so that it matches with MatCreateSubMatrices() numbering */
1348       PetscCall(MatPermute(in_C, perm, perm, out_C));
1349       if (p) *p = perm;
1350       else PetscCall(ISDestroy(&perm)); /* no need to save the permutation */
1351     }
1352     if (out_is) {
1353       PetscCall(PetscMalloc1(size, &concatenate));
1354       for (const std::pair<const PetscInt, PetscInt> &i : order) *concatenate++ = i.first;
1355       concatenate -= size;
1356       /* permute user-provided IS so that it matches with MatCreateSubMatrices() numbering */
1357       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)in_is), bs, size, concatenate, PETSC_OWN_POINTER, out_is));
1358     }
1359   } else { /* input IS is sorted, nothing to permute, simply duplicate inputs when needed */
1360     if (out_C) PetscCall(MatDuplicate(in_C, MAT_COPY_VALUES, out_C));
1361     if (out_is) PetscCall(ISDuplicate(in_is, out_is));
1362   }
1363   PetscFunctionReturn(PETSC_SUCCESS);
1364 }
1365 
1366 static PetscErrorCode PCHPDDMCheckSymmetry_Private(PC pc, Mat A01, Mat A10, Mat *B01 = nullptr)
1367 {
1368   Mat       T, U = nullptr, B = nullptr;
1369   IS        z;
1370   PetscBool flg, conjugate = PETSC_FALSE;
1371 
1372   PetscFunctionBegin;
1373   PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATTRANSPOSEVIRTUAL, &flg));
1374   if (B01) *B01 = nullptr;
1375   if (flg) {
1376     PetscCall(MatShellGetScalingShifts(A10, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1377     PetscCall(MatTransposeGetMat(A10, &U));
1378   } else {
1379     PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
1380     if (flg) {
1381       PetscCall(MatShellGetScalingShifts(A10, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1382       PetscCall(MatHermitianTransposeGetMat(A10, &U));
1383       conjugate = PETSC_TRUE;
1384     }
1385   }
1386   if (U) PetscCall(MatDuplicate(U, MAT_COPY_VALUES, &T));
1387   else PetscCall(MatHermitianTranspose(A10, MAT_INITIAL_MATRIX, &T));
1388   PetscCall(PetscObjectTypeCompare((PetscObject)A01, MATTRANSPOSEVIRTUAL, &flg));
1389   if (flg) {
1390     PetscCall(MatShellGetScalingShifts(A01, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1391     PetscCall(MatTransposeGetMat(A01, &A01));
1392     PetscCall(MatTranspose(A01, MAT_INITIAL_MATRIX, &B));
1393     A01 = B;
1394   } else {
1395     PetscCall(PetscObjectTypeCompare((PetscObject)A01, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
1396     if (flg) {
1397       PetscCall(MatShellGetScalingShifts(A01, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1398       PetscCall(MatHermitianTransposeGetMat(A01, &A01));
1399       PetscCall(MatHermitianTranspose(A01, MAT_INITIAL_MATRIX, &B));
1400       A01 = B;
1401     }
1402   }
1403   PetscCall(PetscLayoutCompare(T->rmap, A01->rmap, &flg));
1404   if (flg) {
1405     PetscCall(PetscLayoutCompare(T->cmap, A01->cmap, &flg));
1406     if (flg) {
1407       PetscCall(MatFindZeroRows(A01, &z)); /* for essential boundary conditions, some implementations will */
1408       if (z) {                             /*  zero rows in [P00 A01] except for the diagonal of P00       */
1409         if (B01) PetscCall(MatDuplicate(T, MAT_COPY_VALUES, B01));
1410         PetscCall(MatSetOption(T, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
1411         PetscCall(MatZeroRowsIS(T, z, 0.0, nullptr, nullptr)); /* corresponding zero rows from A01 */
1412       }
1413       PetscCall(MatMultEqual(A01, T, 20, &flg));
1414       if (!B01) PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "A01 != A10^T");
1415       else {
1416         PetscCall(PetscInfo(pc, "A01 and A10^T are equal? %s\n", PetscBools[flg]));
1417         if (!flg) {
1418           if (z) PetscCall(MatDestroy(&T));
1419           else *B01 = T;
1420           flg = PETSC_TRUE;
1421         } else PetscCall(MatDestroy(B01));
1422       }
1423       PetscCall(ISDestroy(&z));
1424     }
1425   }
1426   if (!flg) PetscCall(PetscInfo(pc, "A01 and A10^T have non-congruent layouts, cannot test for equality\n"));
1427   if (!B01 || !*B01) PetscCall(MatDestroy(&T));
1428   else if (conjugate) PetscCall(MatConjugate(T));
1429   PetscCall(MatDestroy(&B));
1430   PetscFunctionReturn(PETSC_SUCCESS);
1431 }
1432 
1433 static PetscErrorCode PCHPDDMCheckInclusion_Private(PC pc, IS is, IS is_local, PetscBool check)
1434 {
1435   IS          intersect;
1436   const char *str = "IS of the auxiliary Mat does not include all local rows of A";
1437   PetscBool   equal;
1438 
1439   PetscFunctionBegin;
1440   PetscCall(ISIntersect(is, is_local, &intersect));
1441   PetscCall(ISEqualUnsorted(is_local, intersect, &equal));
1442   PetscCall(ISDestroy(&intersect));
1443   if (check) PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%s", str);
1444   else if (!equal) PetscCall(PetscInfo(pc, "%s\n", str));
1445   PetscFunctionReturn(PETSC_SUCCESS);
1446 }
1447 
1448 static PetscErrorCode PCHPDDMCheckMatStructure_Private(PC pc, Mat A, Mat B)
1449 {
1450   Mat             X, Y;
1451   const PetscInt *i[2], *j[2];
1452   PetscBool       flg = PETSC_TRUE;
1453 
1454   PetscFunctionBegin;
1455   PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &X)); /* no common way to compare sparsity pattern, so just convert to MATSEQAIJ */
1456   PetscCall(MatConvert(B, MATAIJ, MAT_INITIAL_MATRIX, &Y)); /* the second Mat (B = Neumann) should have a SUBSET_NONZERO_PATTERN MatStructure of the first one (A = Dirichlet) */
1457   PetscCall(MatSeqAIJGetCSRAndMemType(X, &i[0], &j[0], nullptr, nullptr));
1458   PetscCall(MatSeqAIJGetCSRAndMemType(Y, &i[1], &j[1], nullptr, nullptr));
1459   for (PetscInt row = 0; (row < X->rmap->n) && flg; ++row) {
1460     const PetscInt n = i[0][row + 1] - i[0][row];
1461 
1462     for (PetscInt k = i[1][row], location; k < i[1][row + 1]; ++k) {
1463       PetscCall(PetscFindInt(j[1][k], n, j[0] + i[0][row], &location));
1464       if (location < 0) {
1465         flg = PETSC_FALSE;
1466         break;
1467       }
1468     }
1469   }
1470   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1471   PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER_INPUT, "Auxiliary Mat is supposedly the local Neumann matrix but it has a sparsity pattern which is not a subset of the one of the local assembled matrix");
1472   PetscCall(MatDestroy(&Y));
1473   PetscCall(MatDestroy(&X));
1474   PetscFunctionReturn(PETSC_SUCCESS);
1475 }
1476 
1477 static PetscErrorCode PCHPDDMDestroySubMatrices_Private(PetscBool flg, PetscBool algebraic, Mat *sub)
1478 {
1479   IS is;
1480 
1481   PetscFunctionBegin;
1482   if (!flg) {
1483     if (algebraic) {
1484       PetscCall(PetscObjectQuery((PetscObject)sub[0], "_PCHPDDM_Embed", (PetscObject *)&is));
1485       PetscCall(ISDestroy(&is));
1486       PetscCall(PetscObjectCompose((PetscObject)sub[0], "_PCHPDDM_Embed", nullptr));
1487       PetscCall(PetscObjectCompose((PetscObject)sub[0], "_PCHPDDM_Compact", nullptr));
1488     }
1489     PetscCall(MatDestroySubMatrices(algebraic ? 2 : 1, &sub));
1490   }
1491   PetscFunctionReturn(PETSC_SUCCESS);
1492 }
1493 
1494 static PetscErrorCode PCHPDDMAlgebraicAuxiliaryMat_Private(Mat P, IS *is, Mat *sub[], PetscBool block)
1495 {
1496   IS         icol[3], irow[2];
1497   Mat       *M, Q;
1498   PetscReal *ptr;
1499   PetscInt  *idx, p = 0, bs = P->cmap->bs;
1500   PetscBool  flg;
1501 
1502   PetscFunctionBegin;
1503   PetscCall(ISCreateStride(PETSC_COMM_SELF, P->cmap->N, 0, 1, icol + 2));
1504   PetscCall(ISSetBlockSize(icol[2], bs));
1505   PetscCall(ISSetIdentity(icol[2]));
1506   PetscCall(PetscObjectTypeCompare((PetscObject)P, MATMPISBAIJ, &flg));
1507   if (flg) {
1508     /* MatCreateSubMatrices() does not handle MATMPISBAIJ properly when iscol != isrow, so convert first to MATMPIBAIJ */
1509     PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &Q));
1510     std::swap(P, Q);
1511   }
1512   PetscCall(MatCreateSubMatrices(P, 1, is, icol + 2, MAT_INITIAL_MATRIX, &M));
1513   if (flg) {
1514     std::swap(P, Q);
1515     PetscCall(MatDestroy(&Q));
1516   }
1517   PetscCall(ISDestroy(icol + 2));
1518   PetscCall(ISCreateStride(PETSC_COMM_SELF, M[0]->rmap->N, 0, 1, irow));
1519   PetscCall(ISSetBlockSize(irow[0], bs));
1520   PetscCall(ISSetIdentity(irow[0]));
1521   if (!block) {
1522     PetscCall(PetscMalloc2(P->cmap->N, &ptr, P->cmap->N / bs, &idx));
1523     PetscCall(MatGetColumnNorms(M[0], NORM_INFINITY, ptr));
1524     /* check for nonzero columns so that M[0] may be expressed in compact form */
1525     for (PetscInt n = 0; n < P->cmap->N; n += bs) {
1526       if (std::find_if(ptr + n, ptr + n + bs, [](PetscReal v) { return v > PETSC_MACHINE_EPSILON; }) != ptr + n + bs) idx[p++] = n / bs;
1527     }
1528     PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, p, idx, PETSC_USE_POINTER, icol + 1));
1529     PetscCall(ISSetInfo(icol[1], IS_SORTED, IS_GLOBAL, PETSC_TRUE, PETSC_TRUE));
1530     PetscCall(ISEmbed(*is, icol[1], PETSC_FALSE, icol + 2));
1531     irow[1] = irow[0];
1532     /* first Mat will be used in PCASM (if it is used as a PC on this level) and as the left-hand side of GenEO */
1533     icol[0] = is[0];
1534     PetscCall(MatCreateSubMatrices(M[0], 2, irow, icol, MAT_INITIAL_MATRIX, sub));
1535     PetscCall(ISDestroy(icol + 1));
1536     PetscCall(PetscFree2(ptr, idx));
1537     /* IS used to go back and forth between the augmented and the original local linear system, see eq. (3.4) of [2022b] */
1538     PetscCall(PetscObjectCompose((PetscObject)(*sub)[0], "_PCHPDDM_Embed", (PetscObject)icol[2]));
1539     /* Mat used in eq. (3.1) of [2022b] */
1540     PetscCall(PetscObjectCompose((PetscObject)(*sub)[0], "_PCHPDDM_Compact", (PetscObject)(*sub)[1]));
1541   } else {
1542     Mat aux;
1543 
1544     PetscCall(MatSetOption(M[0], MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
1545     /* diagonal block of the overlapping rows */
1546     PetscCall(MatCreateSubMatrices(M[0], 1, irow, is, MAT_INITIAL_MATRIX, sub));
1547     PetscCall(MatDuplicate((*sub)[0], MAT_COPY_VALUES, &aux));
1548     PetscCall(MatSetOption(aux, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1549     if (bs == 1) { /* scalar case */
1550       Vec sum[2];
1551 
1552       PetscCall(MatCreateVecs(aux, sum, sum + 1));
1553       PetscCall(MatGetRowSum(M[0], sum[0]));
1554       PetscCall(MatGetRowSum(aux, sum[1]));
1555       /* off-diagonal block row sum (full rows - diagonal block rows) */
1556       PetscCall(VecAXPY(sum[0], -1.0, sum[1]));
1557       /* subdomain matrix plus off-diagonal block row sum */
1558       PetscCall(MatDiagonalSet(aux, sum[0], ADD_VALUES));
1559       PetscCall(VecDestroy(sum));
1560       PetscCall(VecDestroy(sum + 1));
1561     } else { /* vectorial case */
1562       /* TODO: missing MatGetValuesBlocked(), so the code below is     */
1563       /* an extension of the scalar case for when bs > 1, but it could */
1564       /* be more efficient by avoiding all these MatMatMult()          */
1565       Mat          sum[2], ones;
1566       PetscScalar *ptr;
1567 
1568       PetscCall(PetscCalloc1(M[0]->cmap->n * bs, &ptr));
1569       PetscCall(MatCreateDense(PETSC_COMM_SELF, M[0]->cmap->n, bs, M[0]->cmap->n, bs, ptr, &ones));
1570       for (PetscInt n = 0; n < M[0]->cmap->n; n += bs) {
1571         for (p = 0; p < bs; ++p) ptr[n + p * (M[0]->cmap->n + 1)] = 1.0;
1572       }
1573       PetscCall(MatMatMult(M[0], ones, MAT_INITIAL_MATRIX, PETSC_CURRENT, sum));
1574       PetscCall(MatDestroy(&ones));
1575       PetscCall(MatCreateDense(PETSC_COMM_SELF, aux->cmap->n, bs, aux->cmap->n, bs, ptr, &ones));
1576       PetscCall(MatDenseSetLDA(ones, M[0]->cmap->n));
1577       PetscCall(MatMatMult(aux, ones, MAT_INITIAL_MATRIX, PETSC_CURRENT, sum + 1));
1578       PetscCall(MatDestroy(&ones));
1579       PetscCall(PetscFree(ptr));
1580       /* off-diagonal block row sum (full rows - diagonal block rows) */
1581       PetscCall(MatAXPY(sum[0], -1.0, sum[1], SAME_NONZERO_PATTERN));
1582       PetscCall(MatDestroy(sum + 1));
1583       /* re-order values to be consistent with MatSetValuesBlocked()           */
1584       /* equivalent to MatTranspose() which does not truly handle              */
1585       /* MAT_INPLACE_MATRIX in the rectangular case, as it calls PetscMalloc() */
1586       PetscCall(MatDenseGetArrayWrite(sum[0], &ptr));
1587       HPDDM::Wrapper<PetscScalar>::imatcopy<'T'>(bs, sum[0]->rmap->n, ptr, sum[0]->rmap->n, bs);
1588       /* subdomain matrix plus off-diagonal block row sum */
1589       for (PetscInt n = 0; n < aux->cmap->n / bs; ++n) PetscCall(MatSetValuesBlocked(aux, 1, &n, 1, &n, ptr + n * bs * bs, ADD_VALUES));
1590       PetscCall(MatAssemblyBegin(aux, MAT_FINAL_ASSEMBLY));
1591       PetscCall(MatAssemblyEnd(aux, MAT_FINAL_ASSEMBLY));
1592       PetscCall(MatDenseRestoreArrayWrite(sum[0], &ptr));
1593       PetscCall(MatDestroy(sum));
1594     }
1595     PetscCall(MatSetOption(aux, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1596     /* left-hand side of GenEO, with the same sparsity pattern as PCASM subdomain solvers  */
1597     PetscCall(PetscObjectCompose((PetscObject)(*sub)[0], "_PCHPDDM_Neumann_Mat", (PetscObject)aux));
1598   }
1599   PetscCall(ISDestroy(irow));
1600   PetscCall(MatDestroySubMatrices(1, &M));
1601   PetscFunctionReturn(PETSC_SUCCESS);
1602 }
1603 
1604 static PetscErrorCode PCApply_Nest(PC pc, Vec x, Vec y)
1605 {
1606   Mat                    A;
1607   MatSolverType          type;
1608   IS                     is[2];
1609   PetscBool              flg;
1610   std::pair<PC, Vec[2]> *p;
1611 
1612   PetscFunctionBegin;
1613   PetscCall(PCShellGetContext(pc, &p));
1614   if (p->second[0]) { /* in case of a centralized Schur complement, some processes may have no local operator */
1615     PetscCall(PCGetOperators(p->first, &A, nullptr));
1616     PetscCall(MatNestGetISs(A, is, nullptr));
1617     PetscCall(PetscObjectTypeCompareAny((PetscObject)p->first, &flg, PCLU, PCCHOLESKY, ""));
1618     if (flg) { /* partial solve currently only makes sense with exact factorizations */
1619       PetscCall(PCFactorGetMatSolverType(p->first, &type));
1620       PetscCall(PCFactorGetMatrix(p->first, &A));
1621       if (A->schur) {
1622         PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
1623         if (flg) PetscCall(MatMumpsSetIcntl(A, 26, 1)); /* reduction/condensation phase followed by Schur complement solve */
1624       } else flg = PETSC_FALSE;
1625     }
1626     PetscCall(VecISCopy(p->second[0], is[1], SCATTER_FORWARD, x)); /* assign the RHS associated to the Schur complement */
1627     PetscCall(PCApply(p->first, p->second[0], p->second[1]));
1628     PetscCall(VecISCopy(p->second[1], is[1], SCATTER_REVERSE, y)); /* retrieve the partial solution associated to the Schur complement */
1629     if (flg) PetscCall(MatMumpsSetIcntl(A, 26, -1));               /* default ICNTL(26) value in PETSc */
1630   }
1631   PetscFunctionReturn(PETSC_SUCCESS);
1632 }
1633 
1634 static PetscErrorCode PCView_Nest(PC pc, PetscViewer viewer)
1635 {
1636   std::pair<PC, Vec[2]> *p;
1637 
1638   PetscFunctionBegin;
1639   PetscCall(PCShellGetContext(pc, &p));
1640   PetscCall(PCView(p->first, viewer));
1641   PetscFunctionReturn(PETSC_SUCCESS);
1642 }
1643 
1644 static PetscErrorCode PCDestroy_Nest(PC pc)
1645 {
1646   std::pair<PC, Vec[2]> *p;
1647 
1648   PetscFunctionBegin;
1649   PetscCall(PCShellGetContext(pc, &p));
1650   PetscCall(VecDestroy(p->second));
1651   PetscCall(VecDestroy(p->second + 1));
1652   PetscCall(PCDestroy(&p->first));
1653   PetscCall(PetscFree(p));
1654   PetscFunctionReturn(PETSC_SUCCESS);
1655 }
1656 
1657 template <bool T = false>
1658 static PetscErrorCode MatMult_Schur(Mat A, Vec x, Vec y)
1659 {
1660   std::tuple<Mat, PetscSF, Vec[2]> *ctx;
1661 
1662   PetscFunctionBegin;
1663   PetscCall(MatShellGetContext(A, &ctx));
1664   PetscCall(VecScatterBegin(std::get<1>(*ctx), x, std::get<2>(*ctx)[0], INSERT_VALUES, SCATTER_FORWARD)); /* local Vec with overlap */
1665   PetscCall(VecScatterEnd(std::get<1>(*ctx), x, std::get<2>(*ctx)[0], INSERT_VALUES, SCATTER_FORWARD));
1666   if (!T) PetscCall(MatMult(std::get<0>(*ctx), std::get<2>(*ctx)[0], std::get<2>(*ctx)[1])); /* local Schur complement */
1667   else PetscCall(MatMultTranspose(std::get<0>(*ctx), std::get<2>(*ctx)[0], std::get<2>(*ctx)[1]));
1668   PetscCall(VecSet(y, 0.0));
1669   PetscCall(VecScatterBegin(std::get<1>(*ctx), std::get<2>(*ctx)[1], y, ADD_VALUES, SCATTER_REVERSE)); /* global Vec with summed up contributions on the overlap */
1670   PetscCall(VecScatterEnd(std::get<1>(*ctx), std::get<2>(*ctx)[1], y, ADD_VALUES, SCATTER_REVERSE));
1671   PetscFunctionReturn(PETSC_SUCCESS);
1672 }
1673 
1674 static PetscErrorCode MatDestroy_Schur(Mat A)
1675 {
1676   std::tuple<Mat, PetscSF, Vec[2]> *ctx;
1677 
1678   PetscFunctionBegin;
1679   PetscCall(MatShellGetContext(A, &ctx));
1680   PetscCall(VecDestroy(std::get<2>(*ctx)));
1681   PetscCall(VecDestroy(std::get<2>(*ctx) + 1));
1682   PetscCall(PetscFree(ctx));
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
1686 static PetscErrorCode MatMult_SchurCorrection(Mat A, Vec x, Vec y)
1687 {
1688   PC                                         pc;
1689   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx;
1690 
1691   PetscFunctionBegin;
1692   PetscCall(MatShellGetContext(A, &ctx));
1693   pc = ((PC_HPDDM *)std::get<0>(*ctx)[0]->data)->levels[0]->ksp->pc;
1694   if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) {             /* Q_0 is the coarse correction associated to the A00 block from PCFIELDSPLIT */
1695     PetscCall(MatMult(std::get<1>(*ctx)[0], x, std::get<3>(*ctx)[1]));                    /*     A_01 x                 */
1696     PetscCall(PCHPDDMDeflate_Private(pc, std::get<3>(*ctx)[1], std::get<3>(*ctx)[1]));    /*     Q_0 A_01 x             */
1697     PetscCall(MatMult(std::get<1>(*ctx)[1], std::get<3>(*ctx)[1], std::get<3>(*ctx)[0])); /*     A_10 Q_0 A_01 x        */
1698     PetscCall(PCApply(std::get<0>(*ctx)[1], std::get<3>(*ctx)[0], y));                    /* y = M_S^-1 A_10 Q_0 A_01 x */
1699   } else {
1700     PetscCall(PCApply(std::get<0>(*ctx)[1], x, std::get<3>(*ctx)[0]));                    /*     M_S^-1 x               */
1701     PetscCall(MatMult(std::get<1>(*ctx)[0], std::get<3>(*ctx)[0], std::get<3>(*ctx)[1])); /*     A_01 M_S^-1 x          */
1702     PetscCall(PCHPDDMDeflate_Private(pc, std::get<3>(*ctx)[1], std::get<3>(*ctx)[1]));    /*     Q_0 A_01 M_S^-1 x      */
1703     PetscCall(MatMult(std::get<1>(*ctx)[1], std::get<3>(*ctx)[1], y));                    /* y = A_10 Q_0 A_01 M_S^-1 x */
1704   }
1705   PetscCall(VecAXPY(y, -1.0, x)); /* y -= x, preconditioned eq. (24) of https://hal.science/hal-02343808v6/document (with a sign flip) */
1706   PetscFunctionReturn(PETSC_SUCCESS);
1707 }
1708 
1709 static PetscErrorCode MatView_SchurCorrection(Mat A, PetscViewer viewer)
1710 {
1711   PetscBool                                  ascii;
1712   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx;
1713 
1714   PetscFunctionBegin;
1715   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &ascii));
1716   if (ascii) {
1717     PetscCall(MatShellGetContext(A, &ctx));
1718     PetscCall(PetscViewerASCIIPrintf(viewer, "action of %s\n", std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT ? "(I - M_S^-1 A_10 Q_0 A_01)" : "(I - A_10 Q_0 A_01 M_S^-1)"));
1719     PetscCall(PCView(std::get<0>(*ctx)[1], viewer)); /* no need to PCView(Q_0) since it will be done by PCFIELDSPLIT */
1720   }
1721   PetscFunctionReturn(PETSC_SUCCESS);
1722 }
1723 
1724 static PetscErrorCode MatDestroy_SchurCorrection(Mat A)
1725 {
1726   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx;
1727 
1728   PetscFunctionBegin;
1729   PetscCall(MatShellGetContext(A, &ctx));
1730   PetscCall(VecDestroy(std::get<3>(*ctx)));
1731   PetscCall(VecDestroy(std::get<3>(*ctx) + 1));
1732   PetscCall(VecDestroy(std::get<3>(*ctx) + 2));
1733   PetscCall(PCDestroy(std::get<0>(*ctx) + 1));
1734   PetscCall(PetscFree(ctx));
1735   PetscFunctionReturn(PETSC_SUCCESS);
1736 }
1737 
1738 static PetscErrorCode PCPostSolve_SchurPreLeastSquares(PC, KSP, Vec, Vec x)
1739 {
1740   PetscFunctionBegin;
1741   PetscCall(VecScale(x, -1.0));
1742   PetscFunctionReturn(PETSC_SUCCESS);
1743 }
1744 
1745 static PetscErrorCode KSPPreSolve_SchurCorrection(KSP, Vec b, Vec, void *context)
1746 {
1747   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx = reinterpret_cast<std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *>(context);
1748 
1749   PetscFunctionBegin;
1750   if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) {
1751     PetscCall(PCApply(std::get<0>(*ctx)[1], b, std::get<3>(*ctx)[2]));
1752     std::swap(*b, *std::get<3>(*ctx)[2]); /* replace b by M^-1 b, but need to keep a copy of the original RHS, so swap it with the work Vec */
1753   }
1754   PetscFunctionReturn(PETSC_SUCCESS);
1755 }
1756 
1757 static PetscErrorCode KSPPostSolve_SchurCorrection(KSP, Vec b, Vec x, void *context)
1758 {
1759   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx = reinterpret_cast<std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *>(context);
1760 
1761   PetscFunctionBegin;
1762   if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) std::swap(*b, *std::get<3>(*ctx)[2]); /* put back the original RHS where it belongs */
1763   else {
1764     PetscCall(PCApply(std::get<0>(*ctx)[1], x, std::get<3>(*ctx)[2]));
1765     PetscCall(VecCopy(std::get<3>(*ctx)[2], x)); /* replace x by M^-1 x */
1766   }
1767   PetscFunctionReturn(PETSC_SUCCESS);
1768 }
1769 
1770 static PetscErrorCode MatMult_Harmonic(Mat, Vec, Vec);
1771 static PetscErrorCode MatMultTranspose_Harmonic(Mat, Vec, Vec);
1772 static PetscErrorCode MatProduct_AB_Harmonic(Mat, Mat, Mat, void *);
1773 static PetscErrorCode MatProduct_AtB_Harmonic(Mat, Mat, Mat, void *);
1774 static PetscErrorCode MatDestroy_Harmonic(Mat);
1775 
1776 static PetscErrorCode PCSetUp_HPDDM(PC pc)
1777 {
1778   PC_HPDDM                                  *data = (PC_HPDDM *)pc->data;
1779   PC                                         inner;
1780   KSP                                       *ksp;
1781   Mat                                       *sub, A, P, N, C = nullptr, uaux = nullptr, weighted, subA[2], S;
1782   Vec                                        xin, v;
1783   std::vector<Vec>                           initial;
1784   IS                                         is[1], loc, uis = data->is, unsorted = nullptr;
1785   ISLocalToGlobalMapping                     l2g;
1786   char                                       prefix[256];
1787   const char                                *pcpre;
1788   const PetscScalar *const                  *ev;
1789   PetscInt                                   n, requested = data->N, reused = 0, overlap = -1;
1790   MatStructure                               structure  = UNKNOWN_NONZERO_PATTERN;
1791   PetscBool                                  subdomains = PETSC_FALSE, flg = PETSC_FALSE, ismatis, swap = PETSC_FALSE, algebraic = PETSC_FALSE, block = PETSC_FALSE;
1792   DM                                         dm;
1793   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx = nullptr;
1794 #if PetscDefined(USE_DEBUG)
1795   IS  dis  = nullptr;
1796   Mat daux = nullptr;
1797 #endif
1798 
1799   PetscFunctionBegin;
1800   PetscCheck(data->levels && data->levels[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not a single level allocated");
1801   PetscCall(PCGetOptionsPrefix(pc, &pcpre));
1802   PetscCall(PCGetOperators(pc, &A, &P));
1803   if (!data->levels[0]->ksp) {
1804     PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &data->levels[0]->ksp));
1805     PetscCall(KSPSetNestLevel(data->levels[0]->ksp, pc->kspnestlevel));
1806     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_%s_", pcpre ? pcpre : "", data->N > 1 ? "levels_1" : "coarse"));
1807     PetscCall(KSPSetOptionsPrefix(data->levels[0]->ksp, prefix));
1808     PetscCall(KSPSetType(data->levels[0]->ksp, KSPPREONLY));
1809   } else if (data->levels[0]->ksp->pc && data->levels[0]->ksp->pc->setupcalled && data->levels[0]->ksp->pc->reusepreconditioner) {
1810     /* if the fine-level PCSHELL exists, its setup has succeeded, and one wants to reuse it, */
1811     /* then just propagate the appropriate flag to the coarser levels                        */
1812     for (n = 0; n < PETSC_PCHPDDM_MAXLEVELS && data->levels[n]; ++n) {
1813       /* the following KSP and PC may be NULL for some processes, hence the check            */
1814       if (data->levels[n]->ksp) PetscCall(KSPSetReusePreconditioner(data->levels[n]->ksp, PETSC_TRUE));
1815       if (data->levels[n]->pc) PetscCall(PCSetReusePreconditioner(data->levels[n]->pc, PETSC_TRUE));
1816     }
1817     /* early bail out because there is nothing to do */
1818     PetscFunctionReturn(PETSC_SUCCESS);
1819   } else {
1820     /* reset coarser levels */
1821     for (n = 1; n < PETSC_PCHPDDM_MAXLEVELS && data->levels[n]; ++n) {
1822       if (data->levels[n]->ksp && data->levels[n]->ksp->pc && data->levels[n]->ksp->pc->setupcalled && data->levels[n]->ksp->pc->reusepreconditioner && n < data->N) {
1823         reused = data->N - n;
1824         break;
1825       }
1826       PetscCall(KSPDestroy(&data->levels[n]->ksp));
1827       PetscCall(PCDestroy(&data->levels[n]->pc));
1828     }
1829     /* check if some coarser levels are being reused */
1830     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reused, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
1831     const int *addr = data->levels[0]->P ? data->levels[0]->P->getAddrLocal() : &HPDDM::i__0;
1832 
1833     if (addr != &HPDDM::i__0 && reused != data->N - 1) {
1834       /* reuse previously computed eigenvectors */
1835       ev = data->levels[0]->P->getVectors();
1836       if (ev) {
1837         initial.reserve(*addr);
1838         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, data->levels[0]->P->getDof(), ev[0], &xin));
1839         for (n = 0; n < *addr; ++n) {
1840           PetscCall(VecDuplicate(xin, &v));
1841           PetscCall(VecPlaceArray(xin, ev[n]));
1842           PetscCall(VecCopy(xin, v));
1843           initial.emplace_back(v);
1844           PetscCall(VecResetArray(xin));
1845         }
1846         PetscCall(VecDestroy(&xin));
1847       }
1848     }
1849   }
1850   data->N -= reused;
1851   PetscCall(KSPSetOperators(data->levels[0]->ksp, A, P));
1852 
1853   PetscCall(PetscObjectTypeCompare((PetscObject)P, MATIS, &ismatis));
1854   if (!data->is && !ismatis) {
1855     PetscErrorCode (*create)(DM, IS *, Mat *, PetscErrorCode (**)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *), void **) = nullptr;
1856     PetscErrorCode (*usetup)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *)                                                = nullptr;
1857     void *uctx                                                                                                               = nullptr;
1858 
1859     /* first see if we can get the data from the DM */
1860     PetscCall(MatGetDM(P, &dm));
1861     if (!dm) PetscCall(MatGetDM(A, &dm));
1862     if (!dm) PetscCall(PCGetDM(pc, &dm));
1863     if (dm) { /* this is the hook for DMPLEX for which the auxiliary Mat is the local Neumann matrix */
1864       PetscCall(PetscObjectQueryFunction((PetscObject)dm, "DMCreateNeumannOverlap_C", &create));
1865       if (create) {
1866         PetscCall((*create)(dm, &uis, &uaux, &usetup, &uctx));
1867         if (data->Neumann == PETSC_BOOL3_UNKNOWN) data->Neumann = PETSC_BOOL3_TRUE; /* set the value only if it was not already provided by the user */
1868       }
1869     }
1870     if (!create) {
1871       if (!uis) {
1872         PetscCall(PetscObjectQuery((PetscObject)pc, "_PCHPDDM_Neumann_IS", (PetscObject *)&uis));
1873         PetscCall(PetscObjectReference((PetscObject)uis));
1874       }
1875       if (!uaux) {
1876         PetscCall(PetscObjectQuery((PetscObject)pc, "_PCHPDDM_Neumann_Mat", (PetscObject *)&uaux));
1877         PetscCall(PetscObjectReference((PetscObject)uaux));
1878       }
1879       /* look inside the Pmat instead of the PC, needed for MatSchurComplementComputeExplicitOperator() */
1880       if (!uis) {
1881         PetscCall(PetscObjectQuery((PetscObject)P, "_PCHPDDM_Neumann_IS", (PetscObject *)&uis));
1882         PetscCall(PetscObjectReference((PetscObject)uis));
1883       }
1884       if (!uaux) {
1885         PetscCall(PetscObjectQuery((PetscObject)P, "_PCHPDDM_Neumann_Mat", (PetscObject *)&uaux));
1886         PetscCall(PetscObjectReference((PetscObject)uaux));
1887       }
1888     }
1889     PetscCall(PCHPDDMSetAuxiliaryMat(pc, uis, uaux, usetup, uctx));
1890     PetscCall(MatDestroy(&uaux));
1891     PetscCall(ISDestroy(&uis));
1892   }
1893 
1894   if (!ismatis) {
1895     PetscCall(PCHPDDMSetUpNeumannOverlap_Private(pc));
1896     PetscCall(PetscOptionsGetBool(((PetscObject)pc)->options, pcpre, "-pc_hpddm_block_splitting", &block, nullptr));
1897     PetscCall(PetscOptionsGetInt(((PetscObject)pc)->options, pcpre, "-pc_hpddm_harmonic_overlap", &overlap, nullptr));
1898     PetscCall(PetscObjectTypeCompare((PetscObject)P, MATSCHURCOMPLEMENT, &flg));
1899     if (data->is || flg) {
1900       if (block || overlap != -1) {
1901         PetscCall(ISDestroy(&data->is));
1902         PetscCall(MatDestroy(&data->aux));
1903       } else if (flg) {
1904         PCHPDDMSchurPreType type = PC_HPDDM_SCHUR_PRE_GENEO;
1905 
1906         PetscCall(PetscOptionsGetEnum(((PetscObject)pc)->options, pcpre, "-pc_hpddm_schur_precondition", PCHPDDMSchurPreTypes, (PetscEnum *)&type, &flg));
1907         if (type == PC_HPDDM_SCHUR_PRE_LEAST_SQUARES) {
1908           PetscCall(ISDestroy(&data->is)); /* destroy any previously user-set objects since they will be set automatically */
1909           PetscCall(MatDestroy(&data->aux));
1910         } else if (type == PC_HPDDM_SCHUR_PRE_GENEO) {
1911           PetscContainer container = nullptr;
1912 
1913           PetscCall(PetscObjectQuery((PetscObject)pc, "_PCHPDDM_Schur", (PetscObject *)&container));
1914           if (!container) { /* first call to PCSetUp() on the PC associated to the Schur complement */
1915             PC_HPDDM       *data_00;
1916             KSP             ksp, inner_ksp;
1917             PC              pc_00;
1918             Mat             A11 = nullptr;
1919             Vec             d   = nullptr;
1920             PetscReal       norm;
1921             const PetscInt *ranges;
1922             PetscMPIInt     size;
1923             char           *prefix;
1924 
1925             PetscCall(MatSchurComplementGetKSP(P, &ksp));
1926             PetscCall(KSPGetPC(ksp, &pc_00));
1927             PetscCall(PetscObjectTypeCompare((PetscObject)pc_00, PCHPDDM, &flg));
1928             PetscCheck(flg, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s and -%spc_type %s (!= %s)", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type], ((PetscObject)pc_00)->prefix ? ((PetscObject)pc_00)->prefix : "",
1929                        ((PetscObject)pc_00)->type_name, PCHPDDM);
1930             data_00 = (PC_HPDDM *)pc_00->data;
1931             PetscCheck(data_00->N == 2, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s and %" PetscInt_FMT " level%s instead of 2 for the A00 block -%s", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type],
1932                        data_00->N, data_00->N > 1 ? "s" : "", ((PetscObject)pc_00)->prefix);
1933             PetscCheck(data_00->levels[0]->pc, PetscObjectComm((PetscObject)P), PETSC_ERR_ORDER, "PC of the first block%s not setup yet", ((PetscObject)pc_00)->prefix ? std::string(std::string(" (") + ((PetscObject)pc_00)->prefix + std::string(")")).c_str() : "");
1934             PetscCall(PetscObjectTypeCompare((PetscObject)data_00->levels[0]->pc, PCASM, &flg));
1935             PetscCheck(flg, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s and -%spc_type %s (!= %s)", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type], ((PetscObject)data_00->levels[0]->pc)->prefix,
1936                        ((PetscObject)data_00->levels[0]->pc)->type_name, PCASM);
1937             PetscCall(PetscNew(&ctx)); /* context to pass data around for the inner-most PC, which will be a proper PCHPDDM (or a dummy variable if the Schur complement is centralized on a single process)  */
1938             PetscCall(MatSchurComplementGetSubMatrices(P, nullptr, nullptr, nullptr, nullptr, &A11));
1939             PetscCall(MatGetOwnershipRanges(A11, &ranges));
1940             PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A11), &size));
1941             flg = PetscBool(std::find_if(ranges, ranges + size + 1, [&](PetscInt v) { return v != ranges[0] && v != ranges[size]; }) == ranges + size + 1); /* are all local matrices but one of dimension 0 (centralized Schur complement)? */
1942             if (!flg) {
1943               if (PetscDefined(USE_DEBUG) || !data->is) {
1944                 Mat A01, A10, B = nullptr, C = nullptr, *sub;
1945 
1946                 PetscCall(MatSchurComplementGetSubMatrices(P, &A, nullptr, &A01, &A10, nullptr));
1947                 PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATTRANSPOSEVIRTUAL, &flg));
1948                 if (flg) {
1949                   PetscCall(MatTransposeGetMat(A10, &C));
1950                   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &B));
1951                 } else {
1952                   PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
1953                   if (flg) {
1954                     PetscCall(MatHermitianTransposeGetMat(A10, &C));
1955                     PetscCall(MatHermitianTranspose(C, MAT_INITIAL_MATRIX, &B));
1956                   }
1957                 }
1958                 if (flg)
1959                   PetscCall(MatShellGetScalingShifts(A10, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1960                 if (!B) {
1961                   B = A10;
1962                   PetscCall(PetscObjectReference((PetscObject)B));
1963                 } else if (!data->is) {
1964                   PetscCall(PetscObjectTypeCompareAny((PetscObject)A01, &flg, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
1965                   if (!flg) C = A01;
1966                   else
1967                     PetscCall(MatShellGetScalingShifts(A01, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1968                 }
1969                 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->rmap->N, 0, 1, &uis));
1970                 PetscCall(ISSetIdentity(uis));
1971                 if (!data->is) {
1972                   if (C) PetscCall(PetscObjectReference((PetscObject)C));
1973                   else PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &C));
1974                   PetscCall(ISDuplicate(data_00->is, is));
1975                   PetscCall(MatIncreaseOverlap(A, 1, is, 1));
1976                   PetscCall(MatSetOption(C, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
1977                   PetscCall(MatCreateSubMatrices(C, 1, is, &uis, MAT_INITIAL_MATRIX, &sub));
1978                   PetscCall(MatDestroy(&C));
1979                   PetscCall(MatTranspose(sub[0], MAT_INITIAL_MATRIX, &C));
1980                   PetscCall(MatDestroySubMatrices(1, &sub));
1981                   PetscCall(MatFindNonzeroRows(C, &data->is));
1982                   PetscCheck(data->is, PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "No empty row, which likely means that some rows of A_10 are dense");
1983                   PetscCall(MatDestroy(&C));
1984                   PetscCall(ISDestroy(is));
1985                   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)data->is), A11->rmap->n, A11->rmap->rstart, 1, &loc));
1986                   if (PetscDefined(USE_DEBUG)) PetscCall(PCHPDDMCheckInclusion_Private(pc, data->is, loc, PETSC_FALSE));
1987                   PetscCall(ISExpand(data->is, loc, is));
1988                   PetscCall(ISDestroy(&loc));
1989                   PetscCall(ISDestroy(&data->is));
1990                   data->is = is[0];
1991                   is[0]    = nullptr;
1992                 }
1993                 if (PetscDefined(USE_DEBUG)) {
1994                   PetscCall(PCHPDDMCheckSymmetry_Private(pc, A01, A10));
1995                   PetscCall(MatCreateSubMatrices(B, 1, &uis, &data_00->is, MAT_INITIAL_MATRIX, &sub)); /* expensive check since all processes fetch all rows (but only some columns) of the constraint matrix */
1996                   PetscCall(ISDestroy(&uis));
1997                   PetscCall(ISDuplicate(data->is, &uis));
1998                   PetscCall(ISSort(uis));
1999                   PetscCall(ISComplement(uis, 0, B->rmap->N, is));
2000                   PetscCall(MatDuplicate(sub[0], MAT_COPY_VALUES, &C));
2001                   PetscCall(MatZeroRowsIS(C, is[0], 0.0, nullptr, nullptr));
2002                   PetscCall(ISDestroy(is));
2003                   PetscCall(MatMultEqual(sub[0], C, 20, &flg));
2004                   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "The image of A_10 (R_i^p)^T from the local primal (e.g., velocity) space to the full dual (e.g., pressure) space is not restricted to the local dual space: A_10 (R_i^p)^T != R_i^d (R_i^d)^T A_10 (R_i^p)^T"); /* cf. eq. (9) of https://hal.science/hal-02343808v6/document */
2005                   PetscCall(MatDestroy(&C));
2006                   PetscCall(MatDestroySubMatrices(1, &sub));
2007                 }
2008                 PetscCall(ISDestroy(&uis));
2009                 PetscCall(MatDestroy(&B));
2010               }
2011               flg = PETSC_FALSE;
2012               if (!data->aux) {
2013                 Mat D;
2014 
2015                 PetscCall(MatCreateVecs(A11, &d, nullptr));
2016                 PetscCall(MatGetDiagonal(A11, d));
2017                 PetscCall(PetscObjectTypeCompareAny((PetscObject)A11, &flg, MATDIAGONAL, MATCONSTANTDIAGONAL, ""));
2018                 if (!flg) {
2019                   PetscCall(MatCreateDiagonal(d, &D));
2020                   PetscCall(MatMultEqual(A11, D, 20, &flg));
2021                   PetscCall(MatDestroy(&D));
2022                 }
2023                 if (flg) PetscCall(PetscInfo(pc, "A11 block is likely diagonal so the PC will build an auxiliary Mat (which was not initially provided by the user)\n"));
2024               }
2025               if ((PetscDefined(USE_DEBUG) || (data->Neumann != PETSC_BOOL3_TRUE && !flg)) && A11) {
2026                 PetscCall(MatNorm(A11, NORM_INFINITY, &norm));
2027                 if (data->Neumann != PETSC_BOOL3_TRUE && !flg) {
2028                   PetscCheck(norm < PETSC_MACHINE_EPSILON * static_cast<PetscReal>(10.0), PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition geneo and -%spc_hpddm_has_neumann != true with a nonzero or non-diagonal A11 block", pcpre ? pcpre : "", pcpre ? pcpre : "");
2029                   PetscCall(PetscInfo(pc, "A11 block is likely zero so the PC will build an auxiliary Mat (which was%s initially provided by the user)\n", data->aux ? "" : " not"));
2030                   PetscCall(MatDestroy(&data->aux));
2031                   flg = PETSC_TRUE;
2032                 }
2033               }
2034               if (!data->aux) { /* if A11 is near zero, e.g., Stokes equation, or diagonal, build an auxiliary (Neumann) Mat which is a (possibly slightly shifted) diagonal weighted by the inverse of the multiplicity */
2035                 PetscSF            scatter;
2036                 const PetscScalar *read;
2037                 PetscScalar       *write, *diagonal = nullptr;
2038 
2039                 PetscCall(MatDestroy(&data->aux));
2040                 PetscCall(ISGetLocalSize(data->is, &n));
2041                 PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)P), n, PETSC_DECIDE, &xin));
2042                 PetscCall(VecDuplicate(xin, &v));
2043                 PetscCall(VecScatterCreate(xin, data->is, v, nullptr, &scatter));
2044                 PetscCall(VecSet(v, 1.0));
2045                 PetscCall(VecSet(xin, 1.0));
2046                 PetscCall(VecScatterBegin(scatter, v, xin, ADD_VALUES, SCATTER_REVERSE));
2047                 PetscCall(VecScatterEnd(scatter, v, xin, ADD_VALUES, SCATTER_REVERSE)); /* v has the multiplicity of all unknowns on the overlap */
2048                 PetscCall(PetscSFDestroy(&scatter));
2049                 if (d) {
2050                   PetscCall(VecScatterCreate(d, data->is, v, nullptr, &scatter));
2051                   PetscCall(VecScatterBegin(scatter, d, v, INSERT_VALUES, SCATTER_FORWARD));
2052                   PetscCall(VecScatterEnd(scatter, d, v, INSERT_VALUES, SCATTER_FORWARD));
2053                   PetscCall(PetscSFDestroy(&scatter));
2054                   PetscCall(VecDestroy(&d));
2055                   PetscCall(PetscMalloc1(n, &diagonal));
2056                   PetscCall(VecGetArrayRead(v, &read));
2057                   PetscCallCXX(std::copy_n(read, n, diagonal));
2058                   PetscCall(VecRestoreArrayRead(v, &read));
2059                 }
2060                 PetscCall(VecDestroy(&v));
2061                 PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &v));
2062                 PetscCall(VecGetArrayRead(xin, &read));
2063                 PetscCall(VecGetArrayWrite(v, &write));
2064                 for (PetscInt i = 0; i < n; ++i) write[i] = (!diagonal || std::abs(diagonal[i]) < PETSC_MACHINE_EPSILON) ? PETSC_SMALL / (static_cast<PetscReal>(1000.0) * read[i]) : diagonal[i] / read[i];
2065                 PetscCall(PetscFree(diagonal));
2066                 PetscCall(VecRestoreArrayRead(xin, &read));
2067                 PetscCall(VecRestoreArrayWrite(v, &write));
2068                 PetscCall(VecDestroy(&xin));
2069                 PetscCall(MatCreateDiagonal(v, &data->aux));
2070                 PetscCall(VecDestroy(&v));
2071               }
2072               uis  = data->is;
2073               uaux = data->aux;
2074               PetscCall(PetscObjectReference((PetscObject)uis));
2075               PetscCall(PetscObjectReference((PetscObject)uaux));
2076               PetscCall(PetscStrallocpy(pcpre, &prefix));
2077               PetscCall(PCSetOptionsPrefix(pc, nullptr));
2078               PetscCall(PCSetType(pc, PCKSP));                                    /* replace the PC associated to the Schur complement by PCKSP */
2079               PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &inner_ksp)); /* new KSP that will be attached to the previously set PC */
2080               PetscCall(PetscObjectGetTabLevel((PetscObject)pc, &n));
2081               PetscCall(PetscObjectSetTabLevel((PetscObject)inner_ksp, n + 2));
2082               PetscCall(KSPSetOperators(inner_ksp, pc->mat, pc->pmat));
2083               PetscCall(KSPSetOptionsPrefix(inner_ksp, std::string(std::string(prefix) + "pc_hpddm_").c_str()));
2084               PetscCall(KSPSetSkipPCSetFromOptions(inner_ksp, PETSC_TRUE));
2085               PetscCall(KSPSetFromOptions(inner_ksp));
2086               PetscCall(KSPGetPC(inner_ksp, &inner));
2087               PetscCall(PCSetOptionsPrefix(inner, nullptr));
2088               PetscCall(PCSetType(inner, PCNONE)); /* no preconditioner since the action of M^-1 A or A M^-1 will be computed by the Amat */
2089               PetscCall(PCKSPSetKSP(pc, inner_ksp));
2090               std::get<0>(*ctx)[0] = pc_00; /* for coarse correction on the primal (e.g., velocity) space */
2091               PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &std::get<0>(*ctx)[1]));
2092               PetscCall(PCSetOptionsPrefix(pc, prefix)); /* both PC share the same prefix so that the outer PC can be reset with PCSetFromOptions() */
2093               PetscCall(PCSetOptionsPrefix(std::get<0>(*ctx)[1], prefix));
2094               PetscCall(PetscFree(prefix));
2095               PetscCall(PCSetOperators(std::get<0>(*ctx)[1], pc->mat, pc->pmat));
2096               PetscCall(PCSetType(std::get<0>(*ctx)[1], PCHPDDM));
2097               PetscCall(PCHPDDMSetAuxiliaryMat(std::get<0>(*ctx)[1], uis, uaux, nullptr, nullptr)); /* transfer ownership of the auxiliary inputs from the inner (PCKSP) to the inner-most (PCHPDDM) PC */
2098               if (flg) static_cast<PC_HPDDM *>(std::get<0>(*ctx)[1]->data)->Neumann = PETSC_BOOL3_TRUE;
2099               else if (PetscDefined(USE_DEBUG) && norm > PETSC_MACHINE_EPSILON * static_cast<PetscReal>(10.0)) {
2100                 /* no check when A11 is near zero */
2101                 PetscCall(MatCreateSubMatrices(A11, 1, &uis, &uis, MAT_INITIAL_MATRIX, &sub));
2102                 PetscCall(PCHPDDMCheckMatStructure_Private(pc, sub[0], uaux));
2103                 PetscCall(MatDestroySubMatrices(1, &sub));
2104               }
2105               PetscCall(PCSetFromOptions(std::get<0>(*ctx)[1]));
2106               PetscCall(PetscObjectDereference((PetscObject)uis));
2107               PetscCall(PetscObjectDereference((PetscObject)uaux));
2108               PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc), inner->mat->rmap->n, inner->mat->cmap->n, inner->mat->rmap->N, inner->mat->cmap->N, ctx, &S)); /* MatShell computing the action of M^-1 A or A M^-1 */
2109               PetscCall(MatShellSetOperation(S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_SchurCorrection));
2110               PetscCall(MatShellSetOperation(S, MATOP_VIEW, (PetscErrorCodeFn *)MatView_SchurCorrection));
2111               PetscCall(MatShellSetOperation(S, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_SchurCorrection));
2112               PetscCall(KSPGetPCSide(inner_ksp, &(std::get<2>(*ctx))));
2113               if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) {
2114                 PetscCall(KSPSetPreSolve(inner_ksp, KSPPreSolve_SchurCorrection, ctx));
2115               } else { /* no support for PC_SYMMETRIC */
2116                 PetscCheck(std::get<2>(*ctx) == PC_RIGHT, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "PCSide %s (!= %s or %s or %s)", PCSides[std::get<2>(*ctx)], PCSides[PC_SIDE_DEFAULT], PCSides[PC_LEFT], PCSides[PC_RIGHT]);
2117               }
2118               PetscCall(KSPSetPostSolve(inner_ksp, KSPPostSolve_SchurCorrection, ctx));
2119               PetscCall(PetscObjectContainerCompose((PetscObject)std::get<0>(*ctx)[1], "_PCHPDDM_Schur", ctx, nullptr));
2120               PetscCall(PCSetUp(std::get<0>(*ctx)[1]));
2121               PetscCall(KSPSetOperators(inner_ksp, S, S));
2122               PetscCall(MatCreateVecs(std::get<1>(*ctx)[0], std::get<3>(*ctx), std::get<3>(*ctx) + 1));
2123               PetscCall(VecDuplicate(std::get<3>(*ctx)[0], std::get<3>(*ctx) + 2));
2124               PetscCall(PetscObjectDereference((PetscObject)inner_ksp));
2125               PetscCall(PetscObjectDereference((PetscObject)S));
2126             } else {
2127               std::get<0>(*ctx)[0] = pc_00;
2128               PetscCall(PetscObjectContainerCompose((PetscObject)pc, "_PCHPDDM_Schur", ctx, nullptr));
2129               PetscCall(ISCreateStride(PetscObjectComm((PetscObject)data_00->is), A11->rmap->n, A11->rmap->rstart, 1, &data->is)); /* dummy variables in the case of a centralized Schur complement */
2130               PetscCall(MatGetDiagonalBlock(A11, &data->aux));
2131               PetscCall(PetscObjectReference((PetscObject)data->aux));
2132               PetscCall(PCSetUp(pc));
2133             }
2134             for (std::vector<Vec>::iterator it = initial.begin(); it != initial.end(); ++it) PetscCall(VecDestroy(&*it));
2135             PetscFunctionReturn(PETSC_SUCCESS);
2136           } else { /* second call to PCSetUp() on the PC associated to the Schur complement, retrieve previously set context */
2137             PetscCall(PetscContainerGetPointer(container, &ctx));
2138           }
2139         }
2140       }
2141     }
2142     if (!data->is && data->N > 1) {
2143       char type[256] = {}; /* same size as in src/ksp/pc/interface/pcset.c */
2144 
2145       PetscCall(PetscObjectTypeCompareAny((PetscObject)P, &flg, MATNORMAL, MATNORMALHERMITIAN, ""));
2146       if (flg || (A->rmap->N != A->cmap->N && P->rmap->N == P->cmap->N && P->rmap->N == A->cmap->N)) {
2147         Mat B;
2148 
2149         PetscCall(PCHPDDMSetAuxiliaryMatNormal_Private(pc, A, P, &B, pcpre));
2150         if (data->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED) data->correction = PC_HPDDM_COARSE_CORRECTION_BALANCED;
2151         PetscCall(MatDestroy(&B));
2152       } else {
2153         PetscCall(PetscObjectTypeCompare((PetscObject)P, MATSCHURCOMPLEMENT, &flg));
2154         if (flg) {
2155           Mat                 A00, P00, A01, A10, A11, B, N;
2156           PCHPDDMSchurPreType type = PC_HPDDM_SCHUR_PRE_LEAST_SQUARES;
2157 
2158           PetscCall(MatSchurComplementGetSubMatrices(P, &A00, &P00, &A01, &A10, &A11));
2159           PetscCall(PetscOptionsGetEnum(((PetscObject)pc)->options, pcpre, "-pc_hpddm_schur_precondition", PCHPDDMSchurPreTypes, (PetscEnum *)&type, &flg));
2160           if (type == PC_HPDDM_SCHUR_PRE_LEAST_SQUARES) {
2161             Mat                        B01;
2162             Vec                        diagonal = nullptr;
2163             const PetscScalar         *array;
2164             MatSchurComplementAinvType type;
2165 
2166             PetscCall(PCHPDDMCheckSymmetry_Private(pc, A01, A10, &B01));
2167             if (A11) {
2168               PetscCall(MatCreateVecs(A11, &diagonal, nullptr));
2169               PetscCall(MatGetDiagonal(A11, diagonal));
2170             }
2171             PetscCall(MatCreateVecs(P00, &v, nullptr));
2172             PetscCall(MatSchurComplementGetAinvType(P, &type));
2173             PetscCheck(type == MAT_SCHUR_COMPLEMENT_AINV_DIAG || type == MAT_SCHUR_COMPLEMENT_AINV_LUMP || type == MAT_SCHUR_COMPLEMENT_AINV_BLOCK_DIAG, PetscObjectComm((PetscObject)P), PETSC_ERR_SUP, "-%smat_schur_complement_ainv_type %s",
2174                        ((PetscObject)P)->prefix ? ((PetscObject)P)->prefix : "", MatSchurComplementAinvTypes[type]);
2175             if (type != MAT_SCHUR_COMPLEMENT_AINV_BLOCK_DIAG) {
2176               if (type == MAT_SCHUR_COMPLEMENT_AINV_LUMP) {
2177                 PetscCall(MatGetRowSum(P00, v));
2178                 if (A00 == P00) PetscCall(PetscObjectReference((PetscObject)A00));
2179                 PetscCall(MatDestroy(&P00));
2180                 PetscCall(VecGetArrayRead(v, &array));
2181                 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A00), A00->rmap->n, A00->cmap->n, A00->rmap->N, A00->cmap->N, 1, nullptr, 0, nullptr, &P00));
2182                 PetscCall(MatSetOption(P00, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2183                 for (n = A00->rmap->rstart; n < A00->rmap->rend; ++n) PetscCall(MatSetValue(P00, n, n, array[n - A00->rmap->rstart], INSERT_VALUES));
2184                 PetscCall(MatAssemblyBegin(P00, MAT_FINAL_ASSEMBLY));
2185                 PetscCall(MatAssemblyEnd(P00, MAT_FINAL_ASSEMBLY));
2186                 PetscCall(VecRestoreArrayRead(v, &array));
2187                 PetscCall(MatSchurComplementUpdateSubMatrices(P, A00, P00, A01, A10, A11)); /* replace P00 by diag(sum of each row of P00) */
2188                 PetscCall(MatDestroy(&P00));
2189               } else PetscCall(MatGetDiagonal(P00, v));
2190               PetscCall(VecReciprocal(v)); /* inv(diag(P00))       */
2191               PetscCall(VecSqrtAbs(v));    /* sqrt(inv(diag(P00))) */
2192               PetscCall(MatDuplicate(A01, MAT_COPY_VALUES, &B));
2193               PetscCall(MatDiagonalScale(B, v, nullptr));
2194               if (B01) PetscCall(MatDiagonalScale(B01, v, nullptr));
2195             } else {
2196               Mat     D00;
2197               MatType type;
2198 
2199               PetscCall(MatCreate(PetscObjectComm((PetscObject)A00), &D00));
2200               PetscCall(MatSetType(D00, MATAIJ));
2201               PetscCall(MatSetOptionsPrefix(D00, ((PetscObject)A00)->prefix));
2202               PetscCall(MatAppendOptionsPrefix(D00, "block_diagonal_"));
2203               PetscCall(MatSetFromOptions(D00));                          /* for setting -mat_block_size dynamically */
2204               PetscCall(MatConvert(A00, MATAIJ, MAT_INITIAL_MATRIX, &B)); /* not all MatTypes have a MatInvertBlockDiagonal() implementation, plus one may want to use a different block size than the one of A00 */
2205               PetscCall(MatSetBlockSizesFromMats(B, D00, D00));
2206               PetscCall(MatInvertBlockDiagonalMat(B, D00));
2207               PetscCall(MatDestroy(&B));
2208               PetscCall(MatGetType(A01, &type));                            /* cache MatType */
2209               PetscCall(MatConvert(A01, MATAIJ, MAT_INPLACE_MATRIX, &A01)); /* MatProduct is not versatile enough to fallback gracefully if no implementation found, so MatConvert() */
2210               PetscCall(MatMatMult(D00, A01, MAT_INITIAL_MATRIX, PETSC_CURRENT, &B));
2211               PetscCall(MatDestroy(&D00));
2212               PetscCall(MatConvert(A01, type, MAT_INPLACE_MATRIX, &A01)); /* reset to previous MatType */
2213               PetscCall(MatConvert(B, type, MAT_INPLACE_MATRIX, &B));
2214               if (!B01) { /* symmetric case */
2215                 B01 = A01;
2216                 PetscCall(PetscObjectReference((PetscObject)B01));
2217               }
2218             }
2219             if (B01 && B01 != A01) PetscCall(MatSetBlockSizesFromMats(B01, A01, A01)); /* TODO: remove this line once Firedrake is fixed */
2220             PetscCall(VecDestroy(&v));
2221             PetscCall(MatCreateNormalHermitian(B, &N));
2222             PetscCall(PCHPDDMSetAuxiliaryMatNormal_Private(pc, B, N, &P, pcpre, &diagonal, B01));
2223             PetscCall(PetscObjectTypeCompare((PetscObject)data->aux, MATSEQAIJ, &flg));
2224             if (!flg) {
2225               PetscCall(MatDestroy(&P));
2226               P = N;
2227               PetscCall(PetscObjectReference((PetscObject)P));
2228             }
2229             if (diagonal) {
2230               PetscCall(MatSetOption(P, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); /* may have missing diagonal entries */
2231               PetscCall(MatDiagonalSet(P, diagonal, ADD_VALUES));
2232               PetscCall(PCSetOperators(pc, P, P)); /* replace P by A01^T inv(diag(P00)) A01 - diag(P11) */
2233               PetscCall(VecDestroy(&diagonal));
2234             } else PetscCall(PCSetOperators(pc, B01 ? P : N, P));  /* replace P by A01^T inv(diag(P00)) A01                         */
2235             pc->ops->postsolve = PCPostSolve_SchurPreLeastSquares; /*  PCFIELDSPLIT expect a KSP for (P11 - A10 inv(diag(P00)) A01) */
2236             PetscCall(MatDestroy(&N));                             /*  but a PC for (A10 inv(diag(P00)) A10 - P11) is setup instead */
2237             PetscCall(MatDestroy(&P));                             /*  so the sign of the solution must be flipped                  */
2238             PetscCall(MatDestroy(&B));
2239           } else
2240             PetscCheck(type != PC_HPDDM_SCHUR_PRE_GENEO, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s without a prior call to PCHPDDMSetAuxiliaryMat() on the A11 block%s%s", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type], pcpre ? " -" : "", pcpre ? pcpre : "");
2241           for (std::vector<Vec>::iterator it = initial.begin(); it != initial.end(); ++it) PetscCall(VecDestroy(&*it));
2242           PetscFunctionReturn(PETSC_SUCCESS);
2243         } else {
2244           PetscCall(PetscOptionsGetString(((PetscObject)pc)->options, pcpre, "-pc_hpddm_levels_1_st_pc_type", type, sizeof(type), nullptr));
2245           PetscCall(PetscStrcmp(type, PCMAT, &algebraic));
2246           PetscCheck(!algebraic || !block, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-pc_hpddm_levels_1_st_pc_type mat and -pc_hpddm_block_splitting");
2247           if (overlap != -1) {
2248             PetscCheck(!block && !algebraic, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-pc_hpddm_%s and -pc_hpddm_harmonic_overlap", block ? "block_splitting" : "levels_1_st_pc_type mat");
2249             PetscCheck(overlap >= 1, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_WRONG, "-pc_hpddm_harmonic_overlap %" PetscInt_FMT " < 1", overlap);
2250           }
2251           if (block || overlap != -1) algebraic = PETSC_TRUE;
2252           if (algebraic) {
2253             PetscCall(ISCreateStride(PETSC_COMM_SELF, P->rmap->n, P->rmap->rstart, 1, &data->is));
2254             PetscCall(MatIncreaseOverlap(P, 1, &data->is, 1));
2255             PetscCall(ISSort(data->is));
2256           } else
2257             PetscCall(PetscInfo(pc, "Cannot assemble a fully-algebraic coarse operator with an assembled Pmat and -%spc_hpddm_levels_1_st_pc_type != mat and -%spc_hpddm_block_splitting != true and -%spc_hpddm_harmonic_overlap < 1\n", pcpre ? pcpre : "", pcpre ? pcpre : "", pcpre ? pcpre : ""));
2258         }
2259       }
2260     }
2261   }
2262 #if PetscDefined(USE_DEBUG)
2263   if (data->is) PetscCall(ISDuplicate(data->is, &dis));
2264   if (data->aux) PetscCall(MatDuplicate(data->aux, MAT_COPY_VALUES, &daux));
2265 #endif
2266   if (data->is || (ismatis && data->N > 1)) {
2267     if (ismatis) {
2268       std::initializer_list<std::string> list = {MATSEQBAIJ, MATSEQSBAIJ};
2269       PetscCall(MatISGetLocalMat(P, &N));
2270       std::initializer_list<std::string>::const_iterator it = std::find(list.begin(), list.end(), ((PetscObject)N)->type_name);
2271       PetscCall(MatISRestoreLocalMat(P, &N));
2272       switch (std::distance(list.begin(), it)) {
2273       case 0:
2274         PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &C));
2275         break;
2276       case 1:
2277         /* MatCreateSubMatrices() does not work with MATSBAIJ and unsorted ISes, so convert to MPIBAIJ */
2278         PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &C));
2279         break;
2280       default:
2281         PetscCall(MatConvert(P, MATMPIAIJ, MAT_INITIAL_MATRIX, &C));
2282       }
2283       PetscCall(MatISGetLocalToGlobalMapping(P, &l2g, nullptr));
2284       PetscCall(PetscObjectReference((PetscObject)P));
2285       PetscCall(KSPSetOperators(data->levels[0]->ksp, A, C));
2286       std::swap(C, P);
2287       PetscCall(ISLocalToGlobalMappingGetSize(l2g, &n));
2288       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &loc));
2289       PetscCall(ISLocalToGlobalMappingApplyIS(l2g, loc, &is[0]));
2290       PetscCall(ISDestroy(&loc));
2291       /* the auxiliary Mat is _not_ the local Neumann matrix                                */
2292       /* it is the local Neumann matrix augmented (with zeros) through MatIncreaseOverlap() */
2293       data->Neumann = PETSC_BOOL3_FALSE;
2294       structure     = SAME_NONZERO_PATTERN;
2295     } else {
2296       is[0] = data->is;
2297       if (algebraic || ctx) subdomains = PETSC_TRUE;
2298       PetscCall(PetscOptionsGetBool(((PetscObject)pc)->options, pcpre, "-pc_hpddm_define_subdomains", &subdomains, nullptr));
2299       if (ctx) PetscCheck(subdomains, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition geneo and -%spc_hpddm_define_subdomains false", pcpre, pcpre);
2300       if (PetscBool3ToBool(data->Neumann)) {
2301         PetscCheck(!block, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-pc_hpddm_block_splitting and -pc_hpddm_has_neumann");
2302         PetscCheck(overlap == -1, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-pc_hpddm_harmonic_overlap %" PetscInt_FMT " and -pc_hpddm_has_neumann", overlap);
2303         PetscCheck(!algebraic, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-pc_hpddm_levels_1_st_pc_type mat and -pc_hpddm_has_neumann");
2304       }
2305       if (PetscBool3ToBool(data->Neumann) || block) structure = SAME_NONZERO_PATTERN;
2306       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)data->is), P->rmap->n, P->rmap->rstart, 1, &loc));
2307     }
2308     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_", pcpre ? pcpre : ""));
2309     PetscCall(PetscOptionsGetEnum(((PetscObject)pc)->options, prefix, "-st_matstructure", MatStructures, (PetscEnum *)&structure, &flg)); /* if not user-provided, force its value when possible */
2310     if (!flg && structure == SAME_NONZERO_PATTERN) { /* cannot call STSetMatStructure() yet, insert the appropriate option in the database, parsed by STSetFromOptions() */
2311       PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-%spc_hpddm_levels_1_st_matstructure", pcpre ? pcpre : ""));
2312       PetscCall(PetscOptionsSetValue(((PetscObject)pc)->options, prefix, MatStructures[structure]));
2313     }
2314     flg = PETSC_FALSE;
2315     if (data->share) {
2316       data->share = PETSC_FALSE; /* will be reset to PETSC_TRUE if none of the conditions below are true */
2317       if (!subdomains) PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since -%spc_hpddm_define_subdomains is not true\n", pcpre ? pcpre : ""));
2318       else if (data->deflation) PetscCall(PetscInfo(pc, "Nothing to share since PCHPDDMSetDeflationMat() has been called\n"));
2319       else if (ismatis) PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc with a Pmat of type MATIS\n"));
2320       else if (!algebraic && structure != SAME_NONZERO_PATTERN)
2321         PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since -%spc_hpddm_levels_1_st_matstructure %s (!= %s)\n", pcpre ? pcpre : "", MatStructures[structure], MatStructures[SAME_NONZERO_PATTERN]));
2322       else data->share = PETSC_TRUE;
2323     }
2324     if (!ismatis) {
2325       if (data->share || (!PetscBool3ToBool(data->Neumann) && subdomains)) PetscCall(ISDuplicate(is[0], &unsorted));
2326       else unsorted = is[0];
2327     }
2328     if ((ctx || data->N > 1) && (data->aux || ismatis || algebraic)) {
2329       PetscCheck(loadedSym, PETSC_COMM_SELF, PETSC_ERR_PLIB, "HPDDM library not loaded, cannot use more than one level");
2330       PetscCall(MatSetOption(P, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
2331       if (ismatis) {
2332         /* needed by HPDDM (currently) so that the partition of unity is 0 on subdomain interfaces */
2333         PetscCall(MatIncreaseOverlap(P, 1, is, 1));
2334         PetscCall(ISDestroy(&data->is));
2335         data->is = is[0];
2336       } else {
2337         if (PetscDefined(USE_DEBUG)) PetscCall(PCHPDDMCheckInclusion_Private(pc, data->is, loc, PETSC_TRUE));
2338         if (!ctx && overlap == -1) PetscCall(PetscObjectComposeFunction((PetscObject)pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_Private_C", PCHPDDMAlgebraicAuxiliaryMat_Private));
2339         if (!PetscBool3ToBool(data->Neumann) && (!algebraic || overlap != -1)) {
2340           PetscCall(PetscObjectTypeCompare((PetscObject)P, MATMPISBAIJ, &flg));
2341           if (flg) {
2342             /* maybe better to ISSort(is[0]), MatCreateSubMatrices(), and then MatPermute() */
2343             /* but there is no MatPermute_SeqSBAIJ(), so as before, just use MATMPIBAIJ     */
2344             PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &uaux));
2345             flg = PETSC_FALSE;
2346           }
2347         }
2348       }
2349       if (algebraic && overlap == -1) {
2350         PetscUseMethod(pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_Private_C", (Mat, IS *, Mat *[], PetscBool), (P, is, &sub, block));
2351         if (block) {
2352           PetscCall(PetscObjectQuery((PetscObject)sub[0], "_PCHPDDM_Neumann_Mat", (PetscObject *)&data->aux));
2353           PetscCall(PetscObjectCompose((PetscObject)sub[0], "_PCHPDDM_Neumann_Mat", nullptr));
2354         }
2355       } else if (!uaux || overlap != -1) {
2356         if (!ctx) {
2357           if (PetscBool3ToBool(data->Neumann)) sub = &data->aux;
2358           else {
2359             PetscBool flg;
2360             if (overlap != -1) {
2361               Harmonic              h;
2362               Mat                   A0, *a;                    /* with an SVD: [ A_00  A_01       ] */
2363               IS                    ov[2], rows, cols, stride; /*              [ A_10  A_11  A_12 ] */
2364               const PetscInt       *i[2], bs = P->cmap->bs;    /* with a GEVP: [ A_00  A_01       ] */
2365               PetscInt              n[2], location;            /*              [ A_10  A_11  A_12 ] */
2366               std::vector<PetscInt> v[2];                      /*              [       A_21  A_22 ] */
2367 
2368               do {
2369                 PetscCall(ISDuplicate(data->is, ov));
2370                 if (overlap > 1) PetscCall(MatIncreaseOverlap(P, 1, ov, overlap - 1));
2371                 PetscCall(ISDuplicate(ov[0], ov + 1));
2372                 PetscCall(MatIncreaseOverlap(P, 1, ov + 1, 1));
2373                 PetscCall(ISGetLocalSize(ov[0], n));
2374                 PetscCall(ISGetLocalSize(ov[1], n + 1));
2375                 flg = PetscBool(n[0] == n[1] && n[0] != P->rmap->n);
2376                 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
2377                 if (flg) {
2378                   PetscCall(ISDestroy(ov));
2379                   PetscCall(ISDestroy(ov + 1));
2380                   PetscCheck(--overlap, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "No oversampling possible");
2381                   PetscCall(PetscInfo(pc, "Supplied -%spc_hpddm_harmonic_overlap parameter is too large, it has been decreased to %" PetscInt_FMT "\n", pcpre ? pcpre : "", overlap));
2382                 } else break;
2383               } while (1);
2384               PetscCall(PetscNew(&h));
2385               h->ksp = nullptr;
2386               PetscCall(PetscCalloc1(2, &h->A));
2387               PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, prefix, "-eps_nev", &flg));
2388               if (!flg) {
2389                 PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, prefix, "-svd_nsv", &flg));
2390                 if (!flg) PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, prefix, "-svd_threshold_relative", &flg));
2391               } else flg = PETSC_FALSE;
2392               PetscCall(ISSort(ov[0]));
2393               if (!flg) PetscCall(ISSort(ov[1]));
2394               PetscCall(PetscCalloc1(5, &h->is));
2395               PetscCall(MatCreateSubMatrices(uaux ? uaux : P, 1, ov + !flg, ov + 1, MAT_INITIAL_MATRIX, &a)); /* submatrix from above, either square (!flg) or rectangular (flg) */
2396               for (PetscInt j = 0; j < 2; ++j) PetscCall(ISGetIndices(ov[j], i + j));
2397               v[1].reserve((n[1] - n[0]) / bs);
2398               for (PetscInt j = 0; j < n[1]; j += bs) { /* indices of the (2,2) block */
2399                 PetscCall(ISLocate(ov[0], i[1][j], &location));
2400                 if (location < 0) v[1].emplace_back(j / bs);
2401               }
2402               if (!flg) {
2403                 h->A[1] = a[0];
2404                 PetscCall(PetscObjectReference((PetscObject)h->A[1]));
2405                 v[0].reserve((n[0] - P->rmap->n) / bs);
2406                 for (PetscInt j = 0; j < n[1]; j += bs) { /* row indices of the (1,2) block */
2407                   PetscCall(ISLocate(loc, i[1][j], &location));
2408                   if (location < 0) {
2409                     PetscCall(ISLocate(ov[0], i[1][j], &location));
2410                     if (location >= 0) v[0].emplace_back(j / bs);
2411                   }
2412                 }
2413                 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[0].size(), v[0].data(), PETSC_USE_POINTER, &rows));
2414                 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[1].size(), v[1].data(), PETSC_COPY_VALUES, h->is + 4));
2415                 PetscCall(MatCreateSubMatrix(a[0], rows, h->is[4], MAT_INITIAL_MATRIX, h->A)); /* A_12 submatrix from above */
2416                 PetscCall(ISDestroy(&rows));
2417                 if (uaux) PetscCall(MatConvert(a[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, a)); /* initial Pmat was MATSBAIJ, convert back to the same format since the rectangular A_12 submatrix has been created */
2418                 PetscCall(ISEmbed(ov[0], ov[1], PETSC_TRUE, &rows));
2419                 PetscCall(MatCreateSubMatrix(a[0], rows, cols = rows, MAT_INITIAL_MATRIX, &A0)); /* [ A_00  A_01 ; A_10  A_11 ] submatrix from above */
2420                 PetscCall(ISDestroy(&rows));
2421                 v[0].clear();
2422                 PetscCall(ISEmbed(loc, ov[1], PETSC_TRUE, h->is + 3));
2423                 PetscCall(ISEmbed(data->is, ov[1], PETSC_TRUE, h->is + 2));
2424               }
2425               v[0].reserve((n[0] - P->rmap->n) / bs);
2426               for (PetscInt j = 0; j < n[0]; j += bs) {
2427                 PetscCall(ISLocate(loc, i[0][j], &location));
2428                 if (location < 0) v[0].emplace_back(j / bs);
2429               }
2430               PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[0].size(), v[0].data(), PETSC_USE_POINTER, &rows));
2431               for (PetscInt j = 0; j < 2; ++j) PetscCall(ISRestoreIndices(ov[j], i + j));
2432               if (flg) {
2433                 PetscCall(ISCreateStride(PETSC_COMM_SELF, a[0]->rmap->n, 0, 1, &stride));
2434                 PetscCall(ISEmbed(ov[0], ov[1], PETSC_TRUE, &cols));
2435                 PetscCall(MatCreateSubMatrix(a[0], stride, cols, MAT_INITIAL_MATRIX, &A0)); /* [ A_00  A_01 ; A_10  A_11 ] submatrix from above */
2436                 PetscCall(ISDestroy(&cols));
2437                 PetscCall(ISDestroy(&stride));
2438                 if (uaux) { /* initial Pmat was MATSBAIJ, convert back to the same format since this submatrix is square */
2439                   PetscCall(MatSetOption(A0, MAT_SYMMETRIC, PETSC_TRUE));
2440                   PetscCall(MatConvert(A0, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A0));
2441                 }
2442                 PetscCall(ISEmbed(loc, data->is, PETSC_TRUE, h->is + 2));
2443                 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[1].size(), v[1].data(), PETSC_USE_POINTER, &cols));
2444                 PetscCall(MatCreateSubMatrix(a[0], rows, cols, MAT_INITIAL_MATRIX, h->A)); /* A_12 submatrix from above */
2445                 PetscCall(ISDestroy(&cols));
2446               }
2447               PetscCall(ISCreateStride(PETSC_COMM_SELF, A0->rmap->n, 0, 1, &stride));
2448               PetscCall(ISEmbed(rows, stride, PETSC_TRUE, h->is));
2449               PetscCall(ISDestroy(&stride));
2450               PetscCall(ISDestroy(&rows));
2451               PetscCall(ISEmbed(loc, ov[0], PETSC_TRUE, h->is + 1));
2452               if (subdomains) {
2453                 if (!data->levels[0]->pc) {
2454                   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &data->levels[0]->pc));
2455                   PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_", pcpre ? pcpre : ""));
2456                   PetscCall(PCSetOptionsPrefix(data->levels[0]->pc, prefix));
2457                   PetscCall(PCSetOperators(data->levels[0]->pc, A, P));
2458                 }
2459                 PetscCall(PCSetType(data->levels[0]->pc, PCASM));
2460                 if (!data->levels[0]->pc->setupcalled) PetscCall(PCASMSetLocalSubdomains(data->levels[0]->pc, 1, ov + !flg, &loc));
2461                 PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(data->levels[0]->pc, flg ? A0 : a[0], PETSC_TRUE));
2462                 if (!flg) ++overlap;
2463                 if (data->share) {
2464                   PetscInt n = -1;
2465                   PetscTryMethod(data->levels[0]->pc, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (data->levels[0]->pc, &n, nullptr, &ksp));
2466                   PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", n);
2467                   if (flg) {
2468                     h->ksp = ksp[0];
2469                     PetscCall(PetscObjectReference((PetscObject)h->ksp));
2470                   }
2471                 }
2472               }
2473               if (!h->ksp) {
2474                 PetscBool share = data->share;
2475 
2476                 PetscCall(KSPCreate(PETSC_COMM_SELF, &h->ksp));
2477                 PetscCall(KSPSetType(h->ksp, KSPPREONLY));
2478                 PetscCall(KSPSetOperators(h->ksp, A0, A0));
2479                 do {
2480                   if (!data->share) {
2481                     share = PETSC_FALSE;
2482                     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_%s", pcpre ? pcpre : "", flg ? "svd_" : "eps_"));
2483                     PetscCall(KSPSetOptionsPrefix(h->ksp, prefix));
2484                     PetscCall(KSPSetFromOptions(h->ksp));
2485                   } else {
2486                     MatSolverType type;
2487 
2488                     PetscCall(PetscObjectTypeCompareAny((PetscObject)ksp[0]->pc, &data->share, PCLU, PCCHOLESKY, ""));
2489                     if (data->share) {
2490                       PetscCall(PCFactorGetMatSolverType(ksp[0]->pc, &type));
2491                       if (!type) {
2492                         if (PetscDefined(HAVE_MUMPS)) PetscCall(PCFactorSetMatSolverType(ksp[0]->pc, MATSOLVERMUMPS));
2493                         else if (PetscDefined(HAVE_MKL_PARDISO)) PetscCall(PCFactorSetMatSolverType(ksp[0]->pc, MATSOLVERMKL_PARDISO));
2494                         else data->share = PETSC_FALSE;
2495                         if (data->share) PetscCall(PCSetFromOptions(ksp[0]->pc));
2496                       } else {
2497                         PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &data->share));
2498                         if (!data->share) PetscCall(PetscStrcmp(type, MATSOLVERMKL_PARDISO, &data->share));
2499                       }
2500                       if (data->share) {
2501                         std::tuple<KSP, IS, Vec[2]> *p;
2502 
2503                         PetscCall(PCFactorGetMatrix(ksp[0]->pc, &A));
2504                         PetscCall(MatFactorSetSchurIS(A, h->is[4]));
2505                         PetscCall(KSPSetUp(ksp[0]));
2506                         PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_eps_shell_", pcpre ? pcpre : ""));
2507                         PetscCall(KSPSetOptionsPrefix(h->ksp, prefix));
2508                         PetscCall(KSPSetFromOptions(h->ksp));
2509                         PetscCall(PCSetType(h->ksp->pc, PCSHELL));
2510                         PetscCall(PetscNew(&p));
2511                         std::get<0>(*p) = ksp[0];
2512                         PetscCall(ISEmbed(ov[0], ov[1], PETSC_TRUE, &std::get<1>(*p)));
2513                         PetscCall(MatCreateVecs(A, std::get<2>(*p), std::get<2>(*p) + 1));
2514                         PetscCall(PCShellSetContext(h->ksp->pc, p));
2515                         PetscCall(PCShellSetApply(h->ksp->pc, PCApply_Schur));
2516                         PetscCall(PCShellSetApplyTranspose(h->ksp->pc, PCApply_Schur<Vec, true>));
2517                         PetscCall(PCShellSetMatApply(h->ksp->pc, PCApply_Schur<Mat>));
2518                         PetscCall(PCShellSetDestroy(h->ksp->pc, PCDestroy_Schur));
2519                       }
2520                     }
2521                     if (!data->share) PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since neither MUMPS nor MKL PARDISO is used\n"));
2522                   }
2523                 } while (!share != !data->share); /* if data->share is initially PETSC_TRUE, but then reset to PETSC_FALSE, then go back to the beginning of the do loop */
2524               }
2525               PetscCall(ISDestroy(ov));
2526               PetscCall(ISDestroy(ov + 1));
2527               if (overlap == 1 && subdomains && flg) {
2528                 *subA = A0;
2529                 sub   = subA;
2530                 if (uaux) PetscCall(MatDestroy(&uaux));
2531               } else PetscCall(MatDestroy(&A0));
2532               PetscCall(MatCreateShell(PETSC_COMM_SELF, P->rmap->n, n[1] - n[0], P->rmap->n, n[1] - n[0], h, &data->aux));
2533               PetscCall(KSPSetErrorIfNotConverged(h->ksp, PETSC_TRUE)); /* bail out as early as possible to avoid (apparently) unrelated error messages */
2534               PetscCall(MatCreateVecs(h->ksp->pc->pmat, &h->v, nullptr));
2535               PetscCall(MatShellSetOperation(data->aux, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Harmonic));
2536               PetscCall(MatShellSetOperation(data->aux, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Harmonic));
2537               PetscCall(MatShellSetMatProductOperation(data->aux, MATPRODUCT_AB, nullptr, MatProduct_AB_Harmonic, nullptr, MATDENSE, MATDENSE));
2538               PetscCall(MatShellSetMatProductOperation(data->aux, MATPRODUCT_AtB, nullptr, MatProduct_AtB_Harmonic, nullptr, MATDENSE, MATDENSE));
2539               PetscCall(MatShellSetOperation(data->aux, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_Harmonic));
2540               PetscCall(MatDestroySubMatrices(1, &a));
2541             }
2542             if (overlap != 1 || !subdomains) {
2543               PetscCall(MatCreateSubMatrices(uaux ? uaux : P, 1, is, is, MAT_INITIAL_MATRIX, &sub));
2544               if (ismatis) {
2545                 PetscCall(MatISGetLocalMat(C, &N));
2546                 PetscCall(PetscObjectTypeCompare((PetscObject)N, MATSEQSBAIJ, &flg));
2547                 if (flg) PetscCall(MatConvert(sub[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, sub));
2548                 PetscCall(MatISRestoreLocalMat(C, &N));
2549               }
2550             }
2551             if (uaux) {
2552               PetscCall(MatDestroy(&uaux));
2553               PetscCall(MatConvert(sub[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, sub));
2554             }
2555           }
2556         }
2557       } else if (!ctx) {
2558         PetscCall(MatCreateSubMatrices(uaux, 1, is, is, MAT_INITIAL_MATRIX, &sub));
2559         PetscCall(MatDestroy(&uaux));
2560         PetscCall(MatConvert(sub[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, sub));
2561       }
2562       if (data->N > 1) {
2563         /* Vec holding the partition of unity */
2564         if (!data->levels[0]->D) {
2565           PetscCall(ISGetLocalSize(data->is, &n));
2566           PetscCall(VecCreateMPI(PETSC_COMM_SELF, n, PETSC_DETERMINE, &data->levels[0]->D));
2567         }
2568         if (data->share && overlap == -1) {
2569           Mat      D;
2570           IS       perm = nullptr;
2571           PetscInt size = -1;
2572 
2573           if (!data->levels[0]->pc) {
2574             PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_", pcpre ? pcpre : ""));
2575             PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &data->levels[0]->pc));
2576             PetscCall(PCSetOptionsPrefix(data->levels[0]->pc, prefix));
2577             PetscCall(PCSetOperators(data->levels[0]->pc, A, P));
2578           }
2579           PetscCall(PCSetType(data->levels[0]->pc, PCASM));
2580           if (!ctx) {
2581             if (!data->levels[0]->pc->setupcalled) {
2582               IS sorted; /* PCASM will sort the input IS, duplicate it to return an unmodified (PCHPDDM) input IS */
2583               PetscCall(ISDuplicate(is[0], &sorted));
2584               PetscCall(PCASMSetLocalSubdomains(data->levels[0]->pc, 1, &sorted, &loc));
2585               PetscCall(PetscObjectDereference((PetscObject)sorted));
2586             }
2587             PetscCall(PCSetFromOptions(data->levels[0]->pc));
2588             if (block) {
2589               PetscCall(PCHPDDMPermute_Private(unsorted, data->is, &uis, sub[0], &C, &perm));
2590               PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(data->levels[0]->pc, C, algebraic));
2591             } else PetscCall(PCSetUp(data->levels[0]->pc));
2592             PetscTryMethod(data->levels[0]->pc, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (data->levels[0]->pc, &size, nullptr, &ksp));
2593             if (size != 1) {
2594               data->share = PETSC_FALSE;
2595               PetscCheck(size == -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", size);
2596               PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since PCASMGetSubKSP() not found in fine-level PC\n"));
2597               PetscCall(ISDestroy(&unsorted));
2598               unsorted = is[0];
2599             } else {
2600               const char *matpre;
2601               PetscBool   cmp[4];
2602 
2603               if (!block && !ctx) PetscCall(PCHPDDMPermute_Private(unsorted, data->is, &uis, PetscBool3ToBool(data->Neumann) ? sub[0] : data->aux, &C, &perm));
2604               if (perm) { /* unsorted input IS */
2605                 if (!PetscBool3ToBool(data->Neumann) && !block) {
2606                   PetscCall(MatPermute(sub[0], perm, perm, &D)); /* permute since PCASM will call ISSort() */
2607                   PetscCall(MatHeaderReplace(sub[0], &D));
2608                 }
2609                 if (data->B) { /* see PCHPDDMSetRHSMat() */
2610                   PetscCall(MatPermute(data->B, perm, perm, &D));
2611                   PetscCall(MatHeaderReplace(data->B, &D));
2612                 }
2613                 PetscCall(ISDestroy(&perm));
2614               }
2615               PetscCall(KSPGetOperators(ksp[0], subA, subA + 1));
2616               PetscCall(PetscObjectReference((PetscObject)subA[0]));
2617               PetscCall(MatDuplicate(subA[1], MAT_SHARE_NONZERO_PATTERN, &D));
2618               PetscCall(MatGetOptionsPrefix(subA[1], &matpre));
2619               PetscCall(MatSetOptionsPrefix(D, matpre));
2620               PetscCall(PetscObjectTypeCompare((PetscObject)D, MATNORMAL, cmp));
2621               PetscCall(PetscObjectTypeCompare((PetscObject)C, MATNORMAL, cmp + 1));
2622               if (!cmp[0]) PetscCall(PetscObjectTypeCompare((PetscObject)D, MATNORMALHERMITIAN, cmp + 2));
2623               else cmp[2] = PETSC_FALSE;
2624               if (!cmp[1]) PetscCall(PetscObjectTypeCompare((PetscObject)C, MATNORMALHERMITIAN, cmp + 3));
2625               else cmp[3] = PETSC_FALSE;
2626               PetscCheck(cmp[0] == cmp[1] && cmp[2] == cmp[3], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_levels_1_pc_asm_sub_mat_type %s and auxiliary Mat of type %s", pcpre ? pcpre : "", ((PetscObject)D)->type_name, ((PetscObject)C)->type_name);
2627               if (!cmp[0] && !cmp[2]) {
2628                 if (!block) {
2629                   if (PetscDefined(USE_DEBUG)) PetscCall(PCHPDDMCheckMatStructure_Private(pc, D, C));
2630                   PetscCall(MatAXPY(D, 1.0, C, SUBSET_NONZERO_PATTERN));
2631                 } else {
2632                   structure = DIFFERENT_NONZERO_PATTERN;
2633                   PetscCall(MatAXPY(D, 1.0, data->aux, structure));
2634                 }
2635               } else {
2636                 Mat mat[2];
2637 
2638                 if (cmp[0]) {
2639                   PetscCall(MatNormalGetMat(D, mat));
2640                   PetscCall(MatNormalGetMat(C, mat + 1));
2641                 } else {
2642                   PetscCall(MatNormalHermitianGetMat(D, mat));
2643                   PetscCall(MatNormalHermitianGetMat(C, mat + 1));
2644                 }
2645                 PetscCall(MatAXPY(mat[0], 1.0, mat[1], SUBSET_NONZERO_PATTERN));
2646               }
2647               PetscCall(MatPropagateSymmetryOptions(C, D));
2648               PetscCall(MatDestroy(&C));
2649               C = D;
2650               /* swap pointers so that variables stay consistent throughout PCSetUp() */
2651               std::swap(C, data->aux);
2652               std::swap(uis, data->is);
2653               swap = PETSC_TRUE;
2654             }
2655           }
2656         }
2657       }
2658       if (ctx) {
2659         PC_HPDDM              *data_00 = (PC_HPDDM *)std::get<0>(*ctx)[0]->data;
2660         PC                     s;
2661         Mat                    A00, P00, A01 = nullptr, A10, A11, N, b[4];
2662         IS                     sorted, is[2], *is_00;
2663         MatSolverType          type;
2664         std::pair<PC, Vec[2]> *p;
2665 
2666         n = -1;
2667         PetscTryMethod(data_00->levels[0]->pc, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (data_00->levels[0]->pc, &n, nullptr, &ksp));
2668         PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", n);
2669         PetscCall(KSPGetOperators(ksp[0], subA, subA + 1));
2670         PetscCall(ISGetLocalSize(data_00->is, &n));
2671         if (n != subA[0]->rmap->n || n != subA[0]->cmap->n) {
2672           PetscCall(PCASMGetLocalSubdomains(data_00->levels[0]->pc, &n, &is_00, nullptr));
2673           PetscCall(ISGetLocalSize(*is_00, &n));
2674           PetscCheck(n == subA[0]->rmap->n && n == subA[0]->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition geneo and -%spc_hpddm_define_subdomains false", pcpre ? pcpre : "", ((PetscObject)pc)->prefix);
2675         } else is_00 = &data_00->is;
2676         PetscCall(PCHPDDMPermute_Private(unsorted, data->is, &uis, data->aux, &C, nullptr)); /* permute since PCASM works with a sorted IS */
2677         std::swap(C, data->aux);
2678         std::swap(uis, data->is);
2679         swap = PETSC_TRUE;
2680         PetscCall(MatSchurComplementGetSubMatrices(P, &A00, &P00, std::get<1>(*ctx), &A10, &A11));
2681         std::get<1>(*ctx)[1] = A10;
2682         PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATTRANSPOSEVIRTUAL, &flg));
2683         if (flg) PetscCall(MatTransposeGetMat(A10, &A01));
2684         else {
2685           PetscBool flg;
2686 
2687           PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
2688           if (flg) PetscCall(MatHermitianTransposeGetMat(A10, &A01));
2689         }
2690         PetscCall(ISDuplicate(*is_00, &sorted)); /* during setup of the PC associated to the A00 block, this IS has already been sorted, but it's put back to its original state at the end of PCSetUp_HPDDM(), which may be unsorted */
2691         PetscCall(ISSort(sorted));               /* this is to avoid changing users inputs, but it requires a new call to ISSort() here                                                                                               */
2692         if (!A01) {
2693           PetscCall(MatSetOption(A10, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
2694           PetscCall(MatCreateSubMatrices(A10, 1, &data->is, &sorted, MAT_INITIAL_MATRIX, &sub));
2695           b[2] = sub[0];
2696           PetscCall(PetscObjectReference((PetscObject)sub[0]));
2697           PetscCall(MatDestroySubMatrices(1, &sub));
2698           PetscCall(PetscObjectTypeCompare((PetscObject)std::get<1>(*ctx)[0], MATTRANSPOSEVIRTUAL, &flg));
2699           A10 = nullptr;
2700           if (flg) PetscCall(MatTransposeGetMat(std::get<1>(*ctx)[0], &A10));
2701           else {
2702             PetscBool flg;
2703 
2704             PetscCall(PetscObjectTypeCompare((PetscObject)std::get<1>(*ctx)[0], MATHERMITIANTRANSPOSEVIRTUAL, &flg));
2705             if (flg) PetscCall(MatHermitianTransposeGetMat(std::get<1>(*ctx)[0], &A10));
2706           }
2707           if (!A10) PetscCall(MatCreateSubMatrices(std::get<1>(*ctx)[0], 1, &sorted, &data->is, MAT_INITIAL_MATRIX, &sub));
2708           else {
2709             if (flg) PetscCall(MatCreateTranspose(b[2], b + 1));
2710             else PetscCall(MatCreateHermitianTranspose(b[2], b + 1));
2711           }
2712         } else {
2713           PetscCall(MatSetOption(A01, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
2714           PetscCall(MatCreateSubMatrices(A01, 1, &sorted, &data->is, MAT_INITIAL_MATRIX, &sub));
2715           if (flg) PetscCall(MatCreateTranspose(*sub, b + 2));
2716           else PetscCall(MatCreateHermitianTranspose(*sub, b + 2));
2717         }
2718         if (A01 || !A10) {
2719           b[1] = sub[0];
2720           PetscCall(PetscObjectReference((PetscObject)sub[0]));
2721         }
2722         PetscCall(MatDestroySubMatrices(1, &sub));
2723         PetscCall(ISDestroy(&sorted));
2724         b[3] = data->aux;
2725         PetscCall(MatCreateSchurComplement(subA[0], subA[1], b[1], b[2], b[3], &S));
2726         PetscCall(MatSchurComplementSetKSP(S, ksp[0]));
2727         if (data->N != 1) {
2728           PetscCall(PCASMSetType(data->levels[0]->pc, PC_ASM_NONE)); /* "Neumann--Neumann" preconditioning with overlap and a Boolean partition of unity */
2729           PetscCall(PCASMSetLocalSubdomains(data->levels[0]->pc, 1, &data->is, &loc));
2730           PetscCall(PCSetFromOptions(data->levels[0]->pc)); /* action of eq. (15) of https://hal.science/hal-02343808v6/document (with a sign flip) */
2731           s = data->levels[0]->pc;
2732         } else {
2733           is[0] = data->is;
2734           PetscCall(PetscObjectReference((PetscObject)is[0]));
2735           PetscCall(PetscObjectReference((PetscObject)b[3]));
2736           PetscCall(PCSetType(pc, PCASM));                          /* change the type of the current PC */
2737           data = nullptr;                                           /* destroyed in the previous PCSetType(), so reset to NULL to avoid any faulty use */
2738           PetscCall(PCAppendOptionsPrefix(pc, "pc_hpddm_coarse_")); /* same prefix as when using PCHPDDM with a single level */
2739           PetscCall(PCASMSetLocalSubdomains(pc, 1, is, &loc));
2740           PetscCall(ISDestroy(is));
2741           PetscCall(ISDestroy(&loc));
2742           s = pc;
2743         }
2744         PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(s, S, PETSC_TRUE)); /* the subdomain Mat is already known and the input IS of PCASMSetLocalSubdomains() is already sorted */
2745         PetscTryMethod(s, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (s, &n, nullptr, &ksp));
2746         PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", n);
2747         PetscCall(KSPGetPC(ksp[0], &inner));
2748         PetscCall(PCSetType(inner, PCSHELL)); /* compute the action of the inverse of the local Schur complement with a PCSHELL */
2749         b[0] = subA[0];
2750         PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, nullptr, 2, nullptr, b, &N)); /* instead of computing inv(A11 - A10 inv(A00) A01), compute inv([A00, A01; A10, A11]) followed by a partial solution associated to the A11 block */
2751         if (!data) PetscCall(PetscObjectDereference((PetscObject)b[3]));
2752         PetscCall(PetscObjectDereference((PetscObject)b[1]));
2753         PetscCall(PetscObjectDereference((PetscObject)b[2]));
2754         PetscCall(PCCreate(PETSC_COMM_SELF, &s));
2755         PetscCall(PCSetOptionsPrefix(s, ((PetscObject)inner)->prefix));
2756         PetscCall(PCSetOptionsPrefix(inner, nullptr));
2757         PetscCall(KSPSetSkipPCSetFromOptions(ksp[0], PETSC_TRUE));
2758         PetscCall(PCSetType(s, PCLU));
2759         if (PetscDefined(HAVE_MUMPS)) PetscCall(PCFactorSetMatSolverType(s, MATSOLVERMUMPS)); /* only MATSOLVERMUMPS handles MATNEST, so for the others, e.g., MATSOLVERPETSC or MATSOLVERMKL_PARDISO, convert to plain MATAIJ */
2760         PetscCall(PCSetFromOptions(s));
2761         PetscCall(PCFactorGetMatSolverType(s, &type));
2762         PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
2763         PetscCall(MatGetLocalSize(A11, &n, nullptr));
2764         if (flg || n == 0) {
2765           PetscCall(PCSetOperators(s, N, N));
2766           if (n) {
2767             PetscCall(PCFactorGetMatrix(s, b));
2768             PetscCall(MatSetOptionsPrefix(*b, ((PetscObject)s)->prefix));
2769             n = -1;
2770             PetscCall(PetscOptionsGetInt(((PetscObject)pc)->options, ((PetscObject)s)->prefix, "-mat_mumps_icntl_26", &n, nullptr));
2771             if (n == 1) {                                /* allocates a square MatDense of size is[1]->map->n, so one */
2772               PetscCall(MatNestGetISs(N, is, nullptr));  /*  needs to be able to deactivate this path when dealing    */
2773               PetscCall(MatFactorSetSchurIS(*b, is[1])); /*  with a large constraint space in order to avoid OOM      */
2774             }
2775           } else PetscCall(PCSetType(s, PCNONE)); /* empty local Schur complement (e.g., centralized on another process) */
2776         } else {
2777           PetscCall(MatConvert(N, MATAIJ, MAT_INITIAL_MATRIX, b));
2778           PetscCall(PCSetOperators(s, N, *b));
2779           PetscCall(PetscObjectDereference((PetscObject)*b));
2780           PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &flg, PCLU, PCCHOLESKY, PCILU, PCICC, PCQR, ""));
2781           if (flg) PetscCall(PCFactorGetMatrix(s, b)); /* MATSOLVERMKL_PARDISO cannot compute in PETSc (yet) a partial solution associated to the A11 block, only partial solution associated to the A00 block or full solution */
2782         }
2783         PetscCall(PetscNew(&p));
2784         p->first = s;
2785         if (n != 0) PetscCall(MatCreateVecs(*b, p->second, p->second + 1));
2786         else p->second[0] = p->second[1] = nullptr;
2787         PetscCall(PCShellSetContext(inner, p));
2788         PetscCall(PCShellSetApply(inner, PCApply_Nest));
2789         PetscCall(PCShellSetView(inner, PCView_Nest));
2790         PetscCall(PCShellSetDestroy(inner, PCDestroy_Nest));
2791         PetscCall(PetscObjectDereference((PetscObject)N));
2792         if (!data) {
2793           PetscCall(MatDestroy(&S));
2794           PetscCall(ISDestroy(&unsorted));
2795           PetscCall(MatDestroy(&C));
2796           PetscCall(ISDestroy(&uis));
2797           PetscCall(PetscFree(ctx));
2798 #if PetscDefined(USE_DEBUG)
2799           PetscCall(ISDestroy(&dis));
2800           PetscCall(MatDestroy(&daux));
2801 #endif
2802           PetscFunctionReturn(PETSC_SUCCESS);
2803         }
2804       }
2805       if (!data->levels[0]->scatter) {
2806         PetscCall(MatCreateVecs(P, &xin, nullptr));
2807         if (ismatis) PetscCall(MatDestroy(&P));
2808         PetscCall(VecScatterCreate(xin, data->is, data->levels[0]->D, nullptr, &data->levels[0]->scatter));
2809         PetscCall(VecDestroy(&xin));
2810       }
2811       if (data->levels[0]->P) {
2812         /* if the pattern is the same and PCSetUp() has previously succeeded, reuse HPDDM buffers and connectivity */
2813         PetscCall(HPDDM::Schwarz<PetscScalar>::destroy(data->levels[0], !pc->setupcalled || pc->flag == DIFFERENT_NONZERO_PATTERN ? PETSC_TRUE : PETSC_FALSE));
2814       }
2815       if (!data->levels[0]->P) data->levels[0]->P = new HPDDM::Schwarz<PetscScalar>();
2816       if (data->log_separate) PetscCall(PetscLogEventBegin(PC_HPDDM_SetUp[0], data->levels[0]->ksp, nullptr, nullptr, nullptr));
2817       else PetscCall(PetscLogEventBegin(PC_HPDDM_Strc, data->levels[0]->ksp, nullptr, nullptr, nullptr));
2818       /* HPDDM internal data structure */
2819       PetscCall(data->levels[0]->P->structure(loc, data->is, !ctx ? sub[0] : nullptr, ismatis ? C : data->aux, data->levels));
2820       if (!data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_Strc, data->levels[0]->ksp, nullptr, nullptr, nullptr));
2821       /* matrix pencil of the generalized eigenvalue problem on the overlap (GenEO) */
2822       if (!ctx) {
2823         if (data->deflation || overlap != -1) weighted = data->aux;
2824         else if (!data->B) {
2825           PetscBool cmp;
2826 
2827           PetscCall(MatDuplicate(sub[0], MAT_COPY_VALUES, &weighted));
2828           PetscCall(PetscObjectTypeCompareAny((PetscObject)weighted, &cmp, MATNORMAL, MATNORMALHERMITIAN, ""));
2829           if (cmp) flg = PETSC_FALSE;
2830           PetscCall(MatDiagonalScale(weighted, data->levels[0]->D, data->levels[0]->D));
2831           /* neither MatDuplicate() nor MatDiagonalScale() handles the symmetry options, so propagate the options explicitly */
2832           /* only useful for -mat_type baij -pc_hpddm_levels_1_st_pc_type cholesky (no problem with MATAIJ or MATSBAIJ)      */
2833           PetscCall(MatPropagateSymmetryOptions(sub[0], weighted));
2834           if (PetscDefined(USE_DEBUG) && PetscBool3ToBool(data->Neumann)) {
2835             Mat      *sub, A[3];
2836             PetscReal norm[2];
2837             PetscBool flg;
2838 
2839             PetscCall(PetscObjectTypeCompare((PetscObject)P, MATMPISBAIJ, &flg)); /* MatCreateSubMatrices() does not work with MATSBAIJ and unsorted ISes, so convert to MPIAIJ */
2840             if (flg) PetscCall(MatConvert(P, MATMPIAIJ, MAT_INITIAL_MATRIX, A));
2841             else {
2842               A[0] = P;
2843               PetscCall(PetscObjectReference((PetscObject)P));
2844             }
2845             PetscCall(MatCreateSubMatrices(A[0], 1, &data->is, &data->is, MAT_INITIAL_MATRIX, &sub));
2846             PetscCall(MatDiagonalScale(sub[0], data->levels[0]->D, data->levels[0]->D));
2847             PetscCall(MatConvert(sub[0], MATSEQAIJ, MAT_INITIAL_MATRIX, A + 1)); /* too many corner cases to handle (MATNORMAL, MATNORMALHERMITIAN, MATBAIJ with different block sizes...), so just MatConvert() to MATSEQAIJ since this is just for debugging */
2848             PetscCall(MatConvert(weighted, MATSEQAIJ, MAT_INITIAL_MATRIX, A + 2));
2849             PetscCall(MatAXPY(A[1], -1.0, A[2], UNKNOWN_NONZERO_PATTERN));
2850             PetscCall(MatNorm(A[1], NORM_FROBENIUS, norm));
2851             if (norm[0]) {
2852               PetscCall(MatNorm(A[2], NORM_FROBENIUS, norm + 1));
2853               PetscCheck(PetscAbsReal(norm[0] / norm[1]) < PetscSqrtReal(PETSC_SMALL), PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Auxiliary Mat is different from the (assembled) subdomain Mat for the interior unknowns, so it cannot be the Neumann matrix, remove -%spc_hpddm_has_neumann", pcpre ? pcpre : "");
2854             }
2855             PetscCall(MatDestroySubMatrices(1, &sub));
2856             for (PetscInt i = 0; i < 3; ++i) PetscCall(MatDestroy(A + i));
2857           }
2858         } else weighted = data->B;
2859       } else weighted = nullptr;
2860       /* SLEPc is used inside the loaded symbol */
2861       PetscCall((*loadedSym)(data->levels[0]->P, data->is, ismatis ? C : (algebraic && !block && overlap == -1 ? sub[0] : (!ctx ? data->aux : S)), weighted, data->B, initial, data->levels));
2862       if (!ctx && data->share && overlap == -1) {
2863         Mat st[2];
2864 
2865         PetscCall(KSPGetOperators(ksp[0], st, st + 1));
2866         PetscCall(MatCopy(subA[0], st[0], structure));
2867         if (subA[1] != subA[0] || st[1] != st[0]) PetscCall(MatCopy(subA[1], st[1], SAME_NONZERO_PATTERN));
2868         PetscCall(PetscObjectDereference((PetscObject)subA[0]));
2869       }
2870       if (data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_SetUp[0], data->levels[0]->ksp, nullptr, nullptr, nullptr));
2871       if (ismatis) PetscCall(MatISGetLocalMat(C, &N));
2872       else N = data->aux;
2873       if (!ctx) P = sub[0];
2874       else P = S;
2875       /* going through the grid hierarchy */
2876       for (n = 1; n < data->N; ++n) {
2877         if (data->log_separate) PetscCall(PetscLogEventBegin(PC_HPDDM_SetUp[n], data->levels[n]->ksp, nullptr, nullptr, nullptr));
2878         /* method composed in the loaded symbol since there, SLEPc is used as well */
2879         PetscTryMethod(data->levels[0]->ksp, "PCHPDDMSetUp_Private_C", (Mat *, Mat *, PetscInt, PetscInt *const, PC_HPDDM_Level **const), (&P, &N, n, &data->N, data->levels));
2880         if (data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_SetUp[n], data->levels[n]->ksp, nullptr, nullptr, nullptr));
2881       }
2882       /* reset to NULL to avoid any faulty use */
2883       PetscCall(PetscObjectComposeFunction((PetscObject)data->levels[0]->ksp, "PCHPDDMSetUp_Private_C", nullptr));
2884       if (!ismatis) PetscCall(PetscObjectComposeFunction((PetscObject)pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_C", nullptr));
2885       else PetscCall(PetscObjectDereference((PetscObject)C)); /* matching PetscObjectReference() above */
2886       for (n = 0; n < data->N - 1; ++n)
2887         if (data->levels[n]->P) {
2888           /* HPDDM internal work buffers */
2889           data->levels[n]->P->setBuffer();
2890           data->levels[n]->P->super::start();
2891         }
2892       if (ismatis || !subdomains) PetscCall(PCHPDDMDestroySubMatrices_Private(PetscBool3ToBool(data->Neumann), PetscBool(algebraic && !block && overlap == -1), sub));
2893       if (ismatis) data->is = nullptr;
2894       for (n = 0; n < data->N - 1 + (reused > 0); ++n) {
2895         if (data->levels[n]->P) {
2896           PC spc;
2897 
2898           /* force the PC to be PCSHELL to do the coarse grid corrections */
2899           PetscCall(KSPSetSkipPCSetFromOptions(data->levels[n]->ksp, PETSC_TRUE));
2900           PetscCall(KSPGetPC(data->levels[n]->ksp, &spc));
2901           PetscCall(PCSetType(spc, PCSHELL));
2902           PetscCall(PCShellSetContext(spc, data->levels[n]));
2903           PetscCall(PCShellSetSetUp(spc, PCSetUp_HPDDMShell));
2904           PetscCall(PCShellSetApply(spc, PCApply_HPDDMShell));
2905           PetscCall(PCShellSetMatApply(spc, PCMatApply_HPDDMShell));
2906           PetscCall(PCShellSetApplyTranspose(spc, PCApplyTranspose_HPDDMShell));
2907           PetscCall(PCShellSetMatApplyTranspose(spc, PCMatApplyTranspose_HPDDMShell));
2908           if (ctx && n == 0) {
2909             Mat                               Amat, Pmat;
2910             PetscInt                          m, M;
2911             std::tuple<Mat, PetscSF, Vec[2]> *ctx;
2912 
2913             PetscCall(KSPGetOperators(data->levels[n]->ksp, nullptr, &Pmat));
2914             PetscCall(MatGetLocalSize(Pmat, &m, nullptr));
2915             PetscCall(MatGetSize(Pmat, &M, nullptr));
2916             PetscCall(PetscNew(&ctx));
2917             std::get<0>(*ctx) = S;
2918             std::get<1>(*ctx) = data->levels[n]->scatter;
2919             PetscCall(MatCreateShell(PetscObjectComm((PetscObject)data->levels[n]->ksp), m, m, M, M, ctx, &Amat));
2920             PetscCall(MatShellSetOperation(Amat, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Schur<false>));
2921             PetscCall(MatShellSetOperation(Amat, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMult_Schur<true>));
2922             PetscCall(MatShellSetOperation(Amat, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_Schur));
2923             PetscCall(MatCreateVecs(S, std::get<2>(*ctx), std::get<2>(*ctx) + 1));
2924             PetscCall(KSPSetOperators(data->levels[n]->ksp, Amat, Pmat));
2925             PetscCall(PetscObjectDereference((PetscObject)Amat));
2926           }
2927           PetscCall(PCShellSetDestroy(spc, PCDestroy_HPDDMShell));
2928           if (!data->levels[n]->pc) PetscCall(PCCreate(PetscObjectComm((PetscObject)data->levels[n]->ksp), &data->levels[n]->pc));
2929           if (n < reused) {
2930             PetscCall(PCSetReusePreconditioner(spc, PETSC_TRUE));
2931             PetscCall(PCSetReusePreconditioner(data->levels[n]->pc, PETSC_TRUE));
2932           }
2933           PetscCall(PCSetUp(spc));
2934         }
2935       }
2936       if (ctx) PetscCall(MatDestroy(&S));
2937       if (overlap == -1) PetscCall(PetscObjectComposeFunction((PetscObject)pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_Private_C", nullptr));
2938     } else flg = reused ? PETSC_FALSE : PETSC_TRUE;
2939     if (!ismatis && subdomains) {
2940       if (flg) PetscCall(KSPGetPC(data->levels[0]->ksp, &inner));
2941       else inner = data->levels[0]->pc;
2942       if (inner) {
2943         if (!inner->setupcalled) PetscCall(PCSetType(inner, PCASM));
2944         PetscCall(PCSetFromOptions(inner));
2945         PetscCall(PetscStrcmp(((PetscObject)inner)->type_name, PCASM, &flg));
2946         if (flg) {
2947           if (!inner->setupcalled) { /* evaluates to PETSC_FALSE when -pc_hpddm_block_splitting */
2948             IS sorted;               /* PCASM will sort the input IS, duplicate it to return an unmodified (PCHPDDM) input IS */
2949 
2950             PetscCall(ISDuplicate(is[0], &sorted));
2951             PetscCall(PCASMSetLocalSubdomains(inner, 1, &sorted, &loc));
2952             PetscCall(PetscObjectDereference((PetscObject)sorted));
2953           }
2954           if (!PetscBool3ToBool(data->Neumann) && data->N > 1) { /* subdomain matrices are already created for the eigenproblem, reuse them for the fine-level PC */
2955             PetscCall(PCHPDDMPermute_Private(*is, nullptr, nullptr, sub[0], &P, nullptr));
2956             PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(inner, P, algebraic));
2957             PetscCall(PetscObjectDereference((PetscObject)P));
2958           }
2959         }
2960       }
2961       if (data->N > 1) {
2962         if (overlap != 1) PetscCall(PCHPDDMDestroySubMatrices_Private(PetscBool3ToBool(data->Neumann), PetscBool(algebraic && !block && overlap == -1), sub));
2963         if (overlap == 1) PetscCall(MatDestroy(subA));
2964       }
2965     }
2966     PetscCall(ISDestroy(&loc));
2967   } else data->N = 1 + reused; /* enforce this value to 1 + reused if there is no way to build another level */
2968   if (requested != data->N + reused) {
2969     PetscCall(PetscInfo(pc, "%" PetscInt_FMT " levels requested, only %" PetscInt_FMT " built + %" PetscInt_FMT " reused. Options for level(s) > %" PetscInt_FMT ", including -%spc_hpddm_coarse_ will not be taken into account\n", requested, data->N, reused,
2970                         data->N, pcpre ? pcpre : ""));
2971     PetscCall(PetscInfo(pc, "It is best to tune parameters, e.g., a higher value for -%spc_hpddm_levels_%" PetscInt_FMT "_eps_threshold_absolute or a lower value for -%spc_hpddm_levels_%" PetscInt_FMT "_svd_threshold_relative, so that at least one local deflation vector will be selected\n", pcpre ? pcpre : "",
2972                         data->N, pcpre ? pcpre : "", data->N));
2973     /* cannot use PCDestroy_HPDDMShell() because PCSHELL not set for unassembled levels */
2974     for (n = data->N - 1; n < requested - 1; ++n) {
2975       if (data->levels[n]->P) {
2976         PetscCall(HPDDM::Schwarz<PetscScalar>::destroy(data->levels[n], PETSC_TRUE));
2977         PetscCall(VecDestroyVecs(1, &data->levels[n]->v[0]));
2978         PetscCall(VecDestroyVecs(2, &data->levels[n]->v[1]));
2979         PetscCall(MatDestroy(data->levels[n]->V));
2980         PetscCall(MatDestroy(data->levels[n]->V + 1));
2981         PetscCall(MatDestroy(data->levels[n]->V + 2));
2982         PetscCall(VecDestroy(&data->levels[n]->D));
2983         PetscCall(PetscSFDestroy(&data->levels[n]->scatter));
2984       }
2985     }
2986     if (reused) {
2987       for (n = reused; n < PETSC_PCHPDDM_MAXLEVELS && data->levels[n]; ++n) {
2988         PetscCall(KSPDestroy(&data->levels[n]->ksp));
2989         PetscCall(PCDestroy(&data->levels[n]->pc));
2990       }
2991     }
2992     PetscCheck(!PetscDefined(USE_DEBUG), PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " levels requested, only %" PetscInt_FMT " built + %" PetscInt_FMT " reused. Options for level(s) > %" PetscInt_FMT ", including -%spc_hpddm_coarse_ will not be taken into account. It is best to tune parameters, e.g., a higher value for -%spc_hpddm_levels_%" PetscInt_FMT "_eps_threshold or a lower value for -%spc_hpddm_levels_%" PetscInt_FMT "_svd_threshold_relative, so that at least one local deflation vector will be selected. If you don't want this to error out, compile --with-debugging=0", requested,
2993                data->N, reused, data->N, pcpre ? pcpre : "", pcpre ? pcpre : "", data->N, pcpre ? pcpre : "", data->N);
2994   }
2995   /* these solvers are created after PCSetFromOptions() is called */
2996   if (pc->setfromoptionscalled) {
2997     for (n = 0; n < data->N; ++n) {
2998       if (data->levels[n]->ksp) PetscCall(KSPSetFromOptions(data->levels[n]->ksp));
2999       if (data->levels[n]->pc) PetscCall(PCSetFromOptions(data->levels[n]->pc));
3000     }
3001     pc->setfromoptionscalled = 0;
3002   }
3003   data->N += reused;
3004   if (data->share && swap) {
3005     /* swap back pointers so that variables follow the user-provided numbering */
3006     std::swap(C, data->aux);
3007     std::swap(uis, data->is);
3008     PetscCall(MatDestroy(&C));
3009     PetscCall(ISDestroy(&uis));
3010   }
3011   if (algebraic) PetscCall(MatDestroy(&data->aux));
3012   if (unsorted && unsorted != is[0]) {
3013     PetscCall(ISCopy(unsorted, data->is));
3014     PetscCall(ISDestroy(&unsorted));
3015   }
3016 #if PetscDefined(USE_DEBUG)
3017   PetscCheck((data->is && dis) || (!data->is && !dis), PETSC_COMM_SELF, PETSC_ERR_PLIB, "An IS pointer is NULL but not the other: input IS pointer (%p) v. output IS pointer (%p)", (void *)dis, (void *)data->is);
3018   if (data->is) {
3019     PetscCall(ISEqualUnsorted(data->is, dis, &flg));
3020     PetscCall(ISDestroy(&dis));
3021     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Input IS and output IS are not equal");
3022   }
3023   PetscCheck((data->aux && daux) || (!data->aux && !daux), PETSC_COMM_SELF, PETSC_ERR_PLIB, "A Mat pointer is NULL but not the other: input Mat pointer (%p) v. output Mat pointer (%p)", (void *)daux, (void *)data->aux);
3024   if (data->aux) {
3025     PetscCall(MatMultEqual(data->aux, daux, 20, &flg));
3026     PetscCall(MatDestroy(&daux));
3027     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Input Mat and output Mat are not equal");
3028   }
3029 #endif
3030   PetscFunctionReturn(PETSC_SUCCESS);
3031 }
3032 
3033 /*@
3034   PCHPDDMSetCoarseCorrectionType - Sets the coarse correction type.
3035 
3036   Collective
3037 
3038   Input Parameters:
3039 + pc   - preconditioner context
3040 - type - `PC_HPDDM_COARSE_CORRECTION_DEFLATED`, `PC_HPDDM_COARSE_CORRECTION_ADDITIVE`, `PC_HPDDM_COARSE_CORRECTION_BALANCED`, or `PC_HPDDM_COARSE_CORRECTION_NONE`
3041 
3042   Options Database Key:
3043 . -pc_hpddm_coarse_correction <deflated, additive, balanced, none> - type of coarse correction to apply
3044 
3045   Level: intermediate
3046 
3047 .seealso: [](ch_ksp), `PCHPDDMGetCoarseCorrectionType()`, `PCHPDDM`, `PCHPDDMCoarseCorrectionType`
3048 @*/
3049 PetscErrorCode PCHPDDMSetCoarseCorrectionType(PC pc, PCHPDDMCoarseCorrectionType type)
3050 {
3051   PetscFunctionBegin;
3052   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3053   PetscValidLogicalCollectiveEnum(pc, type, 2);
3054   PetscTryMethod(pc, "PCHPDDMSetCoarseCorrectionType_C", (PC, PCHPDDMCoarseCorrectionType), (pc, type));
3055   PetscFunctionReturn(PETSC_SUCCESS);
3056 }
3057 
3058 /*@
3059   PCHPDDMGetCoarseCorrectionType - Gets the coarse correction type.
3060 
3061   Input Parameter:
3062 . pc - preconditioner context
3063 
3064   Output Parameter:
3065 . type - `PC_HPDDM_COARSE_CORRECTION_DEFLATED`, `PC_HPDDM_COARSE_CORRECTION_ADDITIVE`, `PC_HPDDM_COARSE_CORRECTION_BALANCED`, or `PC_HPDDM_COARSE_CORRECTION_NONE`
3066 
3067   Level: intermediate
3068 
3069 .seealso: [](ch_ksp), `PCHPDDMSetCoarseCorrectionType()`, `PCHPDDM`, `PCHPDDMCoarseCorrectionType`
3070 @*/
3071 PetscErrorCode PCHPDDMGetCoarseCorrectionType(PC pc, PCHPDDMCoarseCorrectionType *type)
3072 {
3073   PetscFunctionBegin;
3074   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3075   if (type) {
3076     PetscAssertPointer(type, 2);
3077     PetscUseMethod(pc, "PCHPDDMGetCoarseCorrectionType_C", (PC, PCHPDDMCoarseCorrectionType *), (pc, type));
3078   }
3079   PetscFunctionReturn(PETSC_SUCCESS);
3080 }
3081 
3082 static PetscErrorCode PCHPDDMSetCoarseCorrectionType_HPDDM(PC pc, PCHPDDMCoarseCorrectionType type)
3083 {
3084   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3085 
3086   PetscFunctionBegin;
3087   data->correction = type;
3088   PetscFunctionReturn(PETSC_SUCCESS);
3089 }
3090 
3091 static PetscErrorCode PCHPDDMGetCoarseCorrectionType_HPDDM(PC pc, PCHPDDMCoarseCorrectionType *type)
3092 {
3093   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3094 
3095   PetscFunctionBegin;
3096   *type = data->correction;
3097   PetscFunctionReturn(PETSC_SUCCESS);
3098 }
3099 
3100 /*@
3101   PCHPDDMSetSTShareSubKSP - Sets whether the `KSP` in SLEPc `ST` and the fine-level subdomain solver should be shared.
3102 
3103   Input Parameters:
3104 + pc    - preconditioner context
3105 - share - whether the `KSP` should be shared or not
3106 
3107   Note:
3108   This is not the same as `PCSetReusePreconditioner()`. Given certain conditions (visible using -info), a symbolic factorization can be skipped
3109   when using a subdomain `PCType` such as `PCLU` or `PCCHOLESKY`.
3110 
3111   Level: advanced
3112 
3113 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMGetSTShareSubKSP()`
3114 @*/
3115 PetscErrorCode PCHPDDMSetSTShareSubKSP(PC pc, PetscBool share)
3116 {
3117   PetscFunctionBegin;
3118   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3119   PetscTryMethod(pc, "PCHPDDMSetSTShareSubKSP_C", (PC, PetscBool), (pc, share));
3120   PetscFunctionReturn(PETSC_SUCCESS);
3121 }
3122 
3123 /*@
3124   PCHPDDMGetSTShareSubKSP - Gets whether the `KSP` in SLEPc `ST` and the fine-level subdomain solver is shared.
3125 
3126   Input Parameter:
3127 . pc - preconditioner context
3128 
3129   Output Parameter:
3130 . share - whether the `KSP` is shared or not
3131 
3132   Note:
3133   This is not the same as `PCGetReusePreconditioner()`. The return value is unlikely to be true, but when it is, a symbolic factorization can be skipped
3134   when using a subdomain `PCType` such as `PCLU` or `PCCHOLESKY`.
3135 
3136   Level: advanced
3137 
3138 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMSetSTShareSubKSP()`
3139 @*/
3140 PetscErrorCode PCHPDDMGetSTShareSubKSP(PC pc, PetscBool *share)
3141 {
3142   PetscFunctionBegin;
3143   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3144   if (share) {
3145     PetscAssertPointer(share, 2);
3146     PetscUseMethod(pc, "PCHPDDMGetSTShareSubKSP_C", (PC, PetscBool *), (pc, share));
3147   }
3148   PetscFunctionReturn(PETSC_SUCCESS);
3149 }
3150 
3151 static PetscErrorCode PCHPDDMSetSTShareSubKSP_HPDDM(PC pc, PetscBool share)
3152 {
3153   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3154 
3155   PetscFunctionBegin;
3156   data->share = share;
3157   PetscFunctionReturn(PETSC_SUCCESS);
3158 }
3159 
3160 static PetscErrorCode PCHPDDMGetSTShareSubKSP_HPDDM(PC pc, PetscBool *share)
3161 {
3162   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3163 
3164   PetscFunctionBegin;
3165   *share = data->share;
3166   PetscFunctionReturn(PETSC_SUCCESS);
3167 }
3168 
3169 /*@
3170   PCHPDDMSetDeflationMat - Sets the deflation space used to assemble a coarser operator.
3171 
3172   Input Parameters:
3173 + pc - preconditioner context
3174 . is - index set of the local deflation matrix
3175 - U  - deflation sequential matrix stored as a `MATSEQDENSE`
3176 
3177   Level: advanced
3178 
3179 .seealso: [](ch_ksp), `PCHPDDM`, `PCDeflationSetSpace()`, `PCMGSetRestriction()`
3180 @*/
3181 PetscErrorCode PCHPDDMSetDeflationMat(PC pc, IS is, Mat U)
3182 {
3183   PetscFunctionBegin;
3184   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3185   PetscValidHeaderSpecific(is, IS_CLASSID, 2);
3186   PetscValidHeaderSpecific(U, MAT_CLASSID, 3);
3187   PetscTryMethod(pc, "PCHPDDMSetDeflationMat_C", (PC, IS, Mat), (pc, is, U));
3188   PetscFunctionReturn(PETSC_SUCCESS);
3189 }
3190 
3191 static PetscErrorCode PCHPDDMSetDeflationMat_HPDDM(PC pc, IS is, Mat U)
3192 {
3193   PetscFunctionBegin;
3194   PetscCall(PCHPDDMSetAuxiliaryMat_Private(pc, is, U, PETSC_TRUE));
3195   PetscFunctionReturn(PETSC_SUCCESS);
3196 }
3197 
3198 PetscErrorCode HPDDMLoadDL_Private(PetscBool *found)
3199 {
3200   PetscBool flg;
3201   char      lib[PETSC_MAX_PATH_LEN], dlib[PETSC_MAX_PATH_LEN], dir[PETSC_MAX_PATH_LEN];
3202 
3203   PetscFunctionBegin;
3204   PetscAssertPointer(found, 1);
3205   PetscCall(PetscStrncpy(dir, "${PETSC_LIB_DIR}", sizeof(dir)));
3206   PetscCall(PetscOptionsGetString(nullptr, nullptr, "-hpddm_dir", dir, sizeof(dir), nullptr));
3207   PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/libhpddm_petsc", dir));
3208   PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3209 #if defined(SLEPC_LIB_DIR) /* this variable is passed during SLEPc ./configure when PETSc has not been configured   */
3210   if (!*found) {           /* with --download-hpddm since slepcconf.h is not yet built (and thus can't be included) */
3211     PetscCall(PetscStrncpy(dir, HPDDM_STR(SLEPC_LIB_DIR), sizeof(dir)));
3212     PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/libhpddm_petsc", dir));
3213     PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3214   }
3215 #endif
3216   if (!*found) { /* probable options for this to evaluate to PETSC_TRUE: system inconsistency (libhpddm_petsc moved by user?) or PETSc configured without --download-slepc */
3217     PetscCall(PetscOptionsGetenv(PETSC_COMM_SELF, "SLEPC_DIR", dir, sizeof(dir), &flg));
3218     if (flg) { /* if both PETSc and SLEPc are configured with --download-hpddm but PETSc has been configured without --download-slepc, one must ensure that libslepc is loaded before libhpddm_petsc */
3219       PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/lib/libslepc", dir));
3220       PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3221       PetscCheck(*found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s not found but SLEPC_DIR=%s", lib, dir);
3222       PetscCall(PetscDLLibraryAppend(PETSC_COMM_SELF, &PetscDLLibrariesLoaded, dlib));
3223       PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/lib/libhpddm_petsc", dir)); /* libhpddm_petsc is always in the same directory as libslepc */
3224       PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3225     }
3226   }
3227   PetscCheck(*found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s not found", lib);
3228   PetscCall(PetscDLLibraryAppend(PETSC_COMM_SELF, &PetscDLLibrariesLoaded, dlib));
3229   PetscFunctionReturn(PETSC_SUCCESS);
3230 }
3231 
3232 /*MC
3233    PCHPDDM - Interface with the HPDDM library.
3234 
3235    This `PC` may be used to build multilevel spectral domain decomposition methods based on the GenEO framework {cite}`spillane2011robust` {cite}`al2021multilevel`.
3236    It may be viewed as an alternative to spectral
3237    AMGe or `PCBDDC` with adaptive selection of constraints. The interface is explained in details in {cite}`jolivetromanzampini2020`
3238 
3239    The matrix used for building the preconditioner (Pmat) may be unassembled (`MATIS`), assembled (`MATAIJ`, `MATBAIJ`, or `MATSBAIJ`), hierarchical (`MATHTOOL`), `MATNORMAL`, `MATNORMALHERMITIAN`, or `MATSCHURCOMPLEMENT` (when `PCHPDDM` is used as part of an outer `PCFIELDSPLIT`).
3240 
3241    For multilevel preconditioning, when using an assembled or hierarchical Pmat, one must provide an auxiliary local `Mat` (unassembled local operator for GenEO) using
3242    `PCHPDDMSetAuxiliaryMat()`. Calling this routine is not needed when using a `MATIS` Pmat, assembly is done internally using `MatConvert()`.
3243 
3244    Options Database Keys:
3245 +   -pc_hpddm_define_subdomains <true, default=false>    - on the finest level, calls `PCASMSetLocalSubdomains()` with the `IS` supplied in `PCHPDDMSetAuxiliaryMat()`
3246                                                          (not relevant with an unassembled Pmat)
3247 .   -pc_hpddm_has_neumann <true, default=false>          - on the finest level, informs the `PC` that the local Neumann matrix is supplied in `PCHPDDMSetAuxiliaryMat()`
3248 -   -pc_hpddm_coarse_correction <type, default=deflated> - determines the `PCHPDDMCoarseCorrectionType` when calling `PCApply()`
3249 
3250    Options for subdomain solvers, subdomain eigensolvers (for computing deflation vectors), and the coarse solver can be set using the following options database prefixes.
3251 .vb
3252       -pc_hpddm_levels_%d_pc_
3253       -pc_hpddm_levels_%d_ksp_
3254       -pc_hpddm_levels_%d_eps_
3255       -pc_hpddm_levels_%d_p
3256       -pc_hpddm_levels_%d_mat_type
3257       -pc_hpddm_coarse_
3258       -pc_hpddm_coarse_p
3259       -pc_hpddm_coarse_mat_type
3260       -pc_hpddm_coarse_mat_filter
3261 .ve
3262 
3263    E.g., -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 10 -pc_hpddm_levels_2_p 4 -pc_hpddm_levels_2_sub_pc_type lu -pc_hpddm_levels_2_eps_nev 10
3264     -pc_hpddm_coarse_p 2 -pc_hpddm_coarse_mat_type baij will use 10 deflation vectors per subdomain on the fine "level 1",
3265     aggregate the fine subdomains into 4 "level 2" subdomains, then use 10 deflation vectors per subdomain on "level 2",
3266     and assemble the coarse matrix (of dimension 4 x 10 = 40) on two processes as a `MATBAIJ` (default is `MATSBAIJ`).
3267 
3268    In order to activate a "level N+1" coarse correction, it is mandatory to call -pc_hpddm_levels_N_eps_nev <nu> or -pc_hpddm_levels_N_eps_threshold_absolute <val>. The default -pc_hpddm_coarse_p value is 1, meaning that the coarse operator is aggregated on a single process.
3269 
3270    Level: intermediate
3271 
3272    Notes:
3273    This preconditioner requires that PETSc is built with SLEPc (``--download-slepc``).
3274 
3275    By default, the underlying concurrent eigenproblems
3276    are solved using SLEPc shift-and-invert spectral transformation. This is usually what gives the best performance for GenEO, cf.
3277    {cite}`spillane2011robust` {cite}`jolivet2013scalabledd`. As
3278    stated above, SLEPc options are available through -pc_hpddm_levels_%d_, e.g., -pc_hpddm_levels_1_eps_type arpack -pc_hpddm_levels_1_eps_nev 10
3279    -pc_hpddm_levels_1_st_type sinvert. There are furthermore three options related to the (subdomain-wise local) eigensolver that are not described in
3280    SLEPc documentation since they are specific to `PCHPDDM`.
3281 .vb
3282       -pc_hpddm_levels_1_st_share_sub_ksp
3283       -pc_hpddm_levels_%d_eps_threshold_absolute
3284       -pc_hpddm_levels_1_eps_use_inertia
3285 .ve
3286 
3287    The first option from the list only applies to the fine-level eigensolver, see `PCHPDDMSetSTShareSubKSP()`. The second option from the list is
3288    used to filter eigenmodes retrieved after convergence of `EPSSolve()` at "level N" such that eigenvectors used to define a "level N+1" coarse
3289    correction are associated to eigenvalues whose magnitude are lower or equal than -pc_hpddm_levels_N_eps_threshold_absolute. When using an `EPS` which cannot
3290    determine a priori the proper -pc_hpddm_levels_N_eps_nev such that all wanted eigenmodes are retrieved, it is possible to get an estimation of the
3291    correct value using the third option from the list, -pc_hpddm_levels_1_eps_use_inertia, see `MatGetInertia()`. In that case, there is no need
3292    to supply -pc_hpddm_levels_1_eps_nev. This last option also only applies to the fine-level (N = 1) eigensolver.
3293 
3294    See also {cite}`dolean2015introduction`, {cite}`al2022robust`, {cite}`al2022robustpd`, and {cite}`nataf2022recent`
3295 
3296 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCHPDDMSetAuxiliaryMat()`, `MATIS`, `PCBDDC`, `PCDEFLATION`, `PCTELESCOPE`, `PCASM`,
3297           `PCHPDDMSetCoarseCorrectionType()`, `PCHPDDMHasNeumannMat()`, `PCHPDDMSetRHSMat()`, `PCHPDDMSetDeflationMat()`, `PCHPDDMSetSTShareSubKSP()`,
3298           `PCHPDDMGetSTShareSubKSP()`, `PCHPDDMGetCoarseCorrectionType()`, `PCHPDDMGetComplexities()`
3299 M*/
3300 PETSC_EXTERN PetscErrorCode PCCreate_HPDDM(PC pc)
3301 {
3302   PC_HPDDM *data;
3303   PetscBool found;
3304 
3305   PetscFunctionBegin;
3306   if (!loadedSym) {
3307     PetscCall(HPDDMLoadDL_Private(&found));
3308     if (found) PetscCall(PetscDLLibrarySym(PETSC_COMM_SELF, &PetscDLLibrariesLoaded, nullptr, "PCHPDDM_Internal", (void **)&loadedSym));
3309   }
3310   PetscCheck(loadedSym, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCHPDDM_Internal symbol not found in loaded libhpddm_petsc");
3311   PetscCall(PetscNew(&data));
3312   pc->data                   = data;
3313   data->Neumann              = PETSC_BOOL3_UNKNOWN;
3314   pc->ops->reset             = PCReset_HPDDM;
3315   pc->ops->destroy           = PCDestroy_HPDDM;
3316   pc->ops->setfromoptions    = PCSetFromOptions_HPDDM;
3317   pc->ops->setup             = PCSetUp_HPDDM;
3318   pc->ops->apply             = PCApply_HPDDM<false>;
3319   pc->ops->matapply          = PCMatApply_HPDDM<false>;
3320   pc->ops->applytranspose    = PCApply_HPDDM<true>;
3321   pc->ops->matapplytranspose = PCMatApply_HPDDM<true>;
3322   pc->ops->view              = PCView_HPDDM;
3323   pc->ops->presolve          = PCPreSolve_HPDDM;
3324 
3325   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetAuxiliaryMat_C", PCHPDDMSetAuxiliaryMat_HPDDM));
3326   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMHasNeumannMat_C", PCHPDDMHasNeumannMat_HPDDM));
3327   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetRHSMat_C", PCHPDDMSetRHSMat_HPDDM));
3328   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetCoarseCorrectionType_C", PCHPDDMSetCoarseCorrectionType_HPDDM));
3329   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetCoarseCorrectionType_C", PCHPDDMGetCoarseCorrectionType_HPDDM));
3330   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetSTShareSubKSP_C", PCHPDDMSetSTShareSubKSP_HPDDM));
3331   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetSTShareSubKSP_C", PCHPDDMGetSTShareSubKSP_HPDDM));
3332   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetDeflationMat_C", PCHPDDMSetDeflationMat_HPDDM));
3333   PetscFunctionReturn(PETSC_SUCCESS);
3334 }
3335 
3336 /*@C
3337   PCHPDDMInitializePackage - This function initializes everything in the `PCHPDDM` package. It is called from `PCInitializePackage()`.
3338 
3339   Level: developer
3340 
3341 .seealso: [](ch_ksp), `PetscInitialize()`
3342 @*/
3343 PetscErrorCode PCHPDDMInitializePackage(void)
3344 {
3345   char ename[32];
3346 
3347   PetscFunctionBegin;
3348   if (PCHPDDMPackageInitialized) PetscFunctionReturn(PETSC_SUCCESS);
3349   PCHPDDMPackageInitialized = PETSC_TRUE;
3350   PetscCall(PetscRegisterFinalize(PCHPDDMFinalizePackage));
3351   /* general events registered once during package initialization */
3352   /* some of these events are not triggered in libpetsc,          */
3353   /* but rather directly in libhpddm_petsc,                       */
3354   /* which is in charge of performing the following operations    */
3355 
3356   /* domain decomposition structure from Pmat sparsity pattern    */
3357   PetscCall(PetscLogEventRegister("PCHPDDMStrc", PC_CLASSID, &PC_HPDDM_Strc));
3358   /* Galerkin product, redistribution, and setup (not triggered in libpetsc)                */
3359   PetscCall(PetscLogEventRegister("PCHPDDMPtAP", PC_CLASSID, &PC_HPDDM_PtAP));
3360   /* Galerkin product with summation, redistribution, and setup (not triggered in libpetsc) */
3361   PetscCall(PetscLogEventRegister("PCHPDDMPtBP", PC_CLASSID, &PC_HPDDM_PtBP));
3362   /* next level construction using PtAP and PtBP (not triggered in libpetsc)                */
3363   PetscCall(PetscLogEventRegister("PCHPDDMNext", PC_CLASSID, &PC_HPDDM_Next));
3364   static_assert(PETSC_PCHPDDM_MAXLEVELS <= 9, "PETSC_PCHPDDM_MAXLEVELS value is too high");
3365   for (PetscInt i = 1; i < PETSC_PCHPDDM_MAXLEVELS; ++i) {
3366     PetscCall(PetscSNPrintf(ename, sizeof(ename), "PCHPDDMSetUp L%1" PetscInt_FMT, i));
3367     /* events during a PCSetUp() at level #i _except_ the assembly */
3368     /* of the Galerkin operator of the coarser level #(i + 1)      */
3369     PetscCall(PetscLogEventRegister(ename, PC_CLASSID, &PC_HPDDM_SetUp[i - 1]));
3370     PetscCall(PetscSNPrintf(ename, sizeof(ename), "PCHPDDMSolve L%1" PetscInt_FMT, i));
3371     /* events during a PCApply() at level #i _except_              */
3372     /* the KSPSolve() of the coarser level #(i + 1)                */
3373     PetscCall(PetscLogEventRegister(ename, PC_CLASSID, &PC_HPDDM_Solve[i - 1]));
3374   }
3375   PetscFunctionReturn(PETSC_SUCCESS);
3376 }
3377 
3378 /*@C
3379   PCHPDDMFinalizePackage - This function frees everything from the `PCHPDDM` package. It is called from `PetscFinalize()`.
3380 
3381   Level: developer
3382 
3383 .seealso: [](ch_ksp), `PetscFinalize()`
3384 @*/
3385 PetscErrorCode PCHPDDMFinalizePackage(void)
3386 {
3387   PetscFunctionBegin;
3388   PCHPDDMPackageInitialized = PETSC_FALSE;
3389   PetscFunctionReturn(PETSC_SUCCESS);
3390 }
3391 
3392 static PetscErrorCode MatMult_Harmonic(Mat A, Vec x, Vec y)
3393 {
3394   Harmonic h; /* [ A_00  A_01       ], furthermore, A_00 = [ A_loc,loc  A_loc,ovl ], thus, A_01 = [         ] */
3395               /* [ A_10  A_11  A_12 ]                      [ A_ovl,loc  A_ovl,ovl ]               [ A_ovl,1 ] */
3396   Vec sub;    /*  y = A x = R_loc R_0 [ A_00  A_01 ]^-1                                   R_loc = [  I_loc  ] */
3397               /*                      [ A_10  A_11 ]    R_1^T A_12 x                              [         ] */
3398   PetscFunctionBegin;
3399   PetscCall(MatShellGetContext(A, &h));
3400   PetscCall(VecSet(h->v, 0.0));
3401   PetscCall(VecGetSubVector(h->v, h->is[0], &sub));
3402   PetscCall(MatMult(h->A[0], x, sub));
3403   PetscCall(VecRestoreSubVector(h->v, h->is[0], &sub));
3404   PetscCall(KSPSolve(h->ksp, h->v, h->v));
3405   PetscCall(VecISCopy(h->v, h->is[1], SCATTER_REVERSE, y));
3406   PetscFunctionReturn(PETSC_SUCCESS);
3407 }
3408 
3409 static PetscErrorCode MatMultTranspose_Harmonic(Mat A, Vec y, Vec x)
3410 {
3411   Harmonic h;   /* x = A^T y =            [ A_00  A_01 ]^-T R_0^T R_loc^T y */
3412   Vec      sub; /*             A_12^T R_1 [ A_10  A_11 ]                    */
3413 
3414   PetscFunctionBegin;
3415   PetscCall(MatShellGetContext(A, &h));
3416   PetscCall(VecSet(h->v, 0.0));
3417   PetscCall(VecISCopy(h->v, h->is[1], SCATTER_FORWARD, y));
3418   PetscCall(KSPSolveTranspose(h->ksp, h->v, h->v));
3419   PetscCall(VecGetSubVector(h->v, h->is[0], &sub));
3420   PetscCall(MatMultTranspose(h->A[0], sub, x));
3421   PetscCall(VecRestoreSubVector(h->v, h->is[0], &sub));
3422   PetscFunctionReturn(PETSC_SUCCESS);
3423 }
3424 
3425 static PetscErrorCode MatProduct_AB_Harmonic(Mat S, Mat X, Mat Y, void *)
3426 {
3427   Harmonic h;
3428   Mat      A, B;
3429   Vec      a, b;
3430 
3431   PetscFunctionBegin;
3432   PetscCall(MatShellGetContext(S, &h));
3433   PetscCall(MatMatMult(h->A[0], X, MAT_INITIAL_MATRIX, PETSC_CURRENT, &A));
3434   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, A->cmap->n, nullptr, &B));
3435   for (PetscInt i = 0; i < A->cmap->n; ++i) {
3436     PetscCall(MatDenseGetColumnVecRead(A, i, &a));
3437     PetscCall(MatDenseGetColumnVecWrite(B, i, &b));
3438     PetscCall(VecISCopy(b, h->is[0], SCATTER_FORWARD, a));
3439     PetscCall(MatDenseRestoreColumnVecWrite(B, i, &b));
3440     PetscCall(MatDenseRestoreColumnVecRead(A, i, &a));
3441   }
3442   PetscCall(MatDestroy(&A));
3443   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, B->cmap->n, nullptr, &A));
3444   PetscCall(KSPMatSolve(h->ksp, B, A));
3445   PetscCall(MatDestroy(&B));
3446   for (PetscInt i = 0; i < A->cmap->n; ++i) {
3447     PetscCall(MatDenseGetColumnVecRead(A, i, &a));
3448     PetscCall(MatDenseGetColumnVecWrite(Y, i, &b));
3449     PetscCall(VecISCopy(a, h->is[1], SCATTER_REVERSE, b));
3450     PetscCall(MatDenseRestoreColumnVecWrite(Y, i, &b));
3451     PetscCall(MatDenseRestoreColumnVecRead(A, i, &a));
3452   }
3453   PetscCall(MatDestroy(&A));
3454   PetscFunctionReturn(PETSC_SUCCESS);
3455 }
3456 
3457 static PetscErrorCode MatProduct_AtB_Harmonic(Mat S, Mat Y, Mat X, void *)
3458 {
3459   Harmonic h;
3460   Mat      A, B;
3461   Vec      a, b;
3462 
3463   PetscFunctionBegin;
3464   PetscCall(MatShellGetContext(S, &h));
3465   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, Y->cmap->n, nullptr, &A));
3466   for (PetscInt i = 0; i < A->cmap->n; ++i) {
3467     PetscCall(MatDenseGetColumnVecRead(Y, i, &b));
3468     PetscCall(MatDenseGetColumnVecWrite(A, i, &a));
3469     PetscCall(VecISCopy(a, h->is[1], SCATTER_FORWARD, b));
3470     PetscCall(MatDenseRestoreColumnVecWrite(A, i, &a));
3471     PetscCall(MatDenseRestoreColumnVecRead(Y, i, &b));
3472   }
3473   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, A->cmap->n, nullptr, &B));
3474   PetscCall(KSPMatSolveTranspose(h->ksp, A, B));
3475   PetscCall(MatDestroy(&A));
3476   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->A[0]->rmap->n, B->cmap->n, nullptr, &A));
3477   for (PetscInt i = 0; i < A->cmap->n; ++i) {
3478     PetscCall(MatDenseGetColumnVecRead(B, i, &b));
3479     PetscCall(MatDenseGetColumnVecWrite(A, i, &a));
3480     PetscCall(VecISCopy(b, h->is[0], SCATTER_REVERSE, a));
3481     PetscCall(MatDenseRestoreColumnVecWrite(A, i, &a));
3482     PetscCall(MatDenseRestoreColumnVecRead(B, i, &b));
3483   }
3484   PetscCall(MatDestroy(&B));
3485   PetscCall(MatTransposeMatMult(h->A[0], A, MAT_REUSE_MATRIX, PETSC_CURRENT, &X));
3486   PetscCall(MatDestroy(&A));
3487   PetscFunctionReturn(PETSC_SUCCESS);
3488 }
3489 
3490 static PetscErrorCode MatDestroy_Harmonic(Mat A)
3491 {
3492   Harmonic h;
3493 
3494   PetscFunctionBegin;
3495   PetscCall(MatShellGetContext(A, &h));
3496   for (PetscInt i = 0; i < 5; ++i) PetscCall(ISDestroy(h->is + i));
3497   PetscCall(PetscFree(h->is));
3498   PetscCall(VecDestroy(&h->v));
3499   for (PetscInt i = 0; i < 2; ++i) PetscCall(MatDestroy(h->A + i));
3500   PetscCall(PetscFree(h->A));
3501   PetscCall(KSPDestroy(&h->ksp));
3502   PetscCall(PetscFree(h));
3503   PetscFunctionReturn(PETSC_SUCCESS);
3504 }
3505