xref: /petsc/src/ksp/pc/impls/hpddm/pchpddm.cxx (revision 2cbcbef77dda252e1c494ee04ccf7e93db94407d)
1 #include <petsc/private/sfimpl.h>
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/matimpl.h>
4 #include <petsc/private/petschpddm.h> /*I "petscpc.h" I*/
5 #include <petsc/private/pcimpl.h>
6 #include <petsc/private/dmimpl.h> /* this must be included after petschpddm.h so that DM_MAX_WORK_VECTORS is not defined  */
7                                   /* otherwise, it is assumed that one is compiling libhpddm_petsc => circular dependency */
8 
9 static PetscErrorCode (*loadedSym)(HPDDM::Schwarz<PetscScalar> *const, IS, Mat, Mat, Mat, std::vector<Vec>, PC_HPDDM_Level **const) = nullptr;
10 
11 static PetscBool PCHPDDMPackageInitialized = PETSC_FALSE;
12 
13 PetscLogEvent PC_HPDDM_Strc;
14 PetscLogEvent PC_HPDDM_PtAP;
15 PetscLogEvent PC_HPDDM_PtBP;
16 PetscLogEvent PC_HPDDM_Next;
17 PetscLogEvent PC_HPDDM_SetUp[PETSC_PCHPDDM_MAXLEVELS];
18 PetscLogEvent PC_HPDDM_Solve[PETSC_PCHPDDM_MAXLEVELS];
19 
20 const char *const PCHPDDMCoarseCorrectionTypes[] = {"DEFLATED", "ADDITIVE", "BALANCED", "NONE", "PCHPDDMCoarseCorrectionType", "PC_HPDDM_COARSE_CORRECTION_", nullptr};
21 const char *const PCHPDDMSchurPreTypes[]         = {"LEAST_SQUARES", "GENEO", "PCHPDDMSchurPreType", "PC_HPDDM_SCHUR_PRE", nullptr};
22 
23 static PetscErrorCode PCReset_HPDDM(PC pc)
24 {
25   PC_HPDDM *data = (PC_HPDDM *)pc->data;
26 
27   PetscFunctionBegin;
28   if (data->levels) {
29     for (PetscInt i = 0; i < PETSC_PCHPDDM_MAXLEVELS && data->levels[i]; ++i) {
30       PetscCall(KSPDestroy(&data->levels[i]->ksp));
31       PetscCall(PCDestroy(&data->levels[i]->pc));
32       PetscCall(PetscFree(data->levels[i]));
33     }
34     PetscCall(PetscFree(data->levels));
35   }
36   PetscCall(ISDestroy(&data->is));
37   PetscCall(MatDestroy(&data->aux));
38   PetscCall(MatDestroy(&data->B));
39   PetscCall(VecDestroy(&data->normal));
40   data->correction = PC_HPDDM_COARSE_CORRECTION_DEFLATED;
41   data->Neumann    = PETSC_BOOL3_UNKNOWN;
42   data->deflation  = PETSC_FALSE;
43   data->setup      = nullptr;
44   data->setup_ctx  = nullptr;
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 static PetscErrorCode PCDestroy_HPDDM(PC pc)
49 {
50   PC_HPDDM *data = (PC_HPDDM *)pc->data;
51 
52   PetscFunctionBegin;
53   PetscCall(PCReset_HPDDM(pc));
54   PetscCall(PetscFree(data));
55   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, nullptr));
56   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetAuxiliaryMat_C", nullptr));
57   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMHasNeumannMat_C", nullptr));
58   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetRHSMat_C", nullptr));
59   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetCoarseCorrectionType_C", nullptr));
60   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetCoarseCorrectionType_C", nullptr));
61   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetSTShareSubKSP_C", nullptr));
62   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetSTShareSubKSP_C", nullptr));
63   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetDeflationMat_C", nullptr));
64   PetscCall(PetscObjectCompose((PetscObject)pc, "_PCHPDDM_Schur", nullptr));
65   PetscFunctionReturn(PETSC_SUCCESS);
66 }
67 
68 static inline PetscErrorCode PCHPDDMSetAuxiliaryMat_Private(PC pc, IS is, Mat A, PetscBool deflation)
69 {
70   PC_HPDDM                   *data = (PC_HPDDM *)pc->data;
71   PCHPDDMCoarseCorrectionType type = data->correction;
72 
73   PetscFunctionBegin;
74   PetscValidLogicalCollectiveBool(pc, deflation, 4);
75   if (is && A) {
76     PetscInt m[2];
77 
78     PetscCall(ISGetLocalSize(is, m));
79     PetscCall(MatGetLocalSize(A, m + 1, nullptr));
80     PetscCheck(m[0] == m[1], PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Inconsistent IS and Mat sizes (%" PetscInt_FMT " v. %" PetscInt_FMT ")", m[0], m[1]);
81   }
82   if (is) {
83     PetscCall(PetscObjectReference((PetscObject)is));
84     if (data->is) { /* new overlap definition resets the PC */
85       PetscCall(PCReset_HPDDM(pc));
86       pc->setfromoptionscalled = 0;
87       pc->setupcalled          = PETSC_FALSE;
88       data->correction         = type;
89     }
90     PetscCall(ISDestroy(&data->is));
91     data->is = is;
92   }
93   if (A) {
94     PetscCall(PetscObjectReference((PetscObject)A));
95     PetscCall(MatDestroy(&data->aux));
96     data->aux = A;
97   }
98   data->deflation = deflation;
99   PetscFunctionReturn(PETSC_SUCCESS);
100 }
101 
102 static inline PetscErrorCode PCHPDDMSplittingMatNormal_Private(Mat A, IS *is, Mat *splitting[])
103 {
104   Mat *sub;
105   IS   zero;
106 
107   PetscFunctionBegin;
108   PetscCall(MatSetOption(A, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
109   PetscCall(MatCreateSubMatrices(A, 1, is + 2, is, MAT_INITIAL_MATRIX, splitting));
110   PetscCall(MatCreateSubMatrices(**splitting, 1, is + 2, is + 1, MAT_INITIAL_MATRIX, &sub));
111   PetscCall(MatFindZeroRows(*sub, &zero));
112   PetscCall(MatDestroySubMatrices(1, &sub));
113   PetscCall(MatSetOption(**splitting, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
114   PetscCall(MatZeroRowsIS(**splitting, zero, 0.0, nullptr, nullptr));
115   PetscCall(ISDestroy(&zero));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
118 
119 static inline PetscErrorCode PCHPDDMSetAuxiliaryMatNormal_Private(PC pc, Mat A, Mat N, Mat *B, const char *pcpre, Vec *diagonal = nullptr, Mat B01 = nullptr)
120 {
121   PC_HPDDM *data         = (PC_HPDDM *)pc->data;
122   Mat      *splitting[2] = {}, aux;
123   Vec       d;
124   IS        is[3];
125   PetscReal norm;
126   PetscBool flg;
127   char      type[256] = {}; /* same size as in src/ksp/pc/interface/pcset.c */
128 
129   PetscFunctionBegin;
130   if (!B01) PetscCall(MatConvert(N, MATAIJ, MAT_INITIAL_MATRIX, B));
131   else PetscCall(MatTransposeMatMult(B01, A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, B));
132   PetscCall(MatEliminateZeros(*B, PETSC_TRUE));
133   PetscCall(ISCreateStride(PETSC_COMM_SELF, A->cmap->n, A->cmap->rstart, 1, is));
134   PetscCall(MatIncreaseOverlap(*B, 1, is, 1));
135   PetscCall(ISCreateStride(PETSC_COMM_SELF, A->cmap->n, A->cmap->rstart, 1, is + 2));
136   PetscCall(ISEmbed(is[0], is[2], PETSC_TRUE, is + 1));
137   PetscCall(ISDestroy(is + 2));
138   PetscCall(ISCreateStride(PETSC_COMM_SELF, A->rmap->N, 0, 1, is + 2));
139   PetscCall(PCHPDDMSplittingMatNormal_Private(A, is, &splitting[0]));
140   if (B01) {
141     PetscCall(PCHPDDMSplittingMatNormal_Private(B01, is, &splitting[1]));
142     PetscCall(MatDestroy(&B01));
143   }
144   PetscCall(ISDestroy(is + 2));
145   PetscCall(ISDestroy(is + 1));
146   PetscCall(PetscOptionsGetString(((PetscObject)pc)->options, pcpre, "-pc_hpddm_levels_1_sub_pc_type", type, sizeof(type), nullptr));
147   PetscCall(PetscStrcmp(type, PCQR, &flg));
148   if (!flg) {
149     Mat conjugate = *splitting[splitting[1] ? 1 : 0];
150 
151     if (PetscDefined(USE_COMPLEX) && !splitting[1]) {
152       PetscCall(MatDuplicate(*splitting[0], MAT_COPY_VALUES, &conjugate));
153       PetscCall(MatConjugate(conjugate));
154     }
155     PetscCall(MatTransposeMatMult(conjugate, *splitting[0], MAT_INITIAL_MATRIX, PETSC_DETERMINE, &aux));
156     if (PetscDefined(USE_COMPLEX) && !splitting[1]) PetscCall(MatDestroy(&conjugate));
157     else if (splitting[1]) PetscCall(MatDestroySubMatrices(1, &splitting[1]));
158     PetscCall(MatNorm(aux, NORM_FROBENIUS, &norm));
159     PetscCall(MatSetOption(aux, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
160     if (diagonal) {
161       PetscReal norm;
162 
163       PetscCall(VecScale(*diagonal, -1.0));
164       PetscCall(VecNorm(*diagonal, NORM_INFINITY, &norm));
165       if (norm > PETSC_SMALL) {
166         PetscSF  scatter;
167         PetscInt n;
168 
169         PetscCall(ISGetLocalSize(*is, &n));
170         PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pc), n, PETSC_DECIDE, &d));
171         PetscCall(VecScatterCreate(*diagonal, *is, d, nullptr, &scatter));
172         PetscCall(VecScatterBegin(scatter, *diagonal, d, INSERT_VALUES, SCATTER_FORWARD));
173         PetscCall(VecScatterEnd(scatter, *diagonal, d, INSERT_VALUES, SCATTER_FORWARD));
174         PetscCall(PetscSFDestroy(&scatter));
175         PetscCall(MatDiagonalSet(aux, d, ADD_VALUES));
176         PetscCall(VecDestroy(&d));
177       } else PetscCall(VecDestroy(diagonal));
178     }
179     if (!diagonal) PetscCall(MatShift(aux, PETSC_SMALL * norm));
180     PetscCall(MatEliminateZeros(aux, PETSC_TRUE));
181   } else {
182     PetscBool flg;
183 
184     PetscCheck(!splitting[1], PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot use PCQR when A01 != A10^T");
185     if (diagonal) {
186       PetscCall(VecNorm(*diagonal, NORM_INFINITY, &norm));
187       PetscCheck(norm < PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Nonzero diagonal A11 block");
188       PetscCall(VecDestroy(diagonal));
189     }
190     PetscCall(PetscObjectTypeCompare((PetscObject)N, MATNORMAL, &flg));
191     if (flg) PetscCall(MatCreateNormal(*splitting[0], &aux));
192     else PetscCall(MatCreateNormalHermitian(*splitting[0], &aux));
193   }
194   PetscCall(MatDestroySubMatrices(1, &splitting[0]));
195   PetscCall(PCHPDDMSetAuxiliaryMat(pc, *is, aux, nullptr, nullptr));
196   data->Neumann = PETSC_BOOL3_TRUE;
197   PetscCall(ISDestroy(is));
198   PetscCall(MatDestroy(&aux));
199   PetscFunctionReturn(PETSC_SUCCESS);
200 }
201 
202 static PetscErrorCode PCHPDDMSetAuxiliaryMat_HPDDM(PC pc, IS is, Mat A, PetscErrorCode (*setup)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *), void *setup_ctx)
203 {
204   PC_HPDDM *data = (PC_HPDDM *)pc->data;
205 
206   PetscFunctionBegin;
207   PetscCall(PCHPDDMSetAuxiliaryMat_Private(pc, is, A, PETSC_FALSE));
208   if (setup) {
209     data->setup     = setup;
210     data->setup_ctx = setup_ctx;
211   }
212   PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 
215 /*@C
216   PCHPDDMSetAuxiliaryMat - Sets the auxiliary matrix used by `PCHPDDM` for the concurrent GenEO problems at the finest level.
217 
218   Input Parameters:
219 + pc    - preconditioner context
220 . is    - index set of the local auxiliary, e.g., Neumann, matrix
221 . A     - auxiliary sequential matrix
222 . setup - function for generating the auxiliary matrix entries, may be `NULL`
223 - ctx   - context for `setup`, may be `NULL`
224 
225   Calling sequence of `setup`:
226 + J   - matrix whose values are to be set
227 . t   - time
228 . X   - linearization point
229 . X_t - time-derivative of the linearization point
230 . s   - step
231 . ovl - index set of the local auxiliary, e.g., Neumann, matrix
232 - ctx - context for `setup`, may be `NULL`
233 
234   Level: intermediate
235 
236   Note:
237   As an example, in a finite element context with nonoverlapping subdomains plus (overlapping) ghost elements, this could be the unassembled (Neumann)
238   local overlapping operator. As opposed to the assembled (Dirichlet) local overlapping operator obtained by summing neighborhood contributions
239   at the interface of ghost elements.
240 
241   Fortran Notes:
242   Only `PETSC_NULL_FUNCTION` is supported for `setup` and `ctx` is never accessed
243 
244 .seealso: [](ch_ksp), `PCHPDDM`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCHPDDMSetRHSMat()`, `MATIS`
245 @*/
246 PetscErrorCode PCHPDDMSetAuxiliaryMat(PC pc, IS is, Mat A, PetscErrorCode (*setup)(Mat J, PetscReal t, Vec X, Vec X_t, PetscReal s, IS ovl, PetscCtx ctx), PetscCtx ctx)
247 {
248   PetscFunctionBegin;
249   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
250   if (is) PetscValidHeaderSpecific(is, IS_CLASSID, 2);
251   if (A) PetscValidHeaderSpecific(A, MAT_CLASSID, 3);
252   PetscTryMethod(pc, "PCHPDDMSetAuxiliaryMat_C", (PC, IS, Mat, PetscErrorCode (*)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *), void *), (pc, is, A, setup, ctx));
253   PetscFunctionReturn(PETSC_SUCCESS);
254 }
255 
256 static PetscErrorCode PCHPDDMHasNeumannMat_HPDDM(PC pc, PetscBool has)
257 {
258   PC_HPDDM *data = (PC_HPDDM *)pc->data;
259 
260   PetscFunctionBegin;
261   data->Neumann = PetscBoolToBool3(has);
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 /*@
266   PCHPDDMHasNeumannMat - Informs `PCHPDDM` that the `Mat` passed to `PCHPDDMSetAuxiliaryMat()` is the local Neumann matrix.
267 
268   Input Parameters:
269 + pc  - preconditioner context
270 - has - Boolean value
271 
272   Level: intermediate
273 
274   Notes:
275   This may be used to bypass a call to `MatCreateSubMatrices()` and to `MatConvert()` for `MATSBAIJ` matrices.
276 
277   If a function is composed with DMCreateNeumannOverlap_C implementation is available in the `DM` attached to the Pmat, or the Amat, or the `PC`, the flag is internally set to `PETSC_TRUE`. Its default value is otherwise `PETSC_FALSE`.
278 
279 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMSetAuxiliaryMat()`
280 @*/
281 PetscErrorCode PCHPDDMHasNeumannMat(PC pc, PetscBool has)
282 {
283   PetscFunctionBegin;
284   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
285   PetscTryMethod(pc, "PCHPDDMHasNeumannMat_C", (PC, PetscBool), (pc, has));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode PCHPDDMSetRHSMat_HPDDM(PC pc, Mat B)
290 {
291   PC_HPDDM *data = (PC_HPDDM *)pc->data;
292 
293   PetscFunctionBegin;
294   PetscCall(PetscObjectReference((PetscObject)B));
295   PetscCall(MatDestroy(&data->B));
296   data->B = B;
297   PetscFunctionReturn(PETSC_SUCCESS);
298 }
299 
300 /*@
301   PCHPDDMSetRHSMat - Sets the right-hand side matrix used by `PCHPDDM` for the concurrent GenEO problems at the finest level.
302 
303   Input Parameters:
304 + pc - preconditioner context
305 - B  - right-hand side sequential matrix
306 
307   Level: advanced
308 
309   Note:
310   Must be used in conjunction with `PCHPDDMSetAuxiliaryMat`(N), so that Nv = lambda Bv is solved using `EPSSetOperators`(N, B).
311   It is assumed that N and `B` are provided using the same numbering. This provides a means to try more advanced methods such as GenEO-II or H-GenEO.
312 
313 .seealso: [](ch_ksp), `PCHPDDMSetAuxiliaryMat()`, `PCHPDDM`
314 @*/
315 PetscErrorCode PCHPDDMSetRHSMat(PC pc, Mat B)
316 {
317   PetscFunctionBegin;
318   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
319   if (B) {
320     PetscValidHeaderSpecific(B, MAT_CLASSID, 2);
321     PetscTryMethod(pc, "PCHPDDMSetRHSMat_C", (PC, Mat), (pc, B));
322   }
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 static PetscErrorCode PCSetFromOptions_HPDDM(PC pc, PetscOptionItems PetscOptionsObject)
327 {
328   PC_HPDDM                   *data   = (PC_HPDDM *)pc->data;
329   PC_HPDDM_Level            **levels = data->levels;
330   char                        prefix[256], deprecated[256];
331   int                         i = 1;
332   PetscMPIInt                 size, previous;
333   PetscInt                    n, overlap = 1;
334   PCHPDDMCoarseCorrectionType type;
335   PetscBool                   flg = PETSC_TRUE, set;
336 
337   PetscFunctionBegin;
338   if (!data->levels) {
339     PetscCall(PetscCalloc1(PETSC_PCHPDDM_MAXLEVELS, &levels));
340     data->levels = levels;
341   }
342   PetscOptionsHeadBegin(PetscOptionsObject, "PCHPDDM options");
343   PetscCall(PetscOptionsBoundedInt("-pc_hpddm_harmonic_overlap", "Overlap prior to computing local harmonic extensions", "PCHPDDM", overlap, &overlap, &set, 1));
344   if (!set) overlap = -1;
345   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
346   previous = size;
347   while (i < PETSC_PCHPDDM_MAXLEVELS) {
348     PetscInt p = 1;
349 
350     if (!data->levels[i - 1]) PetscCall(PetscNew(data->levels + i - 1));
351     data->levels[i - 1]->parent = data;
352     /* if the previous level has a single process, it is not possible to coarsen further */
353     if (previous == 1 || !flg) break;
354     data->levels[i - 1]->nu        = 0;
355     data->levels[i - 1]->threshold = -1.0;
356     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_nev", i));
357     PetscCall(PetscOptionsBoundedInt(prefix, "Local number of deflation vectors computed by SLEPc", "EPSSetDimensions", data->levels[i - 1]->nu, &data->levels[i - 1]->nu, nullptr, 0));
358     PetscCall(PetscSNPrintf(deprecated, sizeof(deprecated), "-pc_hpddm_levels_%d_eps_threshold", i));
359     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_threshold_absolute", i));
360     PetscCall(PetscOptionsDeprecated(deprecated, prefix, "3.24", nullptr));
361     PetscCall(PetscOptionsReal(prefix, "Local absolute threshold for selecting deflation vectors returned by SLEPc", "PCHPDDM", data->levels[i - 1]->threshold, &data->levels[i - 1]->threshold, nullptr));
362     if (i == 1) {
363       PetscCheck(overlap == -1 || PetscAbsReal(data->levels[i - 1]->threshold + static_cast<PetscReal>(1.0)) < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply both -pc_hpddm_levels_1_eps_threshold_absolute and -pc_hpddm_harmonic_overlap");
364       PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_svd_nsv", i));
365       if (overlap != -1) {
366         PetscInt  nsv    = 0;
367         PetscBool set[2] = {PETSC_FALSE, PETSC_FALSE};
368 
369         PetscCall(PetscOptionsBoundedInt(prefix, "Local number of deflation vectors computed by SLEPc", "SVDSetDimensions", nsv, &nsv, nullptr, 0));
370         PetscCheck(data->levels[0]->nu == 0 || nsv == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply both -pc_hpddm_levels_1_eps_nev and -pc_hpddm_levels_1_svd_nsv");
371         if (data->levels[0]->nu == 0) { /* -eps_nev has not been used, so nu is 0 */
372           data->levels[0]->nu = nsv;    /* nu may still be 0 if -svd_nsv has not been used */
373           PetscCall(PetscSNPrintf(deprecated, sizeof(deprecated), "-pc_hpddm_levels_%d_svd_relative_threshold", i));
374           PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_svd_threshold_relative", i));
375           PetscCall(PetscOptionsDeprecated(deprecated, prefix, "3.24", nullptr));
376           PetscCall(PetscOptionsReal(prefix, "Local relative threshold for selecting deflation vectors returned by SLEPc", "PCHPDDM", data->levels[0]->threshold, &data->levels[0]->threshold, set)); /* cache whether this option has been used or not to error out in case of exclusive options being used simultaneously later on */
377         }
378         if (data->levels[0]->nu == 0 || nsv == 0) { /* if neither -eps_nev nor -svd_nsv has been used */
379           PetscCall(PetscSNPrintf(deprecated, sizeof(deprecated), "-pc_hpddm_levels_%d_eps_relative_threshold", i));
380           PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_threshold_relative", i));
381           PetscCall(PetscOptionsDeprecated(deprecated, prefix, "3.24", nullptr));
382           PetscCall(PetscOptionsReal(prefix, "Local relative threshold for selecting deflation vectors returned by SLEPc", "PCHPDDM", data->levels[0]->threshold, &data->levels[0]->threshold, set + 1));
383           PetscCheck(!set[0] || !set[1], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply both -pc_hpddm_levels_1_eps_threshold_relative and -pc_hpddm_levels_1_svd_threshold_relative");
384         }
385         PetscCheck(data->levels[0]->nu || PetscAbsReal(data->levels[i - 1]->threshold + static_cast<PetscReal>(1.0)) > PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Need to supply at least one of 1) -pc_hpddm_levels_1_eps_nev, 2) -pc_hpddm_levels_1_svd_nsv, 3) -pc_hpddm_levels_1_eps_threshold_relative, 4) -pc_hpddm_levels_1_svd_threshold_relative (for nonsymmetric matrices, only option 2 and option 4 are appropriate)");
386       } else if (PetscDefined(USE_DEBUG)) {
387         PetscCall(PetscOptionsHasName(PetscOptionsObject->options, PetscOptionsObject->prefix, prefix, &flg));
388         PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply -%spc_hpddm_levels_%d_svd_nsv without -%spc_hpddm_harmonic_overlap", PetscOptionsObject->prefix ? PetscOptionsObject->prefix : "", i,
389                    PetscOptionsObject->prefix ? PetscOptionsObject->prefix : "");
390         PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_svd_threshold_relative", i));
391         PetscCall(PetscOptionsHasName(PetscOptionsObject->options, PetscOptionsObject->prefix, prefix, &flg));
392         PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply -%spc_hpddm_levels_%d_svd_threshold_relative without -%spc_hpddm_harmonic_overlap", PetscOptionsObject->prefix ? PetscOptionsObject->prefix : "", i,
393                    PetscOptionsObject->prefix ? PetscOptionsObject->prefix : "");
394         PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_threshold_relative", i));
395         PetscCall(PetscOptionsHasName(PetscOptionsObject->options, PetscOptionsObject->prefix, prefix, &flg));
396         PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply -%spc_hpddm_levels_%d_eps_threshold_relative without -%spc_hpddm_harmonic_overlap, maybe you meant to use -%spc_hpddm_levels_%d_eps_threshold_absolute?",
397                    PetscOptionsObject->prefix ? PetscOptionsObject->prefix : "", i, PetscOptionsObject->prefix ? PetscOptionsObject->prefix : "", PetscOptionsObject->prefix ? PetscOptionsObject->prefix : "", i);
398       }
399       PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_1_st_share_sub_ksp"));
400       PetscCall(PetscOptionsBool(prefix, "Shared KSP between SLEPc ST and the fine-level subdomain solver", "PCHPDDMSetSTShareSubKSP", PETSC_FALSE, &data->share, nullptr));
401     }
402     /* if there is no prescribed coarsening, just break out of the loop */
403     if (data->levels[i - 1]->threshold <= PetscReal() && data->levels[i - 1]->nu <= 0 && !(data->deflation && i == 1)) break;
404     else {
405       ++i;
406       PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_nev", i));
407       PetscCall(PetscOptionsHasName(PetscOptionsObject->options, PetscOptionsObject->prefix, prefix, &flg));
408       if (!flg) {
409         PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_threshold_absolute", i));
410         PetscCall(PetscOptionsHasName(PetscOptionsObject->options, PetscOptionsObject->prefix, prefix, &flg));
411       }
412       if (flg) {
413         /* if there are coarsening options for the next level, then register it  */
414         /* otherwise, don't to avoid having both options levels_N_p and coarse_p */
415         PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_p", i));
416         PetscCall(PetscOptionsRangeInt(prefix, "Number of processes used to assemble the coarse operator at this level", "PCHPDDM", p, &p, &flg, 1, PetscMax(1, previous / 2)));
417         previous = p;
418       }
419     }
420   }
421   data->N = i;
422   n       = 1;
423   if (i > 1) {
424     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_coarse_p"));
425     PetscCall(PetscOptionsRangeInt(prefix, "Number of processes used to assemble the coarsest operator", "PCHPDDM", n, &n, nullptr, 1, PetscMax(1, previous / 2)));
426 #if PetscDefined(HAVE_MUMPS)
427     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "pc_hpddm_coarse_"));
428     PetscCall(PetscOptionsHasName(PetscOptionsObject->options, prefix, "-mat_mumps_use_omp_threads", &flg));
429     if (flg) {
430       char type[64]; /* same size as in src/ksp/pc/impls/factor/factimpl.c */
431 
432       PetscCall(PetscStrncpy(type, n > 1 && PetscDefined(HAVE_MUMPS) ? MATSOLVERMUMPS : MATSOLVERPETSC, sizeof(type))); /* default solver for a MatMPIAIJ or a MatSeqAIJ */
433       PetscCall(PetscOptionsGetString(PetscOptionsObject->options, prefix, "-pc_factor_mat_solver_type", type, sizeof(type), nullptr));
434       PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
435       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-%smat_mumps_use_omp_threads and -%spc_factor_mat_solver_type != %s", prefix, prefix, MATSOLVERMUMPS);
436       size = n;
437       n    = -1;
438       PetscCall(PetscOptionsGetInt(PetscOptionsObject->options, prefix, "-mat_mumps_use_omp_threads", &n, nullptr));
439       PetscCheck(n >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Need to specify a positive integer for -%smat_mumps_use_omp_threads", prefix);
440       PetscCheck(n * size <= previous, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%d MPI process%s x %d OpenMP thread%s greater than %d available MPI process%s for the coarsest operator", (int)size, size > 1 ? "es" : "", (int)n, n > 1 ? "s" : "", (int)previous, previous > 1 ? "es" : "");
441     }
442 #endif
443     PetscCall(PetscOptionsEnum("-pc_hpddm_coarse_correction", "Type of coarse correction applied each iteration", "PCHPDDMSetCoarseCorrectionType", PCHPDDMCoarseCorrectionTypes, (PetscEnum)data->correction, (PetscEnum *)&type, &flg));
444     if (flg) PetscCall(PCHPDDMSetCoarseCorrectionType(pc, type));
445     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_has_neumann"));
446     PetscCall(PetscOptionsBool(prefix, "Is the auxiliary Mat the local Neumann matrix?", "PCHPDDMHasNeumannMat", PetscBool3ToBool(data->Neumann), &flg, &set));
447     if (set) data->Neumann = PetscBoolToBool3(flg);
448     data->log_separate = PETSC_FALSE;
449     if (PetscDefined(USE_LOG)) {
450       PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_log_separate"));
451       PetscCall(PetscOptionsBool(prefix, "Log events level by level instead of inside PCSetUp()/KSPSolve()", nullptr, data->log_separate, &data->log_separate, nullptr));
452     }
453   }
454   PetscOptionsHeadEnd();
455   while (i < PETSC_PCHPDDM_MAXLEVELS && data->levels[i]) PetscCall(PetscFree(data->levels[i++]));
456   PetscFunctionReturn(PETSC_SUCCESS);
457 }
458 
459 template <bool transpose>
460 static PetscErrorCode PCApply_HPDDM(PC pc, Vec x, Vec y)
461 {
462   PC_HPDDM *data = (PC_HPDDM *)pc->data;
463 
464   PetscFunctionBegin;
465   PetscCall(PetscCitationsRegister(HPDDMCitation, &HPDDMCite));
466   PetscCheck(data->levels[0]->ksp, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No KSP attached to PCHPDDM");
467   if (data->log_separate) PetscCall(PetscLogEventBegin(PC_HPDDM_Solve[0], data->levels[0]->ksp, nullptr, nullptr, nullptr)); /* coarser-level events are directly triggered in HPDDM */
468   if (!transpose) PetscCall(KSPSolve(data->levels[0]->ksp, x, y));
469   else PetscCall(KSPSolveTranspose(data->levels[0]->ksp, x, y));
470   if (data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_Solve[0], data->levels[0]->ksp, nullptr, nullptr, nullptr));
471   PetscFunctionReturn(PETSC_SUCCESS);
472 }
473 
474 template <bool transpose>
475 static PetscErrorCode PCMatApply_HPDDM(PC pc, Mat X, Mat Y)
476 {
477   PC_HPDDM *data = (PC_HPDDM *)pc->data;
478 
479   PetscFunctionBegin;
480   PetscCall(PetscCitationsRegister(HPDDMCitation, &HPDDMCite));
481   PetscCheck(data->levels[0]->ksp, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No KSP attached to PCHPDDM");
482   if (!transpose) PetscCall(KSPMatSolve(data->levels[0]->ksp, X, Y));
483   else PetscCall(KSPMatSolveTranspose(data->levels[0]->ksp, X, Y));
484   PetscFunctionReturn(PETSC_SUCCESS);
485 }
486 
487 /*@
488   PCHPDDMGetComplexities - Computes the grid and operator complexities.
489 
490   Collective
491 
492   Input Parameter:
493 . pc - preconditioner context
494 
495   Output Parameters:
496 + gc - grid complexity $ \sum_i m_i / m_1 $
497 - oc - operator complexity $ \sum_i nnz_i / nnz_1 $
498 
499   Level: advanced
500 
501 .seealso: [](ch_ksp), `PCMGGetGridComplexity()`, `PCHPDDM`, `PCHYPRE`, `PCGAMG`
502 @*/
503 PetscErrorCode PCHPDDMGetComplexities(PC pc, PetscReal *gc, PetscReal *oc)
504 {
505   PC_HPDDM      *data = (PC_HPDDM *)pc->data;
506   MatInfo        info;
507   PetscLogDouble accumulate[2]{}, nnz1 = 1.0, m1 = 1.0;
508 
509   PetscFunctionBegin;
510   if (gc) {
511     PetscAssertPointer(gc, 2);
512     *gc = 0;
513   }
514   if (oc) {
515     PetscAssertPointer(oc, 3);
516     *oc = 0;
517   }
518   for (PetscInt n = 0; n < data->N; ++n) {
519     if (data->levels[n]->ksp) {
520       Mat       P, A = nullptr;
521       PetscInt  m;
522       PetscBool flg = PETSC_FALSE;
523 
524       PetscCall(KSPGetOperators(data->levels[n]->ksp, nullptr, &P));
525       PetscCall(MatGetSize(P, &m, nullptr));
526       accumulate[0] += m;
527       if (n == 0) {
528         PetscCall(PetscObjectTypeCompareAny((PetscObject)P, &flg, MATNORMAL, MATNORMALHERMITIAN, ""));
529         if (flg) {
530           PetscCall(MatConvert(P, MATAIJ, MAT_INITIAL_MATRIX, &A));
531           P = A;
532         } else {
533           PetscCall(PetscObjectTypeCompare((PetscObject)P, MATSCHURCOMPLEMENT, &flg));
534           PetscCall(PetscObjectReference((PetscObject)P));
535         }
536       }
537       if (!A && flg) accumulate[1] += m * m; /* assumption that a MATSCHURCOMPLEMENT is dense if stored explicitly */
538       else if (P->ops->getinfo) {
539         PetscCall(MatGetInfo(P, MAT_GLOBAL_SUM, &info));
540         accumulate[1] += info.nz_used;
541       }
542       if (n == 0) {
543         m1 = m;
544         if (!A && flg) nnz1 = m * m;
545         else if (P->ops->getinfo) nnz1 = info.nz_used;
546         PetscCall(MatDestroy(&P));
547       }
548     }
549   }
550   /* only process #0 has access to the full hierarchy by construction, so broadcast to ensure consistent outputs */
551   PetscCallMPI(MPI_Bcast(accumulate, 2, MPIU_PETSCLOGDOUBLE, 0, PetscObjectComm((PetscObject)pc)));
552   if (gc) *gc = static_cast<PetscReal>(accumulate[0] / m1);
553   if (oc) *oc = static_cast<PetscReal>(accumulate[1] / nnz1);
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 static PetscErrorCode PCView_HPDDM(PC pc, PetscViewer viewer)
558 {
559   PC_HPDDM         *data = (PC_HPDDM *)pc->data;
560   PetscViewer       subviewer;
561   PetscViewerFormat format;
562   PetscSubcomm      subcomm;
563   PetscReal         oc, gc;
564   PetscInt          tabs;
565   PetscMPIInt       size, color, rank;
566   PetscBool         flg;
567   const char       *name;
568 
569   PetscFunctionBegin;
570   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &flg));
571   if (flg) {
572     PetscCall(PetscViewerASCIIPrintf(viewer, "level%s: %" PetscInt_FMT "\n", data->N > 1 ? "s" : "", data->N));
573     PetscCall(PCHPDDMGetComplexities(pc, &gc, &oc));
574     if (data->N > 1) {
575       if (!data->deflation) {
576         PetscCall(PetscViewerASCIIPrintf(viewer, "Neumann matrix attached? %s\n", PetscBools[PetscBool3ToBool(data->Neumann)]));
577         PetscCall(PetscViewerASCIIPrintf(viewer, "shared subdomain KSP between SLEPc and PETSc? %s\n", PetscBools[data->share]));
578       } else PetscCall(PetscViewerASCIIPrintf(viewer, "user-supplied deflation matrix\n"));
579       PetscCall(PetscViewerASCIIPrintf(viewer, "coarse correction: %s\n", PCHPDDMCoarseCorrectionTypes[data->correction]));
580       PetscCall(PetscViewerASCIIPrintf(viewer, "on process #0, value%s (+ threshold%s if available) for selecting deflation vectors:", data->N > 2 ? "s" : "", data->N > 2 ? "s" : ""));
581       PetscCall(PetscViewerASCIIGetTab(viewer, &tabs));
582       PetscCall(PetscViewerASCIISetTab(viewer, 0));
583       for (PetscInt i = 1; i < data->N; ++i) {
584         PetscCall(PetscViewerASCIIPrintf(viewer, " %" PetscInt_FMT, data->levels[i - 1]->nu));
585         if (data->levels[i - 1]->threshold > static_cast<PetscReal>(-0.1)) PetscCall(PetscViewerASCIIPrintf(viewer, " (%g)", (double)data->levels[i - 1]->threshold));
586       }
587       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
588       PetscCall(PetscViewerASCIISetTab(viewer, tabs));
589     }
590     PetscCall(PetscViewerASCIIPrintf(viewer, "grid and operator complexities: %g %g\n", (double)gc, (double)oc));
591     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
592     if (data->levels[0]->ksp) {
593       PetscCall(KSPView(data->levels[0]->ksp, viewer));
594       if (data->levels[0]->pc) PetscCall(PCView(data->levels[0]->pc, viewer));
595       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
596       for (PetscInt i = 1; i < data->N; ++i) {
597         if (data->levels[i]->ksp) color = 1;
598         else color = 0;
599         PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)pc), &subcomm));
600         PetscCall(PetscSubcommSetNumber(subcomm, PetscMin(size, 2)));
601         PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
602         PetscCall(PetscViewerASCIIPushTab(viewer));
603         PetscCall(PetscViewerGetSubViewer(viewer, PetscSubcommChild(subcomm), &subviewer));
604         if (color == 1) {
605           PetscCall(KSPView(data->levels[i]->ksp, subviewer));
606           if (data->levels[i]->pc) PetscCall(PCView(data->levels[i]->pc, subviewer));
607           PetscCall(PetscViewerFlush(subviewer));
608         }
609         PetscCall(PetscViewerRestoreSubViewer(viewer, PetscSubcommChild(subcomm), &subviewer));
610         PetscCall(PetscViewerASCIIPopTab(viewer));
611         PetscCall(PetscSubcommDestroy(&subcomm));
612       }
613     }
614     PetscCall(PetscViewerGetFormat(viewer, &format));
615     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
616       PetscCall(PetscViewerFileGetName(viewer, &name));
617       if (name) {
618         Mat             aux[2];
619         IS              is;
620         const PetscInt *indices;
621         PetscInt        m, n, sizes[5] = {pc->mat->rmap->n, pc->mat->cmap->n, pc->mat->rmap->N, pc->mat->cmap->N, 0};
622         char           *tmp;
623         std::string     prefix, suffix;
624         size_t          pos;
625 
626         PetscCall(PetscStrstr(name, ".", &tmp));
627         if (tmp) {
628           pos    = std::distance(const_cast<char *>(name), tmp);
629           prefix = std::string(name, pos);
630           suffix = std::string(name + pos + 1);
631         } else prefix = name;
632         if (data->aux) {
633           PetscCall(MatGetSize(data->aux, &m, &n));
634           PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), aux));
635           PetscCall(MatSetSizes(aux[0], m, n, PETSC_DETERMINE, PETSC_DETERMINE));
636           PetscCall(PetscObjectBaseTypeCompare((PetscObject)data->aux, MATSEQAIJ, &flg));
637           if (flg) PetscCall(MatSetType(aux[0], MATMPIAIJ));
638           else {
639             PetscCall(PetscObjectBaseTypeCompare((PetscObject)data->aux, MATSEQBAIJ, &flg));
640             if (flg) PetscCall(MatSetType(aux[0], MATMPIBAIJ));
641             else {
642               PetscCall(PetscObjectBaseTypeCompare((PetscObject)data->aux, MATSEQSBAIJ, &flg));
643               PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "MatType of auxiliary Mat (%s) is not any of the following: MATSEQAIJ, MATSEQBAIJ, or MATSEQSBAIJ", ((PetscObject)data->aux)->type_name);
644               PetscCall(MatSetType(aux[0], MATMPISBAIJ));
645             }
646           }
647           PetscCall(MatSetBlockSizesFromMats(aux[0], data->aux, data->aux));
648           PetscCall(MatAssemblyBegin(aux[0], MAT_FINAL_ASSEMBLY));
649           PetscCall(MatAssemblyEnd(aux[0], MAT_FINAL_ASSEMBLY));
650           PetscCall(MatGetDiagonalBlock(aux[0], aux + 1));
651           PetscCall(MatCopy(data->aux, aux[1], DIFFERENT_NONZERO_PATTERN));
652           PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)pc), std::string(prefix + "_aux_" + std::to_string(size) + (tmp ? ("." + suffix) : "")).c_str(), FILE_MODE_WRITE, &subviewer));
653           PetscCall(MatView(aux[0], subviewer));
654           PetscCall(PetscViewerDestroy(&subviewer));
655           PetscCall(MatDestroy(aux));
656         }
657         if (data->is) {
658           PetscCall(ISGetIndices(data->is, &indices));
659           PetscCall(ISGetSize(data->is, sizes + 4));
660           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), sizes[4], indices, PETSC_USE_POINTER, &is));
661           PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)pc), std::string(prefix + "_is_" + std::to_string(size) + (tmp ? ("." + suffix) : "")).c_str(), FILE_MODE_WRITE, &subviewer));
662           PetscCall(ISView(is, subviewer));
663           PetscCall(PetscViewerDestroy(&subviewer));
664           PetscCall(ISDestroy(&is));
665           PetscCall(ISRestoreIndices(data->is, &indices));
666         }
667         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), PETSC_STATIC_ARRAY_LENGTH(sizes), sizes, PETSC_USE_POINTER, &is));
668         PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)pc), std::string(prefix + "_sizes_" + std::to_string(size) + (tmp ? ("." + suffix) : "")).c_str(), FILE_MODE_WRITE, &subviewer));
669         PetscCall(ISView(is, subviewer));
670         PetscCall(PetscViewerDestroy(&subviewer));
671         PetscCall(ISDestroy(&is));
672       }
673     }
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
678 static PetscErrorCode PCPreSolve_HPDDM(PC pc, KSP ksp, Vec, Vec)
679 {
680   PC_HPDDM *data = (PC_HPDDM *)pc->data;
681   Mat       A;
682   PetscBool flg;
683 
684   PetscFunctionBegin;
685   if (ksp) {
686     PetscCall(PetscObjectTypeCompare((PetscObject)ksp, KSPLSQR, &flg));
687     if (flg && !data->normal) {
688       PetscCall(KSPGetOperators(ksp, &A, nullptr));
689       PetscCall(MatCreateVecs(A, nullptr, &data->normal)); /* temporary Vec used in PCApply_HPDDMShell() for coarse grid corrections */
690     } else if (!flg) {
691       PetscCall(PetscObjectTypeCompareAny((PetscObject)ksp, &flg, KSPCG, KSPGROPPCG, KSPPIPECG, KSPPIPECGRR, KSPPIPELCG, KSPPIPEPRCG, KSPPIPECG2, KSPSTCG, KSPFCG, KSPPIPEFCG, KSPMINRES, KSPNASH, KSPSYMMLQ, ""));
692       if (!flg) {
693         PetscCall(PetscObjectTypeCompare((PetscObject)ksp, KSPHPDDM, &flg));
694         if (flg) {
695           KSPHPDDMType type;
696 
697           PetscCall(KSPHPDDMGetType(ksp, &type));
698           flg = (type == KSP_HPDDM_TYPE_CG || type == KSP_HPDDM_TYPE_BCG || type == KSP_HPDDM_TYPE_BFBCG ? PETSC_TRUE : PETSC_FALSE);
699         }
700       }
701     }
702     if (flg) {
703       if (data->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED) {
704         PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, ((PetscObject)pc)->prefix, "-pc_hpddm_coarse_correction", &flg));
705         PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "PCHPDDMCoarseCorrectionType %s is known to be not symmetric, but KSPType %s requires a symmetric PC, if you insist on using this configuration, use the additional option -%spc_hpddm_coarse_correction %s, or alternatively, switch to a symmetric PCHPDDMCoarseCorrectionType such as %s",
706                    PCHPDDMCoarseCorrectionTypes[data->correction], ((PetscObject)ksp)->type_name, ((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "", PCHPDDMCoarseCorrectionTypes[data->correction], PCHPDDMCoarseCorrectionTypes[PC_HPDDM_COARSE_CORRECTION_BALANCED]);
707       }
708       for (PetscInt n = 0; n < data->N; ++n) {
709         if (data->levels[n]->pc) {
710           PetscCall(PetscObjectTypeCompare((PetscObject)data->levels[n]->pc, PCASM, &flg));
711           if (flg) {
712             PCASMType type;
713 
714             PetscCall(PCASMGetType(data->levels[n]->pc, &type));
715             if (type == PC_ASM_RESTRICT || type == PC_ASM_INTERPOLATE) {
716               PetscCall(PetscOptionsHasName(((PetscObject)data->levels[n]->pc)->options, ((PetscObject)data->levels[n]->pc)->prefix, "-pc_asm_type", &flg));
717               PetscCheck(flg, PetscObjectComm((PetscObject)data->levels[n]->pc), PETSC_ERR_ARG_INCOMP, "PCASMType %s is known to be not symmetric, but KSPType %s requires a symmetric PC, if you insist on using this configuration, use the additional option -%spc_asm_type %s, or alternatively, switch to a symmetric PCASMType such as %s", PCASMTypes[type],
718                          ((PetscObject)ksp)->type_name, ((PetscObject)data->levels[n]->pc)->prefix, PCASMTypes[type], PCASMTypes[PC_ASM_BASIC]);
719             }
720           }
721         }
722       }
723     }
724   }
725   PetscFunctionReturn(PETSC_SUCCESS);
726 }
727 
728 static PetscErrorCode PCSetUp_HPDDMShell(PC pc)
729 {
730   PC_HPDDM_Level *ctx;
731   Mat             A, P;
732   Vec             x;
733   const char     *pcpre;
734 
735   PetscFunctionBegin;
736   PetscCall(PCShellGetContext(pc, &ctx));
737   PetscCall(KSPGetOptionsPrefix(ctx->ksp, &pcpre));
738   PetscCall(KSPGetOperators(ctx->ksp, &A, &P));
739   /* smoother */
740   PetscCall(PCSetOptionsPrefix(ctx->pc, pcpre));
741   PetscCall(PCSetOperators(ctx->pc, A, P));
742   if (!ctx->v[0]) {
743     PetscCall(VecDuplicateVecs(ctx->D, 1, &ctx->v[0]));
744     if (!std::is_same<PetscScalar, PetscReal>::value) PetscCall(VecDestroy(&ctx->D));
745     PetscCall(MatCreateVecs(A, &x, nullptr));
746     PetscCall(VecDuplicateVecs(x, 2, &ctx->v[1]));
747     PetscCall(VecDestroy(&x));
748   }
749   std::fill_n(ctx->V, 3, nullptr);
750   PetscFunctionReturn(PETSC_SUCCESS);
751 }
752 
753 template <bool transpose = false, class Type = Vec, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
754 static inline PetscErrorCode PCHPDDMDeflate_Private(PC pc, Type x, Type y)
755 {
756   PC_HPDDM_Level *ctx;
757   PetscScalar    *out;
758 
759   PetscFunctionBegin;
760   PetscCall(PCShellGetContext(pc, &ctx));
761   /* going from PETSc to HPDDM numbering */
762   PetscCall(VecScatterBegin(ctx->scatter, x, ctx->v[0][0], INSERT_VALUES, SCATTER_FORWARD));
763   PetscCall(VecScatterEnd(ctx->scatter, x, ctx->v[0][0], INSERT_VALUES, SCATTER_FORWARD));
764   PetscCall(VecGetArrayWrite(ctx->v[0][0], &out));
765   PetscCallCXX(ctx->P->deflation<false, transpose>(nullptr, out, 1)); /* y = Q x */
766   PetscCall(VecRestoreArrayWrite(ctx->v[0][0], &out));
767   /* going from HPDDM to PETSc numbering */
768   PetscCall(VecScatterBegin(ctx->scatter, ctx->v[0][0], y, INSERT_VALUES, SCATTER_REVERSE));
769   PetscCall(VecScatterEnd(ctx->scatter, ctx->v[0][0], y, INSERT_VALUES, SCATTER_REVERSE));
770   PetscFunctionReturn(PETSC_SUCCESS);
771 }
772 
773 template <bool transpose = false, class Type = Mat, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
774 static inline PetscErrorCode PCHPDDMDeflate_Private(PC pc, Type X, Type Y)
775 {
776   PC_HPDDM_Level    *ctx;
777   PetscSF            strided;
778   const PetscScalar *in;
779   PetscScalar       *out, *tmp;
780   PetscInt           N, ld[2];
781   PetscMemType       type[2];
782 
783   PetscFunctionBegin;
784   PetscCall(PCShellGetContext(pc, &ctx));
785   PetscCall(MatGetSize(X, nullptr, &N));
786   PetscCall(PetscObjectQuery((PetscObject)ctx->scatter, "_HPDDM_StridedSF", (PetscObject *)&strided));
787   PetscCall(MatDenseGetLDA(X, ld));
788   PetscCall(MatDenseGetLDA(Y, ld + 1));
789   PetscCheck(ld[0] == ld[1], PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Leading dimension of input Mat different than the one of output Mat");
790   PetscCall(MatDenseGetLDA(ctx->V[0], ld + 1));
791   if (strided) {
792     PetscInt nroots[2], nleafs[2];
793 
794     PetscCall(PetscSFGetGraph(ctx->scatter, nroots, nleafs, nullptr, nullptr));
795     PetscCall(PetscSFGetGraph(strided, nroots + 1, nleafs + 1, nullptr, nullptr));
796     if (N * nroots[0] != nroots[1] || N * nleafs[0] != nleafs[1]) PetscCall(PetscSFDestroy(&strided));
797   }
798   if (!strided) {
799     PetscCall(PetscSFCreateStridedSF(ctx->scatter, N, ld[0], ld[1], &strided));
800     PetscCall(PetscObjectCompose((PetscObject)ctx->scatter, "_HPDDM_StridedSF", (PetscObject)strided));
801     PetscCall(PetscObjectDereference((PetscObject)strided));
802   }
803   PetscCall(MatDenseGetArrayReadAndMemType(X, &in, type));
804   PetscCall(MatDenseGetArrayWriteAndMemType(ctx->V[0], &tmp, type + 1));
805   /* going from PETSc to HPDDM numbering */
806   PetscCall(PetscSFBcastWithMemTypeBegin(strided, strided->vscat.unit, type[0], in, type[1], tmp, MPI_REPLACE));
807   PetscCall(PetscSFBcastEnd(strided, strided->vscat.unit, in, tmp, MPI_REPLACE));
808   PetscCall(MatDenseRestoreArrayReadAndMemType(X, &in));
809   PetscCallCXX(ctx->P->deflation<false, transpose>(nullptr, tmp, N)); /* Y = Q X */
810   PetscCall(MatDenseGetArrayWriteAndMemType(Y, &out, type + 1));
811   /* going from HPDDM to PETSc numbering */
812   PetscCall(PetscSFReduceWithMemTypeBegin(strided, strided->vscat.unit, type[0], tmp, type[1], out, MPI_REPLACE));
813   PetscCall(PetscSFReduceEnd(strided, strided->vscat.unit, tmp, out, MPI_REPLACE));
814   PetscCall(MatDenseRestoreArrayWriteAndMemType(Y, &out));
815   PetscCall(MatDenseRestoreArrayWriteAndMemType(ctx->V[0], &tmp));
816   PetscFunctionReturn(PETSC_SUCCESS);
817 }
818 
819 /*
820      PCApply_HPDDMShell - Applies a (2) deflated, (1) additive, (3) balanced, or (4) no coarse correction. In what follows, E = Z Pmat Z^T and Q = Z^T E^-1 Z.
821 
822 .vb
823    (1) y =                  Pmat^-1              x + Q x,
824    (2) y =                  Pmat^-1 (I - Amat Q) x + Q x (default),
825    (3) y = (I - Q^T Amat^T) Pmat^-1 (I - Amat Q) x + Q x,
826    (4) y =                  Pmat^-1              x      .
827 .ve
828 
829    Input Parameters:
830 +     pc - preconditioner context
831 -     x - input vector
832 
833    Output Parameter:
834 .     y - output vector
835 
836    Notes:
837      The options of Pmat^1 = pc(Pmat) are prefixed by -pc_hpddm_levels_1_pc_. Z is a tall-and-skiny matrix assembled by HPDDM. The number of processes on which (Z Pmat Z^T) is aggregated is set via -pc_hpddm_coarse_p.
838      The options of (Z Pmat Z^T)^-1 = ksp(Z Pmat Z^T) are prefixed by -pc_hpddm_coarse_ (`KSPPREONLY` and `PCCHOLESKY` by default), unless a multilevel correction is turned on, in which case, this function is called recursively at each level except the coarsest one.
839      (1) and (2) visit the "next" level (in terms of coarsening) once per application, while (3) visits it twice, so it is asymptotically twice costlier. (2) is not symmetric even if both Amat and Pmat are symmetric.
840 
841    Level: advanced
842 
843    Developer Note:
844    Since this is not an actual manual page the material below should be moved to an appropriate manual page with the appropriate context, i.e. explaining when it is used and how
845    to trigger it. Likely the manual page is `PCHPDDM`
846 
847 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMCoarseCorrectionType`
848 */
849 static PetscErrorCode PCApply_HPDDMShell(PC pc, Vec x, Vec y)
850 {
851   PC_HPDDM_Level *ctx;
852   Mat             A;
853 
854   PetscFunctionBegin;
855   PetscCall(PCShellGetContext(pc, &ctx));
856   PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
857   PetscCall(KSPGetOperators(ctx->ksp, &A, nullptr));
858   if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCApply(ctx->pc, x, y)); /* y = M^-1 x */
859   else {
860     PetscCall(PCHPDDMDeflate_Private(pc, x, y)); /* y = Q x */
861     if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED || ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
862       if (!ctx->parent->normal || ctx != ctx->parent->levels[0]) PetscCall(MatMult(A, y, ctx->v[1][0])); /* y = A Q x */
863       else {
864         /* KSPLSQR and finest level */
865         PetscCall(MatMult(A, y, ctx->parent->normal));                              /* y = A Q x                 */
866         PetscCall(MatMultHermitianTranspose(A, ctx->parent->normal, ctx->v[1][0])); /* y = A^T A Q x             */
867       }
868       PetscCall(VecWAXPY(ctx->v[1][1], -1.0, ctx->v[1][0], x)); /* y = (I - A Q) x                               */
869       PetscCall(PCApply(ctx->pc, ctx->v[1][1], ctx->v[1][0]));  /* y = M^-1 (I - A Q) x                          */
870       if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
871         if (!ctx->parent->normal || ctx != ctx->parent->levels[0]) PetscCall(MatMultHermitianTranspose(A, ctx->v[1][0], ctx->v[1][1])); /* z = A^T y */
872         else {
873           PetscCall(MatMult(A, ctx->v[1][0], ctx->parent->normal));
874           PetscCall(MatMultHermitianTranspose(A, ctx->parent->normal, ctx->v[1][1])); /* z = A^T A y             */
875         }
876         PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->v[1][1], ctx->v[1][1])); /* z = Q^T z                    */
877         PetscCall(VecAXPBYPCZ(y, -1.0, 1.0, 1.0, ctx->v[1][1], ctx->v[1][0]));   /* y = (I - Q^T A^T) y + Q x    */
878       } else PetscCall(VecAXPY(y, 1.0, ctx->v[1][0]));                           /* y = Q M^-1 (I - A Q) x + Q x */
879     } else {
880       PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
881       PetscCall(PCApply(ctx->pc, x, ctx->v[1][0]));
882       PetscCall(VecAXPY(y, 1.0, ctx->v[1][0])); /* y = M^-1 x + Q x */
883     }
884   }
885   PetscFunctionReturn(PETSC_SUCCESS);
886 }
887 
888 template <bool transpose>
889 static PetscErrorCode PCHPDDMMatApply_Private(PC_HPDDM_Level *ctx, Mat Y, PetscBool *reset)
890 {
891   Mat            A, *ptr;
892   PetscScalar   *array;
893   PetscInt       m, M, N, prev = 0;
894   PetscContainer container = nullptr;
895 
896   PetscFunctionBegin;
897   PetscCall(KSPGetOperators(ctx->ksp, &A, nullptr));
898   PetscCall(MatGetSize(Y, nullptr, &N));
899   PetscCall(PetscObjectQuery((PetscObject)A, "_HPDDM_MatProduct", (PetscObject *)&container));
900   if (container) { /* MatProduct container already attached */
901     PetscCall(PetscContainerGetPointer(container, &ptr));
902     if (ptr[1] != ctx->V[2]) /* Mat has changed or may have been set first in KSPHPDDM */
903       for (m = 0; m < 2; ++m) {
904         PetscCall(MatDestroy(ctx->V + m + 1));
905         ctx->V[m + 1] = ptr[m];
906         PetscCall(PetscObjectReference((PetscObject)ctx->V[m + 1]));
907       }
908   }
909   if (ctx->V[1]) PetscCall(MatGetSize(ctx->V[1], nullptr, &prev));
910   if (N != prev || !ctx->V[0]) {
911     PetscCall(MatDestroy(ctx->V));
912     PetscCall(VecGetLocalSize(ctx->v[0][0], &m));
913     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)Y), m, PETSC_DECIDE, PETSC_DECIDE, N, nullptr, ctx->V));
914     if (N != prev) {
915       PetscCall(MatDestroy(ctx->V + 1));
916       PetscCall(MatDestroy(ctx->V + 2));
917       PetscCall(MatGetLocalSize(Y, &m, nullptr));
918       PetscCall(MatGetSize(Y, &M, nullptr));
919       if (ctx->parent->correction != PC_HPDDM_COARSE_CORRECTION_BALANCED) PetscCall(MatDenseGetArrayWrite(ctx->V[0], &array));
920       else array = nullptr;
921       PetscCall(MatCreateDense(PetscObjectComm((PetscObject)Y), m, PETSC_DECIDE, M, N, array, ctx->V + 1));
922       if (ctx->parent->correction != PC_HPDDM_COARSE_CORRECTION_BALANCED) PetscCall(MatDenseRestoreArrayWrite(ctx->V[0], &array));
923       PetscCall(MatCreateDense(PetscObjectComm((PetscObject)Y), m, PETSC_DECIDE, M, N, nullptr, ctx->V + 2));
924       PetscCall(MatProductCreateWithMat(A, !transpose ? Y : ctx->V[2], nullptr, ctx->V[1]));
925       PetscCall(MatProductSetType(ctx->V[1], !transpose ? MATPRODUCT_AB : MATPRODUCT_AtB));
926       PetscCall(MatProductSetFromOptions(ctx->V[1]));
927       PetscCall(MatProductSymbolic(ctx->V[1]));
928       if (!container) PetscCall(PetscObjectContainerCompose((PetscObject)A, "_HPDDM_MatProduct", ctx->V + 1, nullptr)); /* no MatProduct container attached, create one to be queried in KSPHPDDM or at the next call to PCMatApply() */
929       else PetscCall(PetscContainerSetPointer(container, ctx->V + 1));                                                  /* need to compose B and D from MatProductCreateWithMat(A, B, NULL, D), which are stored in the contiguous array ctx->V */
930     }
931     if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
932       PetscCall(MatProductCreateWithMat(A, !transpose ? ctx->V[1] : Y, nullptr, ctx->V[2]));
933       PetscCall(MatProductSetType(ctx->V[2], !transpose ? MATPRODUCT_AtB : MATPRODUCT_AB));
934       PetscCall(MatProductSetFromOptions(ctx->V[2]));
935       PetscCall(MatProductSymbolic(ctx->V[2]));
936     }
937     PetscCallCXX(ctx->P->start(N));
938   }
939   if (N == prev || container) { /* when MatProduct container is attached, always need to MatProductReplaceMats() since KSPHPDDM may have replaced the Mat as well */
940     PetscCall(MatProductReplaceMats(nullptr, !transpose ? Y : ctx->V[2], nullptr, ctx->V[1]));
941     if (container && ctx->parent->correction != PC_HPDDM_COARSE_CORRECTION_BALANCED) {
942       PetscCall(MatDenseGetArrayWrite(ctx->V[0], &array));
943       PetscCall(MatDensePlaceArray(ctx->V[1], array));
944       PetscCall(MatDenseRestoreArrayWrite(ctx->V[0], &array));
945       *reset = PETSC_TRUE;
946     }
947   }
948   PetscFunctionReturn(PETSC_SUCCESS);
949 }
950 
951 /*
952      PCMatApply_HPDDMShell - Variant of PCApply_HPDDMShell() for blocks of vectors.
953 
954    Input Parameters:
955 +     pc - preconditioner context
956 -     X - block of input vectors
957 
958    Output Parameter:
959 .     Y - block of output vectors
960 
961    Level: advanced
962 
963 .seealso: [](ch_ksp), `PCHPDDM`, `PCApply_HPDDMShell()`, `PCHPDDMCoarseCorrectionType`
964 */
965 static PetscErrorCode PCMatApply_HPDDMShell(PC pc, Mat X, Mat Y)
966 {
967   PC_HPDDM_Level *ctx;
968   PetscBool       reset = PETSC_FALSE;
969 
970   PetscFunctionBegin;
971   PetscCall(PCShellGetContext(pc, &ctx));
972   PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
973   if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCMatApply(ctx->pc, X, Y));
974   else {
975     PetscCall(PCHPDDMMatApply_Private<false>(ctx, Y, &reset));
976     PetscCall(PCHPDDMDeflate_Private(pc, X, Y));
977     if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED || ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
978       PetscCall(MatProductNumeric(ctx->V[1]));
979       PetscCall(MatCopy(ctx->V[1], ctx->V[2], SAME_NONZERO_PATTERN));
980       PetscCall(MatAXPY(ctx->V[2], -1.0, X, SAME_NONZERO_PATTERN));
981       PetscCall(PCMatApply(ctx->pc, ctx->V[2], ctx->V[1]));
982       if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
983         PetscCall(MatProductNumeric(ctx->V[2]));
984         PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->V[2], ctx->V[2]));
985         PetscCall(MatAXPY(ctx->V[1], -1.0, ctx->V[2], SAME_NONZERO_PATTERN));
986       }
987       PetscCall(MatAXPY(Y, -1.0, ctx->V[1], SAME_NONZERO_PATTERN));
988     } else {
989       PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
990       PetscCall(PCMatApply(ctx->pc, X, ctx->V[1]));
991       PetscCall(MatAXPY(Y, 1.0, ctx->V[1], SAME_NONZERO_PATTERN));
992     }
993     if (reset) PetscCall(MatDenseResetArray(ctx->V[1]));
994   }
995   PetscFunctionReturn(PETSC_SUCCESS);
996 }
997 
998 /*
999      PCApplyTranspose_HPDDMShell - Applies the transpose of a (2) deflated, (1) additive, (3) balanced, or (4) no coarse correction. In what follows, E = Z Pmat Z^T and Q = Z^T E^-1 Z.
1000 
1001 .vb
1002    (1) y =                  Pmat^-T              x + Q^T x,
1003    (2) y = (I - Q^T Amat^T) Pmat^-T              x + Q^T x (default),
1004    (3) y = (I - Q^T Amat^T) Pmat^-T (I - Amat Q) x + Q^T x,
1005    (4) y =                  Pmat^-T              x        .
1006 .ve
1007 
1008    Input Parameters:
1009 +     pc - preconditioner context
1010 -     x - input vector
1011 
1012    Output Parameter:
1013 .     y - output vector
1014 
1015    Level: advanced
1016 
1017    Developer Note:
1018    Since this is not an actual manual page the material below should be moved to an appropriate manual page with the appropriate context, i.e. explaining when it is used and how
1019    to trigger it. Likely the manual page is `PCHPDDM`
1020 
1021 .seealso: [](ch_ksp), `PCHPDDM`, `PCApply_HPDDMShell()`, `PCHPDDMCoarseCorrectionType`
1022 */
1023 static PetscErrorCode PCApplyTranspose_HPDDMShell(PC pc, Vec x, Vec y)
1024 {
1025   PC_HPDDM_Level *ctx;
1026   Mat             A;
1027 
1028   PetscFunctionBegin;
1029   PetscCall(PCShellGetContext(pc, &ctx));
1030   PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
1031   PetscCheck(!ctx->parent->normal, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not implemented for the normal equations");
1032   PetscCall(KSPGetOperators(ctx->ksp, &A, nullptr));
1033   if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCApplyTranspose(ctx->pc, x, y)); /* y = M^-T x */
1034   else {
1035     PetscCall(PCHPDDMDeflate_Private<true>(pc, x, y)); /* y = Q^T x */
1036     if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED || ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
1037       if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
1038         /* TODO: checking whether Q^T = Q would make it possible to skip this coarse correction */
1039         PetscCall(PCHPDDMDeflate_Private(pc, x, ctx->v[1][1]));                /* y = Q x                     */
1040         PetscCall(MatMult(A, ctx->v[1][1], ctx->v[1][0]));                     /* y = A Q x                   */
1041         PetscCall(VecWAXPY(ctx->v[1][1], -1.0, ctx->v[1][0], x));              /* y = (I - A Q) x             */
1042         PetscCall(PCApplyTranspose(ctx->pc, ctx->v[1][1], ctx->v[1][0]));      /* y = M^-T (I - A Q) x        */
1043       } else PetscCall(PCApplyTranspose(ctx->pc, x, ctx->v[1][0]));            /* y = M^-T x                  */
1044       PetscCall(MatMultHermitianTranspose(A, ctx->v[1][0], ctx->v[1][1]));     /* z = A^T y                   */
1045       PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->v[1][1], ctx->v[1][1])); /* z = Q^T z                   */
1046       PetscCall(VecAXPBYPCZ(y, -1.0, 1.0, 1.0, ctx->v[1][1], ctx->v[1][0]));   /* y = (I - Q^T A^T) y + Q^T x */
1047     } else {
1048       PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
1049       PetscCall(PCApplyTranspose(ctx->pc, x, ctx->v[1][0]));
1050       PetscCall(VecAXPY(y, 1.0, ctx->v[1][0])); /* y = M^-T x + Q^T x */
1051     }
1052   }
1053   PetscFunctionReturn(PETSC_SUCCESS);
1054 }
1055 
1056 /*
1057      PCMatApplyTranspose_HPDDMShell - Variant of PCApplyTranspose_HPDDMShell() for blocks of vectors.
1058 
1059    Input Parameters:
1060 +     pc - preconditioner context
1061 -     X - block of input vectors
1062 
1063    Output Parameter:
1064 .     Y - block of output vectors
1065 
1066    Level: advanced
1067 
1068 .seealso: [](ch_ksp), `PCHPDDM`, `PCApplyTranspose_HPDDMShell()`, `PCHPDDMCoarseCorrectionType`
1069 */
1070 static PetscErrorCode PCMatApplyTranspose_HPDDMShell(PC pc, Mat X, Mat Y)
1071 {
1072   PC_HPDDM_Level *ctx;
1073   PetscScalar    *array;
1074   PetscBool       reset = PETSC_FALSE;
1075 
1076   PetscFunctionBegin;
1077   PetscCall(PCShellGetContext(pc, &ctx));
1078   PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
1079   if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCMatApplyTranspose(ctx->pc, X, Y));
1080   else if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
1081     /* similar code as in PCMatApply_HPDDMShell() with an extra call to PCHPDDMDeflate_Private<true>() */
1082     PetscCall(PCHPDDMMatApply_Private<false>(ctx, Y, &reset));
1083     PetscCall(PCHPDDMDeflate_Private(pc, X, Y));
1084     PetscCall(MatProductNumeric(ctx->V[1]));
1085     PetscCall(MatCopy(ctx->V[1], ctx->V[2], SAME_NONZERO_PATTERN));
1086     PetscCall(MatAXPY(ctx->V[2], -1.0, X, SAME_NONZERO_PATTERN));
1087     PetscCall(PCMatApplyTranspose(ctx->pc, ctx->V[2], ctx->V[1]));
1088     PetscCall(MatProductNumeric(ctx->V[2]));
1089     PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->V[2], ctx->V[2]));
1090     PetscCall(MatAXPY(ctx->V[1], -1.0, ctx->V[2], SAME_NONZERO_PATTERN));
1091     PetscCall(PCHPDDMDeflate_Private<true>(pc, X, Y)); /* TODO: checking whether Q^T = Q would make it possible to skip this coarse correction */
1092     PetscCall(MatAXPY(Y, -1.0, ctx->V[1], SAME_NONZERO_PATTERN));
1093   } else {
1094     PetscCall(PCHPDDMMatApply_Private<true>(ctx, Y, &reset));
1095     PetscCall(PCHPDDMDeflate_Private<true>(pc, X, Y));
1096     if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED) {
1097       PetscCall(PCMatApplyTranspose(ctx->pc, X, ctx->V[2]));
1098       PetscCall(MatAXPY(Y, 1.0, ctx->V[2], SAME_NONZERO_PATTERN));
1099       PetscCall(MatProductNumeric(ctx->V[1]));
1100       /* ctx->V[0] and ctx->V[1] memory regions overlap, so need to copy to ctx->V[2] and switch array */
1101       PetscCall(MatCopy(ctx->V[1], ctx->V[2], SAME_NONZERO_PATTERN));
1102       if (reset) PetscCall(MatDenseResetArray(ctx->V[1]));
1103       PetscCall(MatDenseGetArrayWrite(ctx->V[2], &array));
1104       PetscCall(MatDensePlaceArray(ctx->V[1], array));
1105       PetscCall(MatDenseRestoreArrayWrite(ctx->V[2], &array));
1106       reset = PETSC_TRUE;
1107       PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->V[1], ctx->V[1]));
1108       PetscCall(MatAXPY(Y, -1.0, ctx->V[1], SAME_NONZERO_PATTERN));
1109     } else {
1110       PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
1111       PetscCall(PCMatApplyTranspose(ctx->pc, X, ctx->V[1]));
1112       PetscCall(MatAXPY(Y, 1.0, ctx->V[1], SAME_NONZERO_PATTERN));
1113     }
1114     if (reset) PetscCall(MatDenseResetArray(ctx->V[1]));
1115   }
1116   PetscFunctionReturn(PETSC_SUCCESS);
1117 }
1118 
1119 static PetscErrorCode PCDestroy_HPDDMShell(PC pc)
1120 {
1121   PC_HPDDM_Level *ctx;
1122 
1123   PetscFunctionBegin;
1124   PetscCall(PCShellGetContext(pc, &ctx));
1125   PetscCall(HPDDM::Schwarz<PetscScalar>::destroy(ctx, PETSC_TRUE));
1126   PetscCall(VecDestroyVecs(1, &ctx->v[0]));
1127   PetscCall(VecDestroyVecs(2, &ctx->v[1]));
1128   PetscCall(PetscObjectCompose((PetscObject)ctx->pc->mat, "_HPDDM_MatProduct", nullptr));
1129   PetscCall(MatDestroy(ctx->V));
1130   PetscCall(MatDestroy(ctx->V + 1));
1131   PetscCall(MatDestroy(ctx->V + 2));
1132   PetscCall(VecDestroy(&ctx->D));
1133   PetscCall(PetscObjectCompose((PetscObject)ctx->scatter, "_HPDDM_StridedSF", nullptr));
1134   PetscCall(PetscSFDestroy(&ctx->scatter));
1135   PetscCall(PCDestroy(&ctx->pc));
1136   PetscFunctionReturn(PETSC_SUCCESS);
1137 }
1138 
1139 template <class Type, bool T = false, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
1140 static inline PetscErrorCode PCApply_Schur_Private(std::tuple<KSP, IS, Vec[2]> *p, PC factor, Type x, Type y)
1141 {
1142   PetscFunctionBegin;
1143   PetscCall(VecISCopy(std::get<2>(*p)[0], std::get<1>(*p), SCATTER_FORWARD, x));
1144   if (!T) PetscCall(PCApply(factor, std::get<2>(*p)[0], std::get<2>(*p)[1]));
1145   else PetscCall(PCApplyTranspose(factor, std::get<2>(*p)[0], std::get<2>(*p)[1]));
1146   PetscCall(VecISCopy(std::get<2>(*p)[1], std::get<1>(*p), SCATTER_REVERSE, y));
1147   PetscFunctionReturn(PETSC_SUCCESS);
1148 }
1149 
1150 template <class Type, bool = false, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
1151 static inline PetscErrorCode PCApply_Schur_Private(std::tuple<KSP, IS, Vec[2]> *p, PC factor, Type X, Type Y)
1152 {
1153   Mat B[2];
1154   Vec x, y;
1155 
1156   PetscFunctionBegin;
1157   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, factor->mat->rmap->n, X->cmap->n, nullptr, B));
1158   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, factor->mat->rmap->n, X->cmap->n, nullptr, B + 1));
1159   for (PetscInt i = 0; i < X->cmap->n; ++i) {
1160     PetscCall(MatDenseGetColumnVecRead(X, i, &x));
1161     PetscCall(MatDenseGetColumnVecWrite(B[0], i, &y));
1162     PetscCall(VecISCopy(y, std::get<1>(*p), SCATTER_FORWARD, x));
1163     PetscCall(MatDenseRestoreColumnVecWrite(B[0], i, &y));
1164     PetscCall(MatDenseRestoreColumnVecRead(X, i, &x));
1165   }
1166   PetscCall(PCMatApply(factor, B[0], B[1]));
1167   PetscCall(MatDestroy(B));
1168   for (PetscInt i = 0; i < X->cmap->n; ++i) {
1169     PetscCall(MatDenseGetColumnVecRead(B[1], i, &x));
1170     PetscCall(MatDenseGetColumnVecWrite(Y, i, &y));
1171     PetscCall(VecISCopy(x, std::get<1>(*p), SCATTER_REVERSE, y));
1172     PetscCall(MatDenseRestoreColumnVecWrite(Y, i, &y));
1173     PetscCall(MatDenseRestoreColumnVecRead(B[1], i, &x));
1174   }
1175   PetscCall(MatDestroy(B + 1));
1176   PetscFunctionReturn(PETSC_SUCCESS);
1177 }
1178 
1179 template <class Type = Vec, bool T = false>
1180 static PetscErrorCode PCApply_Schur(PC pc, Type x, Type y)
1181 {
1182   PC                           factor;
1183   Mat                          A;
1184   MatSolverType                type;
1185   PetscBool                    flg;
1186   std::tuple<KSP, IS, Vec[2]> *p;
1187 
1188   PetscFunctionBegin;
1189   PetscCall(PCShellGetContext(pc, &p));
1190   PetscCall(KSPGetPC(std::get<0>(*p), &factor));
1191   PetscCall(PCFactorGetMatSolverType(factor, &type));
1192   PetscCall(PCFactorGetMatrix(factor, &A));
1193   PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
1194   if (flg) {
1195     PetscCheck(PetscDefined(HAVE_MUMPS), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent MatSolverType");
1196     PetscCall(MatMumpsSetIcntl(A, 26, 0));
1197   } else {
1198     PetscCall(PetscStrcmp(type, MATSOLVERMKL_PARDISO, &flg));
1199     PetscCheck(flg && PetscDefined(HAVE_MKL_PARDISO), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent MatSolverType");
1200     flg = PETSC_FALSE;
1201 #if PetscDefined(HAVE_MKL_PARDISO)
1202     PetscCall(MatMkl_PardisoSetCntl(A, 70, 1));
1203 #endif
1204   }
1205   PetscCall(PCApply_Schur_Private<Type, T>(p, factor, x, y));
1206   if (flg) {
1207     PetscCall(MatMumpsSetIcntl(A, 26, -1));
1208   } else {
1209 #if PetscDefined(HAVE_MKL_PARDISO)
1210     PetscCall(MatMkl_PardisoSetCntl(A, 70, 0));
1211 #endif
1212   }
1213   PetscFunctionReturn(PETSC_SUCCESS);
1214 }
1215 
1216 static PetscErrorCode PCDestroy_Schur(PC pc)
1217 {
1218   std::tuple<KSP, IS, Vec[2]> *p;
1219 
1220   PetscFunctionBegin;
1221   PetscCall(PCShellGetContext(pc, &p));
1222   PetscCall(ISDestroy(&std::get<1>(*p)));
1223   PetscCall(VecDestroy(std::get<2>(*p)));
1224   PetscCall(VecDestroy(std::get<2>(*p) + 1));
1225   PetscCall(PetscFree(p));
1226   PetscFunctionReturn(PETSC_SUCCESS);
1227 }
1228 
1229 template <bool transpose>
1230 static PetscErrorCode PCHPDDMSolve_Private(const PC_HPDDM_Level *ctx, PetscScalar *rhs, const unsigned short &mu)
1231 {
1232   Mat      B, X;
1233   PetscInt n, N, j = 0;
1234 
1235   PetscFunctionBegin;
1236   PetscCall(KSPGetOperators(ctx->ksp, &B, nullptr));
1237   PetscCall(MatGetLocalSize(B, &n, nullptr));
1238   PetscCall(MatGetSize(B, &N, nullptr));
1239   if (ctx->parent->log_separate) {
1240     j = std::distance(ctx->parent->levels, std::find(ctx->parent->levels, ctx->parent->levels + ctx->parent->N, ctx));
1241     PetscCall(PetscLogEventBegin(PC_HPDDM_Solve[j], ctx->ksp, nullptr, nullptr, nullptr));
1242   }
1243   if (mu == 1) {
1244     if (!ctx->ksp->vec_rhs) {
1245       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)ctx->ksp), 1, n, N, nullptr, &ctx->ksp->vec_rhs));
1246       PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)ctx->ksp), n, N, &ctx->ksp->vec_sol));
1247     }
1248     PetscCall(VecPlaceArray(ctx->ksp->vec_rhs, rhs));
1249     if (!transpose) PetscCall(KSPSolve(ctx->ksp, nullptr, nullptr));
1250     else {
1251       PetscCall(VecConjugate(ctx->ksp->vec_rhs));
1252       PetscCall(KSPSolveTranspose(ctx->ksp, nullptr, nullptr)); /* TODO: missing KSPSolveHermitianTranspose() */
1253       PetscCall(VecConjugate(ctx->ksp->vec_sol));
1254     }
1255     PetscCall(VecCopy(ctx->ksp->vec_sol, ctx->ksp->vec_rhs));
1256     PetscCall(VecResetArray(ctx->ksp->vec_rhs));
1257   } else {
1258     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)ctx->ksp), n, PETSC_DECIDE, N, mu, rhs, &B));
1259     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)ctx->ksp), n, PETSC_DECIDE, N, mu, nullptr, &X));
1260     if (!transpose) PetscCall(KSPMatSolve(ctx->ksp, B, X));
1261     else {
1262       PetscCall(MatConjugate(B));
1263       PetscCall(KSPMatSolveTranspose(ctx->ksp, B, X)); /* TODO: missing KSPMatSolveHermitianTranspose() */
1264       PetscCall(MatConjugate(X));
1265     }
1266     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
1267     PetscCall(MatDestroy(&X));
1268     PetscCall(MatDestroy(&B));
1269   }
1270   if (ctx->parent->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_Solve[j], ctx->ksp, nullptr, nullptr, nullptr));
1271   PetscFunctionReturn(PETSC_SUCCESS);
1272 }
1273 
1274 static PetscErrorCode PCHPDDMSetUpNeumannOverlap_Private(PC pc)
1275 {
1276   PC_HPDDM *data = (PC_HPDDM *)pc->data;
1277 
1278   PetscFunctionBegin;
1279   if (data->setup) {
1280     Mat       P;
1281     Vec       x, xt = nullptr;
1282     PetscReal t = 0.0, s = 0.0;
1283 
1284     PetscCall(PCGetOperators(pc, nullptr, &P));
1285     PetscCall(PetscObjectQuery((PetscObject)P, "__SNES_latest_X", (PetscObject *)&x));
1286     PetscCallBack("PCHPDDM Neumann callback", (*data->setup)(data->aux, t, x, xt, s, data->is, data->setup_ctx));
1287   }
1288   PetscFunctionReturn(PETSC_SUCCESS);
1289 }
1290 
1291 static PetscErrorCode PCHPDDMCreateSubMatrices_Private(Mat mat, PetscInt n, const IS *, const IS *, MatReuse scall, Mat *submat[])
1292 {
1293   Mat       A;
1294   PetscBool flg;
1295 
1296   PetscFunctionBegin;
1297   PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatCreateSubMatrices() called to extract %" PetscInt_FMT " submatrices, which is different than 1", n);
1298   /* previously composed Mat */
1299   PetscCall(PetscObjectQuery((PetscObject)mat, "_PCHPDDM_SubMatrices", (PetscObject *)&A));
1300   PetscCheck(A, PETSC_COMM_SELF, PETSC_ERR_PLIB, "SubMatrices not found in Mat");
1301   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSCHURCOMPLEMENT, &flg)); /* MATSCHURCOMPLEMENT has neither a MatDuplicate() nor a MatCopy() implementation */
1302   if (scall == MAT_INITIAL_MATRIX) {
1303     PetscCall(PetscCalloc1(2, submat)); /* allocate an extra Mat to avoid errors in MatDestroySubMatrices_Dummy() */
1304     if (!flg) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, *submat));
1305   } else if (!flg) PetscCall(MatCopy(A, (*submat)[0], SAME_NONZERO_PATTERN));
1306   if (flg) {
1307     PetscCall(MatDestroy(*submat)); /* previously created Mat has to be destroyed */
1308     (*submat)[0] = A;
1309     PetscCall(PetscObjectReference((PetscObject)A));
1310   }
1311   PetscFunctionReturn(PETSC_SUCCESS);
1312 }
1313 
1314 static PetscErrorCode PCHPDDMCommunicationAvoidingPCASM_Private(PC pc, Mat C, PetscBool sorted)
1315 {
1316   PetscErrorCodeFn *op;
1317 
1318   PetscFunctionBegin;
1319   /* previously-composed Mat */
1320   PetscCall(PetscObjectCompose((PetscObject)pc->pmat, "_PCHPDDM_SubMatrices", (PetscObject)C));
1321   PetscCall(MatGetOperation(pc->pmat, MATOP_CREATE_SUBMATRICES, &op));
1322   /* trick suggested by Barry https://lists.mcs.anl.gov/pipermail/petsc-dev/2020-January/025491.html */
1323   PetscCall(MatSetOperation(pc->pmat, MATOP_CREATE_SUBMATRICES, (PetscErrorCodeFn *)PCHPDDMCreateSubMatrices_Private));
1324   if (sorted) PetscCall(PCASMSetSortIndices(pc, PETSC_FALSE)); /* everything is already sorted */
1325   PetscCall(PCSetFromOptions(pc));                             /* otherwise -pc_hpddm_levels_1_pc_asm_sub_mat_type is not used */
1326   PetscCall(PCSetUp(pc));
1327   /* reset MatCreateSubMatrices() */
1328   PetscCall(MatSetOperation(pc->pmat, MATOP_CREATE_SUBMATRICES, op));
1329   PetscCall(PetscObjectCompose((PetscObject)pc->pmat, "_PCHPDDM_SubMatrices", nullptr));
1330   PetscFunctionReturn(PETSC_SUCCESS);
1331 }
1332 
1333 static PetscErrorCode PCHPDDMPermute_Private(IS is, IS in_is, IS *out_is, Mat in_C, Mat *out_C, IS *p)
1334 {
1335   IS                           perm;
1336   const PetscInt              *ptr;
1337   PetscInt                    *concatenate, size, bs;
1338   std::map<PetscInt, PetscInt> order;
1339   PetscBool                    sorted;
1340 
1341   PetscFunctionBegin;
1342   PetscValidHeaderSpecific(is, IS_CLASSID, 1);
1343   PetscValidHeaderSpecific(in_C, MAT_CLASSID, 4);
1344   PetscCall(ISSorted(is, &sorted));
1345   if (!sorted) {
1346     PetscCall(ISGetLocalSize(is, &size));
1347     PetscCall(ISGetIndices(is, &ptr));
1348     PetscCall(ISGetBlockSize(is, &bs));
1349     /* MatCreateSubMatrices(), called by PCASM, follows the global numbering of Pmat */
1350     for (PetscInt n = 0; n < size; n += bs) order.insert(std::make_pair(ptr[n] / bs, n / bs));
1351     PetscCall(ISRestoreIndices(is, &ptr));
1352     size /= bs;
1353     if (out_C) {
1354       PetscCall(PetscMalloc1(size, &concatenate));
1355       for (const std::pair<const PetscInt, PetscInt> &i : order) *concatenate++ = i.second;
1356       concatenate -= size;
1357       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)in_C), bs, size, concatenate, PETSC_OWN_POINTER, &perm));
1358       PetscCall(ISSetPermutation(perm));
1359       /* permute user-provided Mat so that it matches with MatCreateSubMatrices() numbering */
1360       PetscCall(MatPermute(in_C, perm, perm, out_C));
1361       if (p) *p = perm;
1362       else PetscCall(ISDestroy(&perm)); /* no need to save the permutation */
1363     }
1364     if (out_is) {
1365       PetscCall(PetscMalloc1(size, &concatenate));
1366       for (const std::pair<const PetscInt, PetscInt> &i : order) *concatenate++ = i.first;
1367       concatenate -= size;
1368       /* permute user-provided IS so that it matches with MatCreateSubMatrices() numbering */
1369       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)in_is), bs, size, concatenate, PETSC_OWN_POINTER, out_is));
1370     }
1371   } else { /* input IS is sorted, nothing to permute, simply duplicate inputs when needed */
1372     if (out_C) PetscCall(MatDuplicate(in_C, MAT_COPY_VALUES, out_C));
1373     if (out_is) PetscCall(ISDuplicate(in_is, out_is));
1374   }
1375   PetscFunctionReturn(PETSC_SUCCESS);
1376 }
1377 
1378 static PetscErrorCode PCHPDDMCheckSymmetry_Private(PC pc, Mat A01, Mat A10, Mat *B01 = nullptr)
1379 {
1380   Mat       T, U = nullptr, B = nullptr;
1381   IS        z;
1382   PetscBool flg, conjugate = PETSC_FALSE;
1383 
1384   PetscFunctionBegin;
1385   PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATTRANSPOSEVIRTUAL, &flg));
1386   if (B01) *B01 = nullptr;
1387   if (flg) {
1388     PetscCall(MatShellGetScalingShifts(A10, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1389     PetscCall(MatTransposeGetMat(A10, &U));
1390   } else {
1391     PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
1392     if (flg) {
1393       PetscCall(MatShellGetScalingShifts(A10, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1394       PetscCall(MatHermitianTransposeGetMat(A10, &U));
1395       conjugate = PETSC_TRUE;
1396     }
1397   }
1398   if (U) PetscCall(MatDuplicate(U, MAT_COPY_VALUES, &T));
1399   else PetscCall(MatHermitianTranspose(A10, MAT_INITIAL_MATRIX, &T));
1400   PetscCall(PetscObjectTypeCompare((PetscObject)A01, MATTRANSPOSEVIRTUAL, &flg));
1401   if (flg) {
1402     PetscCall(MatShellGetScalingShifts(A01, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1403     PetscCall(MatTransposeGetMat(A01, &A01));
1404     PetscCall(MatTranspose(A01, MAT_INITIAL_MATRIX, &B));
1405     A01 = B;
1406   } else {
1407     PetscCall(PetscObjectTypeCompare((PetscObject)A01, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
1408     if (flg) {
1409       PetscCall(MatShellGetScalingShifts(A01, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1410       PetscCall(MatHermitianTransposeGetMat(A01, &A01));
1411       PetscCall(MatHermitianTranspose(A01, MAT_INITIAL_MATRIX, &B));
1412       A01 = B;
1413     }
1414   }
1415   PetscCall(PetscLayoutCompare(T->rmap, A01->rmap, &flg));
1416   if (flg) {
1417     PetscCall(PetscLayoutCompare(T->cmap, A01->cmap, &flg));
1418     if (flg) {
1419       PetscCall(MatFindZeroRows(A01, &z)); /* for essential boundary conditions, some implementations will */
1420       if (z) {                             /*  zero rows in [P00 A01] except for the diagonal of P00       */
1421         if (B01) PetscCall(MatDuplicate(T, MAT_COPY_VALUES, B01));
1422         PetscCall(MatSetOption(T, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
1423         PetscCall(MatZeroRowsIS(T, z, 0.0, nullptr, nullptr)); /* corresponding zero rows from A01 */
1424       }
1425       PetscCall(MatMultEqual(A01, T, 20, &flg));
1426       if (!B01) PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "A01 != A10^T");
1427       else {
1428         PetscCall(PetscInfo(pc, "A01 and A10^T are equal? %s\n", PetscBools[flg]));
1429         if (!flg) {
1430           if (z) PetscCall(MatDestroy(&T));
1431           else *B01 = T;
1432           flg = PETSC_TRUE;
1433         } else PetscCall(MatDestroy(B01));
1434       }
1435       PetscCall(ISDestroy(&z));
1436     }
1437   }
1438   if (!flg) PetscCall(PetscInfo(pc, "A01 and A10^T have non-congruent layouts, cannot test for equality\n"));
1439   if (!B01 || !*B01) PetscCall(MatDestroy(&T));
1440   else if (conjugate) PetscCall(MatConjugate(T));
1441   PetscCall(MatDestroy(&B));
1442   PetscFunctionReturn(PETSC_SUCCESS);
1443 }
1444 
1445 static PetscErrorCode PCHPDDMCheckInclusion_Private(PC pc, IS is, IS is_local, PetscBool check)
1446 {
1447   IS          intersect;
1448   const char *str = "IS of the auxiliary Mat does not include all local rows of A";
1449   PetscBool   equal;
1450 
1451   PetscFunctionBegin;
1452   PetscCall(ISIntersect(is, is_local, &intersect));
1453   PetscCall(ISEqualUnsorted(is_local, intersect, &equal));
1454   PetscCall(ISDestroy(&intersect));
1455   if (check) PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%s", str);
1456   else if (!equal) PetscCall(PetscInfo(pc, "%s\n", str));
1457   PetscFunctionReturn(PETSC_SUCCESS);
1458 }
1459 
1460 static PetscErrorCode PCHPDDMCheckMatStructure_Private(PC pc, Mat A, Mat B)
1461 {
1462   Mat             X, Y;
1463   const PetscInt *i[2], *j[2];
1464   PetscBool       flg = PETSC_TRUE;
1465 
1466   PetscFunctionBegin;
1467   PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &X)); /* no common way to compare sparsity pattern, so just convert to MATSEQAIJ */
1468   PetscCall(MatConvert(B, MATAIJ, MAT_INITIAL_MATRIX, &Y)); /* the second Mat (B = Neumann) should have a SUBSET_NONZERO_PATTERN MatStructure of the first one (A = Dirichlet) */
1469   PetscCall(MatSeqAIJGetCSRAndMemType(X, &i[0], &j[0], nullptr, nullptr));
1470   PetscCall(MatSeqAIJGetCSRAndMemType(Y, &i[1], &j[1], nullptr, nullptr));
1471   for (PetscInt row = 0; (row < X->rmap->n) && flg; ++row) {
1472     const PetscInt n = i[0][row + 1] - i[0][row];
1473 
1474     for (PetscInt k = i[1][row], location; k < i[1][row + 1]; ++k) {
1475       PetscCall(PetscFindInt(j[1][k], n, j[0] + i[0][row], &location));
1476       if (location < 0) {
1477         flg = PETSC_FALSE;
1478         break;
1479       }
1480     }
1481   }
1482   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1483   PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER_INPUT, "Auxiliary Mat is supposedly the local Neumann matrix but it has a sparsity pattern which is not a subset of the one of the local assembled matrix");
1484   PetscCall(MatDestroy(&Y));
1485   PetscCall(MatDestroy(&X));
1486   PetscFunctionReturn(PETSC_SUCCESS);
1487 }
1488 
1489 static PetscErrorCode PCHPDDMDestroySubMatrices_Private(PetscBool flg, PetscBool algebraic, Mat *sub)
1490 {
1491   IS is;
1492 
1493   PetscFunctionBegin;
1494   if (!flg) {
1495     if (algebraic) {
1496       PetscCall(PetscObjectQuery((PetscObject)sub[0], "_PCHPDDM_Embed", (PetscObject *)&is));
1497       PetscCall(ISDestroy(&is));
1498       PetscCall(PetscObjectCompose((PetscObject)sub[0], "_PCHPDDM_Embed", nullptr));
1499       PetscCall(PetscObjectCompose((PetscObject)sub[0], "_PCHPDDM_Compact", nullptr));
1500     }
1501     PetscCall(MatDestroySubMatrices(algebraic ? 2 : 1, &sub));
1502   }
1503   PetscFunctionReturn(PETSC_SUCCESS);
1504 }
1505 
1506 static PetscErrorCode PCHPDDMAlgebraicAuxiliaryMat_Private(Mat P, IS *is, Mat *sub[], PetscBool block)
1507 {
1508   IS         icol[3], irow[2];
1509   Mat       *M, Q;
1510   PetscReal *ptr;
1511   PetscInt  *idx, p = 0, bs = P->cmap->bs;
1512   PetscBool  flg;
1513 
1514   PetscFunctionBegin;
1515   PetscCall(ISCreateStride(PETSC_COMM_SELF, P->cmap->N, 0, 1, icol + 2));
1516   PetscCall(ISSetBlockSize(icol[2], bs));
1517   PetscCall(ISSetIdentity(icol[2]));
1518   PetscCall(PetscObjectTypeCompare((PetscObject)P, MATMPISBAIJ, &flg));
1519   if (flg) {
1520     /* MatCreateSubMatrices() does not handle MATMPISBAIJ properly when iscol != isrow, so convert first to MATMPIBAIJ */
1521     PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &Q));
1522     std::swap(P, Q);
1523   }
1524   PetscCall(MatCreateSubMatrices(P, 1, is, icol + 2, MAT_INITIAL_MATRIX, &M));
1525   if (flg) {
1526     std::swap(P, Q);
1527     PetscCall(MatDestroy(&Q));
1528   }
1529   PetscCall(ISDestroy(icol + 2));
1530   PetscCall(ISCreateStride(PETSC_COMM_SELF, M[0]->rmap->N, 0, 1, irow));
1531   PetscCall(ISSetBlockSize(irow[0], bs));
1532   PetscCall(ISSetIdentity(irow[0]));
1533   if (!block) {
1534     PetscCall(PetscMalloc2(P->cmap->N, &ptr, P->cmap->N / bs, &idx));
1535     PetscCall(MatGetColumnNorms(M[0], NORM_INFINITY, ptr));
1536     /* check for nonzero columns so that M[0] may be expressed in compact form */
1537     for (PetscInt n = 0; n < P->cmap->N; n += bs) {
1538       if (std::find_if(ptr + n, ptr + n + bs, [](PetscReal v) { return v > PETSC_MACHINE_EPSILON; }) != ptr + n + bs) idx[p++] = n / bs;
1539     }
1540     PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, p, idx, PETSC_USE_POINTER, icol + 1));
1541     PetscCall(ISSetInfo(icol[1], IS_SORTED, IS_GLOBAL, PETSC_TRUE, PETSC_TRUE));
1542     PetscCall(ISEmbed(*is, icol[1], PETSC_FALSE, icol + 2));
1543     irow[1] = irow[0];
1544     /* first Mat will be used in PCASM (if it is used as a PC on this level) and as the left-hand side of GenEO */
1545     icol[0] = is[0];
1546     PetscCall(MatCreateSubMatrices(M[0], 2, irow, icol, MAT_INITIAL_MATRIX, sub));
1547     PetscCall(ISDestroy(icol + 1));
1548     PetscCall(PetscFree2(ptr, idx));
1549     /* IS used to go back and forth between the augmented and the original local linear system, see eq. (3.4) of [2022b] */
1550     PetscCall(PetscObjectCompose((PetscObject)(*sub)[0], "_PCHPDDM_Embed", (PetscObject)icol[2]));
1551     /* Mat used in eq. (3.1) of [2022b] */
1552     PetscCall(PetscObjectCompose((PetscObject)(*sub)[0], "_PCHPDDM_Compact", (PetscObject)(*sub)[1]));
1553   } else {
1554     Mat aux;
1555 
1556     PetscCall(MatSetOption(M[0], MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
1557     /* diagonal block of the overlapping rows */
1558     PetscCall(MatCreateSubMatrices(M[0], 1, irow, is, MAT_INITIAL_MATRIX, sub));
1559     PetscCall(MatDuplicate((*sub)[0], MAT_COPY_VALUES, &aux));
1560     PetscCall(MatSetOption(aux, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1561     if (bs == 1) { /* scalar case */
1562       Vec sum[2];
1563 
1564       PetscCall(MatCreateVecs(aux, sum, sum + 1));
1565       PetscCall(MatGetRowSum(M[0], sum[0]));
1566       PetscCall(MatGetRowSum(aux, sum[1]));
1567       /* off-diagonal block row sum (full rows - diagonal block rows) */
1568       PetscCall(VecAXPY(sum[0], -1.0, sum[1]));
1569       /* subdomain matrix plus off-diagonal block row sum */
1570       PetscCall(MatDiagonalSet(aux, sum[0], ADD_VALUES));
1571       PetscCall(VecDestroy(sum));
1572       PetscCall(VecDestroy(sum + 1));
1573     } else { /* vectorial case */
1574       /* TODO: missing MatGetValuesBlocked(), so the code below is     */
1575       /* an extension of the scalar case for when bs > 1, but it could */
1576       /* be more efficient by avoiding all these MatMatMult()          */
1577       Mat          sum[2], ones;
1578       PetscScalar *ptr;
1579 
1580       PetscCall(PetscCalloc1(M[0]->cmap->n * bs, &ptr));
1581       PetscCall(MatCreateDense(PETSC_COMM_SELF, M[0]->cmap->n, bs, M[0]->cmap->n, bs, ptr, &ones));
1582       for (PetscInt n = 0; n < M[0]->cmap->n; n += bs) {
1583         for (p = 0; p < bs; ++p) ptr[n + p * (M[0]->cmap->n + 1)] = 1.0;
1584       }
1585       PetscCall(MatMatMult(M[0], ones, MAT_INITIAL_MATRIX, PETSC_CURRENT, sum));
1586       PetscCall(MatDestroy(&ones));
1587       PetscCall(MatCreateDense(PETSC_COMM_SELF, aux->cmap->n, bs, aux->cmap->n, bs, ptr, &ones));
1588       PetscCall(MatDenseSetLDA(ones, M[0]->cmap->n));
1589       PetscCall(MatMatMult(aux, ones, MAT_INITIAL_MATRIX, PETSC_CURRENT, sum + 1));
1590       PetscCall(MatDestroy(&ones));
1591       PetscCall(PetscFree(ptr));
1592       /* off-diagonal block row sum (full rows - diagonal block rows) */
1593       PetscCall(MatAXPY(sum[0], -1.0, sum[1], SAME_NONZERO_PATTERN));
1594       PetscCall(MatDestroy(sum + 1));
1595       /* re-order values to be consistent with MatSetValuesBlocked()           */
1596       /* equivalent to MatTranspose() which does not truly handle              */
1597       /* MAT_INPLACE_MATRIX in the rectangular case, as it calls PetscMalloc() */
1598       PetscCall(MatDenseGetArrayWrite(sum[0], &ptr));
1599       HPDDM::Wrapper<PetscScalar>::imatcopy<'T'>(bs, sum[0]->rmap->n, ptr, sum[0]->rmap->n, bs);
1600       /* subdomain matrix plus off-diagonal block row sum */
1601       for (PetscInt n = 0; n < aux->cmap->n / bs; ++n) PetscCall(MatSetValuesBlocked(aux, 1, &n, 1, &n, ptr + n * bs * bs, ADD_VALUES));
1602       PetscCall(MatAssemblyBegin(aux, MAT_FINAL_ASSEMBLY));
1603       PetscCall(MatAssemblyEnd(aux, MAT_FINAL_ASSEMBLY));
1604       PetscCall(MatDenseRestoreArrayWrite(sum[0], &ptr));
1605       PetscCall(MatDestroy(sum));
1606     }
1607     PetscCall(MatSetOption(aux, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1608     /* left-hand side of GenEO, with the same sparsity pattern as PCASM subdomain solvers  */
1609     PetscCall(PetscObjectCompose((PetscObject)(*sub)[0], "_PCHPDDM_Neumann_Mat", (PetscObject)aux));
1610   }
1611   PetscCall(ISDestroy(irow));
1612   PetscCall(MatDestroySubMatrices(1, &M));
1613   PetscFunctionReturn(PETSC_SUCCESS);
1614 }
1615 
1616 static PetscErrorCode PCApply_Nest(PC pc, Vec x, Vec y)
1617 {
1618   Mat                    A;
1619   MatSolverType          type;
1620   IS                     is[2];
1621   PetscBool              flg;
1622   std::pair<PC, Vec[2]> *p;
1623 
1624   PetscFunctionBegin;
1625   PetscCall(PCShellGetContext(pc, &p));
1626   if (p->second[0]) { /* in case of a centralized Schur complement, some processes may have no local operator */
1627     PetscCall(PCGetOperators(p->first, &A, nullptr));
1628     PetscCall(MatNestGetISs(A, is, nullptr));
1629     PetscCall(PetscObjectTypeCompareAny((PetscObject)p->first, &flg, PCLU, PCCHOLESKY, ""));
1630     if (flg) { /* partial solve currently only makes sense with exact factorizations */
1631       PetscCall(PCFactorGetMatSolverType(p->first, &type));
1632       PetscCall(PCFactorGetMatrix(p->first, &A));
1633       if (A->schur) {
1634         PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
1635         if (flg) PetscCall(MatMumpsSetIcntl(A, 26, 1)); /* reduction/condensation phase followed by Schur complement solve */
1636       } else flg = PETSC_FALSE;
1637     }
1638     PetscCall(VecISCopy(p->second[0], is[1], SCATTER_FORWARD, x)); /* assign the RHS associated to the Schur complement */
1639     PetscCall(PCApply(p->first, p->second[0], p->second[1]));
1640     PetscCall(VecISCopy(p->second[1], is[1], SCATTER_REVERSE, y)); /* retrieve the partial solution associated to the Schur complement */
1641     if (flg) PetscCall(MatMumpsSetIcntl(A, 26, -1));               /* default ICNTL(26) value in PETSc */
1642   }
1643   PetscFunctionReturn(PETSC_SUCCESS);
1644 }
1645 
1646 static PetscErrorCode PCView_Nest(PC pc, PetscViewer viewer)
1647 {
1648   std::pair<PC, Vec[2]> *p;
1649 
1650   PetscFunctionBegin;
1651   PetscCall(PCShellGetContext(pc, &p));
1652   PetscCall(PCView(p->first, viewer));
1653   PetscFunctionReturn(PETSC_SUCCESS);
1654 }
1655 
1656 static PetscErrorCode PCDestroy_Nest(PC pc)
1657 {
1658   std::pair<PC, Vec[2]> *p;
1659 
1660   PetscFunctionBegin;
1661   PetscCall(PCShellGetContext(pc, &p));
1662   PetscCall(VecDestroy(p->second));
1663   PetscCall(VecDestroy(p->second + 1));
1664   PetscCall(PCDestroy(&p->first));
1665   PetscCall(PetscFree(p));
1666   PetscFunctionReturn(PETSC_SUCCESS);
1667 }
1668 
1669 template <bool T = false>
1670 static PetscErrorCode MatMult_Schur(Mat A, Vec x, Vec y)
1671 {
1672   std::tuple<Mat, PetscSF, Vec[2]> *ctx;
1673 
1674   PetscFunctionBegin;
1675   PetscCall(MatShellGetContext(A, &ctx));
1676   PetscCall(VecScatterBegin(std::get<1>(*ctx), x, std::get<2>(*ctx)[0], INSERT_VALUES, SCATTER_FORWARD)); /* local Vec with overlap */
1677   PetscCall(VecScatterEnd(std::get<1>(*ctx), x, std::get<2>(*ctx)[0], INSERT_VALUES, SCATTER_FORWARD));
1678   if (!T) PetscCall(MatMult(std::get<0>(*ctx), std::get<2>(*ctx)[0], std::get<2>(*ctx)[1])); /* local Schur complement */
1679   else PetscCall(MatMultTranspose(std::get<0>(*ctx), std::get<2>(*ctx)[0], std::get<2>(*ctx)[1]));
1680   PetscCall(VecSet(y, 0.0));
1681   PetscCall(VecScatterBegin(std::get<1>(*ctx), std::get<2>(*ctx)[1], y, ADD_VALUES, SCATTER_REVERSE)); /* global Vec with summed up contributions on the overlap */
1682   PetscCall(VecScatterEnd(std::get<1>(*ctx), std::get<2>(*ctx)[1], y, ADD_VALUES, SCATTER_REVERSE));
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
1686 static PetscErrorCode MatDestroy_Schur(Mat A)
1687 {
1688   std::tuple<Mat, PetscSF, Vec[2]> *ctx;
1689 
1690   PetscFunctionBegin;
1691   PetscCall(MatShellGetContext(A, &ctx));
1692   PetscCall(VecDestroy(std::get<2>(*ctx)));
1693   PetscCall(VecDestroy(std::get<2>(*ctx) + 1));
1694   PetscCall(PetscFree(ctx));
1695   PetscFunctionReturn(PETSC_SUCCESS);
1696 }
1697 
1698 static PetscErrorCode MatMult_SchurCorrection(Mat A, Vec x, Vec y)
1699 {
1700   PC                                         pc;
1701   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx;
1702 
1703   PetscFunctionBegin;
1704   PetscCall(MatShellGetContext(A, &ctx));
1705   pc = ((PC_HPDDM *)std::get<0>(*ctx)[0]->data)->levels[0]->ksp->pc;
1706   if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) {             /* Q_0 is the coarse correction associated to the A00 block from PCFIELDSPLIT */
1707     PetscCall(MatMult(std::get<1>(*ctx)[0], x, std::get<3>(*ctx)[1]));                    /*     A_01 x                 */
1708     PetscCall(PCHPDDMDeflate_Private(pc, std::get<3>(*ctx)[1], std::get<3>(*ctx)[1]));    /*     Q_0 A_01 x             */
1709     PetscCall(MatMult(std::get<1>(*ctx)[1], std::get<3>(*ctx)[1], std::get<3>(*ctx)[0])); /*     A_10 Q_0 A_01 x        */
1710     PetscCall(PCApply(std::get<0>(*ctx)[1], std::get<3>(*ctx)[0], y));                    /* y = M_S^-1 A_10 Q_0 A_01 x */
1711   } else {
1712     PetscCall(PCApply(std::get<0>(*ctx)[1], x, std::get<3>(*ctx)[0]));                    /*     M_S^-1 x               */
1713     PetscCall(MatMult(std::get<1>(*ctx)[0], std::get<3>(*ctx)[0], std::get<3>(*ctx)[1])); /*     A_01 M_S^-1 x          */
1714     PetscCall(PCHPDDMDeflate_Private(pc, std::get<3>(*ctx)[1], std::get<3>(*ctx)[1]));    /*     Q_0 A_01 M_S^-1 x      */
1715     PetscCall(MatMult(std::get<1>(*ctx)[1], std::get<3>(*ctx)[1], y));                    /* y = A_10 Q_0 A_01 M_S^-1 x */
1716   }
1717   PetscCall(VecAXPY(y, -1.0, x)); /* y -= x, preconditioned eq. (24) of https://hal.science/hal-02343808v6/document (with a sign flip) */
1718   PetscFunctionReturn(PETSC_SUCCESS);
1719 }
1720 
1721 static PetscErrorCode MatView_SchurCorrection(Mat A, PetscViewer viewer)
1722 {
1723   PetscBool                                  ascii;
1724   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx;
1725 
1726   PetscFunctionBegin;
1727   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &ascii));
1728   if (ascii) {
1729     PetscCall(MatShellGetContext(A, &ctx));
1730     PetscCall(PetscViewerASCIIPrintf(viewer, "action of %s\n", std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT ? "(I - M_S^-1 A_10 Q_0 A_01)" : "(I - A_10 Q_0 A_01 M_S^-1)"));
1731     PetscCall(PCView(std::get<0>(*ctx)[1], viewer)); /* no need to PCView(Q_0) since it will be done by PCFIELDSPLIT */
1732   }
1733   PetscFunctionReturn(PETSC_SUCCESS);
1734 }
1735 
1736 static PetscErrorCode MatDestroy_SchurCorrection(Mat A)
1737 {
1738   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx;
1739 
1740   PetscFunctionBegin;
1741   PetscCall(MatShellGetContext(A, &ctx));
1742   PetscCall(VecDestroy(std::get<3>(*ctx)));
1743   PetscCall(VecDestroy(std::get<3>(*ctx) + 1));
1744   PetscCall(VecDestroy(std::get<3>(*ctx) + 2));
1745   PetscCall(PCDestroy(std::get<0>(*ctx) + 1));
1746   PetscCall(PetscFree(ctx));
1747   PetscFunctionReturn(PETSC_SUCCESS);
1748 }
1749 
1750 static PetscErrorCode PCPostSolve_SchurPreLeastSquares(PC, KSP, Vec, Vec x)
1751 {
1752   PetscFunctionBegin;
1753   PetscCall(VecScale(x, -1.0));
1754   PetscFunctionReturn(PETSC_SUCCESS);
1755 }
1756 
1757 static PetscErrorCode KSPPreSolve_SchurCorrection(KSP, Vec b, Vec, void *context)
1758 {
1759   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx = reinterpret_cast<std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *>(context);
1760 
1761   PetscFunctionBegin;
1762   if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) {
1763     PetscCall(PCApply(std::get<0>(*ctx)[1], b, std::get<3>(*ctx)[2]));
1764     std::swap(*b, *std::get<3>(*ctx)[2]); /* replace b by M^-1 b, but need to keep a copy of the original RHS, so swap it with the work Vec */
1765   }
1766   PetscFunctionReturn(PETSC_SUCCESS);
1767 }
1768 
1769 static PetscErrorCode KSPPostSolve_SchurCorrection(KSP, Vec b, Vec x, void *context)
1770 {
1771   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx = reinterpret_cast<std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *>(context);
1772 
1773   PetscFunctionBegin;
1774   if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) std::swap(*b, *std::get<3>(*ctx)[2]); /* put back the original RHS where it belongs */
1775   else {
1776     PetscCall(PCApply(std::get<0>(*ctx)[1], x, std::get<3>(*ctx)[2]));
1777     PetscCall(VecCopy(std::get<3>(*ctx)[2], x)); /* replace x by M^-1 x */
1778   }
1779   PetscFunctionReturn(PETSC_SUCCESS);
1780 }
1781 
1782 static PetscErrorCode MatMult_Harmonic(Mat, Vec, Vec);
1783 static PetscErrorCode MatMultTranspose_Harmonic(Mat, Vec, Vec);
1784 static PetscErrorCode MatProduct_AB_Harmonic(Mat, Mat, Mat, void *);
1785 static PetscErrorCode MatProduct_AtB_Harmonic(Mat, Mat, Mat, void *);
1786 static PetscErrorCode MatDestroy_Harmonic(Mat);
1787 
1788 static PetscErrorCode PCSetUp_HPDDM(PC pc)
1789 {
1790   PC_HPDDM                                  *data = (PC_HPDDM *)pc->data;
1791   PC                                         inner;
1792   KSP                                       *ksp;
1793   Mat                                       *sub, A, P, N, C = nullptr, uaux = nullptr, weighted, subA[2], S;
1794   Vec                                        xin, v;
1795   std::vector<Vec>                           initial;
1796   IS                                         is[1], loc, uis = data->is, unsorted = nullptr;
1797   ISLocalToGlobalMapping                     l2g;
1798   char                                       prefix[256];
1799   const char                                *pcpre;
1800   const PetscScalar *const                  *ev;
1801   PetscInt                                   n, requested = data->N, reused = 0, overlap = -1;
1802   MatStructure                               structure  = UNKNOWN_NONZERO_PATTERN;
1803   PetscBool                                  subdomains = PETSC_FALSE, flg = PETSC_FALSE, ismatis, swap = PETSC_FALSE, algebraic = PETSC_FALSE, block = PETSC_FALSE;
1804   DM                                         dm;
1805   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx = nullptr;
1806 #if PetscDefined(USE_DEBUG)
1807   IS  dis  = nullptr;
1808   Mat daux = nullptr;
1809 #endif
1810 
1811   PetscFunctionBegin;
1812   PetscCheck(data->levels && data->levels[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not a single level allocated");
1813   PetscCall(PCGetOptionsPrefix(pc, &pcpre));
1814   PetscCall(PCGetOperators(pc, &A, &P));
1815   if (!data->levels[0]->ksp) {
1816     PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &data->levels[0]->ksp));
1817     PetscCall(KSPSetNestLevel(data->levels[0]->ksp, pc->kspnestlevel));
1818     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_%s_", pcpre ? pcpre : "", data->N > 1 ? "levels_1" : "coarse"));
1819     PetscCall(KSPSetOptionsPrefix(data->levels[0]->ksp, prefix));
1820     PetscCall(KSPSetType(data->levels[0]->ksp, KSPPREONLY));
1821   } else if (data->levels[0]->ksp->pc && data->levels[0]->ksp->pc->setupcalled && data->levels[0]->ksp->pc->reusepreconditioner) {
1822     /* if the fine-level PCSHELL exists, its setup has succeeded, and one wants to reuse it, */
1823     /* then just propagate the appropriate flag to the coarser levels                        */
1824     for (n = 0; n < PETSC_PCHPDDM_MAXLEVELS && data->levels[n]; ++n) {
1825       /* the following KSP and PC may be NULL for some processes, hence the check            */
1826       if (data->levels[n]->ksp) PetscCall(KSPSetReusePreconditioner(data->levels[n]->ksp, PETSC_TRUE));
1827       if (data->levels[n]->pc) PetscCall(PCSetReusePreconditioner(data->levels[n]->pc, PETSC_TRUE));
1828     }
1829     /* early bail out because there is nothing to do */
1830     PetscFunctionReturn(PETSC_SUCCESS);
1831   } else {
1832     /* reset coarser levels */
1833     for (n = 1; n < PETSC_PCHPDDM_MAXLEVELS && data->levels[n]; ++n) {
1834       if (data->levels[n]->ksp && data->levels[n]->ksp->pc && data->levels[n]->ksp->pc->setupcalled && data->levels[n]->ksp->pc->reusepreconditioner && n < data->N) {
1835         reused = data->N - n;
1836         break;
1837       }
1838       PetscCall(KSPDestroy(&data->levels[n]->ksp));
1839       PetscCall(PCDestroy(&data->levels[n]->pc));
1840     }
1841     /* check if some coarser levels are being reused */
1842     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reused, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
1843     const int *addr = data->levels[0]->P ? data->levels[0]->P->getAddrLocal() : &HPDDM::i__0;
1844 
1845     if (addr != &HPDDM::i__0 && reused != data->N - 1) {
1846       /* reuse previously computed eigenvectors */
1847       ev = data->levels[0]->P->getVectors();
1848       if (ev) {
1849         initial.reserve(*addr);
1850         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, data->levels[0]->P->getDof(), ev[0], &xin));
1851         for (n = 0; n < *addr; ++n) {
1852           PetscCall(VecDuplicate(xin, &v));
1853           PetscCall(VecPlaceArray(xin, ev[n]));
1854           PetscCall(VecCopy(xin, v));
1855           initial.emplace_back(v);
1856           PetscCall(VecResetArray(xin));
1857         }
1858         PetscCall(VecDestroy(&xin));
1859       }
1860     }
1861   }
1862   data->N -= reused;
1863   PetscCall(KSPSetOperators(data->levels[0]->ksp, A, P));
1864 
1865   PetscCall(PetscObjectTypeCompare((PetscObject)P, MATIS, &ismatis));
1866   if (!data->is && !ismatis) {
1867     PetscErrorCode (*create)(DM, IS *, Mat *, PetscErrorCode (**)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *), void **) = nullptr;
1868     PetscErrorCode (*usetup)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *)                                                = nullptr;
1869     void *uctx                                                                                                               = nullptr;
1870 
1871     /* first see if we can get the data from the DM */
1872     PetscCall(MatGetDM(P, &dm));
1873     if (!dm) PetscCall(MatGetDM(A, &dm));
1874     if (!dm) PetscCall(PCGetDM(pc, &dm));
1875     if (dm) { /* this is the hook for DMPLEX for which the auxiliary Mat is the local Neumann matrix */
1876       PetscCall(PetscObjectQueryFunction((PetscObject)dm, "DMCreateNeumannOverlap_C", &create));
1877       if (create) {
1878         PetscCall((*create)(dm, &uis, &uaux, &usetup, &uctx));
1879         if (data->Neumann == PETSC_BOOL3_UNKNOWN) data->Neumann = PETSC_BOOL3_TRUE; /* set the value only if it was not already provided by the user */
1880       }
1881     }
1882     if (!create) {
1883       if (!uis) {
1884         PetscCall(PetscObjectQuery((PetscObject)pc, "_PCHPDDM_Neumann_IS", (PetscObject *)&uis));
1885         PetscCall(PetscObjectReference((PetscObject)uis));
1886       }
1887       if (!uaux) {
1888         PetscCall(PetscObjectQuery((PetscObject)pc, "_PCHPDDM_Neumann_Mat", (PetscObject *)&uaux));
1889         PetscCall(PetscObjectReference((PetscObject)uaux));
1890       }
1891       /* look inside the Pmat instead of the PC, needed for MatSchurComplementComputeExplicitOperator() */
1892       if (!uis) {
1893         PetscCall(PetscObjectQuery((PetscObject)P, "_PCHPDDM_Neumann_IS", (PetscObject *)&uis));
1894         PetscCall(PetscObjectReference((PetscObject)uis));
1895       }
1896       if (!uaux) {
1897         PetscCall(PetscObjectQuery((PetscObject)P, "_PCHPDDM_Neumann_Mat", (PetscObject *)&uaux));
1898         PetscCall(PetscObjectReference((PetscObject)uaux));
1899       }
1900     }
1901     PetscCall(PCHPDDMSetAuxiliaryMat(pc, uis, uaux, usetup, uctx));
1902     PetscCall(MatDestroy(&uaux));
1903     PetscCall(ISDestroy(&uis));
1904   }
1905 
1906   if (!ismatis) {
1907     PetscCall(PCHPDDMSetUpNeumannOverlap_Private(pc));
1908     PetscCall(PetscOptionsGetBool(((PetscObject)pc)->options, pcpre, "-pc_hpddm_block_splitting", &block, nullptr));
1909     PetscCall(PetscOptionsGetInt(((PetscObject)pc)->options, pcpre, "-pc_hpddm_harmonic_overlap", &overlap, nullptr));
1910     PetscCall(PetscObjectTypeCompare((PetscObject)P, MATSCHURCOMPLEMENT, &flg));
1911     if (data->is || flg) {
1912       if (block || overlap != -1) {
1913         PetscCall(ISDestroy(&data->is));
1914         PetscCall(MatDestroy(&data->aux));
1915       } else if (flg) {
1916         PCHPDDMSchurPreType type = PC_HPDDM_SCHUR_PRE_GENEO;
1917 
1918         PetscCall(PetscOptionsGetEnum(((PetscObject)pc)->options, pcpre, "-pc_hpddm_schur_precondition", PCHPDDMSchurPreTypes, (PetscEnum *)&type, &flg));
1919         if (type == PC_HPDDM_SCHUR_PRE_LEAST_SQUARES) {
1920           PetscCall(ISDestroy(&data->is)); /* destroy any previously user-set objects since they will be set automatically */
1921           PetscCall(MatDestroy(&data->aux));
1922         } else if (type == PC_HPDDM_SCHUR_PRE_GENEO) {
1923           PetscContainer container = nullptr;
1924 
1925           PetscCall(PetscObjectQuery((PetscObject)pc, "_PCHPDDM_Schur", (PetscObject *)&container));
1926           if (!container) { /* first call to PCSetUp() on the PC associated to the Schur complement */
1927             PC_HPDDM       *data_00;
1928             KSP             ksp, inner_ksp;
1929             PC              pc_00;
1930             Mat             A11 = nullptr;
1931             Vec             d   = nullptr;
1932             PetscReal       norm;
1933             const PetscInt *ranges;
1934             PetscMPIInt     size;
1935             char           *prefix;
1936 
1937             PetscCall(MatSchurComplementGetKSP(P, &ksp));
1938             PetscCall(KSPGetPC(ksp, &pc_00));
1939             PetscCall(PetscObjectTypeCompare((PetscObject)pc_00, PCHPDDM, &flg));
1940             PetscCheck(flg, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s and -%spc_type %s (!= %s)", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type], ((PetscObject)pc_00)->prefix ? ((PetscObject)pc_00)->prefix : "",
1941                        ((PetscObject)pc_00)->type_name, PCHPDDM);
1942             data_00 = (PC_HPDDM *)pc_00->data;
1943             PetscCheck(data_00->N == 2, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s and %" PetscInt_FMT " level%s instead of 2 for the A00 block -%s", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type],
1944                        data_00->N, data_00->N > 1 ? "s" : "", ((PetscObject)pc_00)->prefix);
1945             PetscCheck(data_00->levels[0]->pc, PetscObjectComm((PetscObject)P), PETSC_ERR_ORDER, "PC of the first block%s not setup yet", ((PetscObject)pc_00)->prefix ? std::string(std::string(" (") + ((PetscObject)pc_00)->prefix + std::string(")")).c_str() : "");
1946             PetscCall(PetscObjectTypeCompare((PetscObject)data_00->levels[0]->pc, PCASM, &flg));
1947             PetscCheck(flg, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s and -%spc_type %s (!= %s)", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type], ((PetscObject)data_00->levels[0]->pc)->prefix,
1948                        ((PetscObject)data_00->levels[0]->pc)->type_name, PCASM);
1949             PetscCall(PetscNew(&ctx)); /* context to pass data around for the inner-most PC, which will be a proper PCHPDDM (or a dummy variable if the Schur complement is centralized on a single process)  */
1950             PetscCall(MatSchurComplementGetSubMatrices(P, nullptr, nullptr, nullptr, nullptr, &A11));
1951             PetscCall(MatGetOwnershipRanges(A11, &ranges));
1952             PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A11), &size));
1953             flg = PetscBool(std::find_if(ranges, ranges + size + 1, [&](PetscInt v) { return v != ranges[0] && v != ranges[size]; }) == ranges + size + 1); /* are all local matrices but one of dimension 0 (centralized Schur complement)? */
1954             if (!flg) {
1955               if (PetscDefined(USE_DEBUG) || !data->is) {
1956                 Mat A01, A10, B = nullptr, C = nullptr, *sub;
1957 
1958                 PetscCall(MatSchurComplementGetSubMatrices(P, &A, nullptr, &A01, &A10, nullptr));
1959                 PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATTRANSPOSEVIRTUAL, &flg));
1960                 if (flg) {
1961                   PetscCall(MatTransposeGetMat(A10, &C));
1962                   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &B));
1963                 } else {
1964                   PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
1965                   if (flg) {
1966                     PetscCall(MatHermitianTransposeGetMat(A10, &C));
1967                     PetscCall(MatHermitianTranspose(C, MAT_INITIAL_MATRIX, &B));
1968                   }
1969                 }
1970                 if (flg)
1971                   PetscCall(MatShellGetScalingShifts(A10, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1972                 if (!B) {
1973                   B = A10;
1974                   PetscCall(PetscObjectReference((PetscObject)B));
1975                 } else if (!data->is) {
1976                   PetscCall(PetscObjectTypeCompareAny((PetscObject)A01, &flg, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
1977                   if (!flg) C = A01;
1978                   else
1979                     PetscCall(MatShellGetScalingShifts(A01, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1980                 }
1981                 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->rmap->N, 0, 1, &uis));
1982                 PetscCall(ISSetIdentity(uis));
1983                 if (!data->is) {
1984                   if (C) PetscCall(PetscObjectReference((PetscObject)C));
1985                   else PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &C));
1986                   PetscCall(ISDuplicate(data_00->is, is));
1987                   PetscCall(MatIncreaseOverlap(A, 1, is, 1));
1988                   PetscCall(MatSetOption(C, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
1989                   PetscCall(MatCreateSubMatrices(C, 1, is, &uis, MAT_INITIAL_MATRIX, &sub));
1990                   PetscCall(MatDestroy(&C));
1991                   PetscCall(MatTranspose(sub[0], MAT_INITIAL_MATRIX, &C));
1992                   PetscCall(MatDestroySubMatrices(1, &sub));
1993                   PetscCall(MatFindNonzeroRows(C, &data->is));
1994                   PetscCheck(data->is, PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "No empty row, which likely means that some rows of A_10 are dense");
1995                   PetscCall(MatDestroy(&C));
1996                   PetscCall(ISDestroy(is));
1997                   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)data->is), A11->rmap->n, A11->rmap->rstart, 1, &loc));
1998                   if (PetscDefined(USE_DEBUG)) PetscCall(PCHPDDMCheckInclusion_Private(pc, data->is, loc, PETSC_FALSE));
1999                   PetscCall(ISExpand(data->is, loc, is));
2000                   PetscCall(ISDestroy(&loc));
2001                   PetscCall(ISDestroy(&data->is));
2002                   data->is = is[0];
2003                   is[0]    = nullptr;
2004                 }
2005                 if (PetscDefined(USE_DEBUG)) {
2006                   PetscCall(PCHPDDMCheckSymmetry_Private(pc, A01, A10));
2007                   PetscCall(MatCreateSubMatrices(B, 1, &uis, &data_00->is, MAT_INITIAL_MATRIX, &sub)); /* expensive check since all processes fetch all rows (but only some columns) of the constraint matrix */
2008                   PetscCall(ISDestroy(&uis));
2009                   PetscCall(ISDuplicate(data->is, &uis));
2010                   PetscCall(ISSort(uis));
2011                   PetscCall(ISComplement(uis, 0, B->rmap->N, is));
2012                   PetscCall(MatDuplicate(sub[0], MAT_COPY_VALUES, &C));
2013                   PetscCall(MatZeroRowsIS(C, is[0], 0.0, nullptr, nullptr));
2014                   PetscCall(ISDestroy(is));
2015                   PetscCall(MatMultEqual(sub[0], C, 20, &flg));
2016                   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "The image of A_10 (R_i^p)^T from the local primal (e.g., velocity) space to the full dual (e.g., pressure) space is not restricted to the local dual space: A_10 (R_i^p)^T != R_i^d (R_i^d)^T A_10 (R_i^p)^T"); /* cf. eq. (9) of https://hal.science/hal-02343808v6/document */
2017                   PetscCall(MatDestroy(&C));
2018                   PetscCall(MatDestroySubMatrices(1, &sub));
2019                 }
2020                 PetscCall(ISDestroy(&uis));
2021                 PetscCall(MatDestroy(&B));
2022               }
2023               flg = PETSC_FALSE;
2024               if (!data->aux) {
2025                 Mat D;
2026 
2027                 PetscCall(MatCreateVecs(A11, &d, nullptr));
2028                 PetscCall(MatGetDiagonal(A11, d));
2029                 PetscCall(PetscObjectTypeCompareAny((PetscObject)A11, &flg, MATDIAGONAL, MATCONSTANTDIAGONAL, ""));
2030                 if (!flg) {
2031                   PetscCall(MatCreateDiagonal(d, &D));
2032                   PetscCall(MatMultEqual(A11, D, 20, &flg));
2033                   PetscCall(MatDestroy(&D));
2034                 }
2035                 if (flg) PetscCall(PetscInfo(pc, "A11 block is likely diagonal so the PC will build an auxiliary Mat (which was not initially provided by the user)\n"));
2036               }
2037               if ((PetscDefined(USE_DEBUG) || (data->Neumann != PETSC_BOOL3_TRUE && !flg)) && A11) {
2038                 PetscCall(MatNorm(A11, NORM_INFINITY, &norm));
2039                 if (data->Neumann != PETSC_BOOL3_TRUE && !flg) {
2040                   PetscCheck(norm < PETSC_MACHINE_EPSILON * static_cast<PetscReal>(10.0), PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition geneo and -%spc_hpddm_has_neumann != true with a nonzero or non-diagonal A11 block", pcpre ? pcpre : "", pcpre ? pcpre : "");
2041                   PetscCall(PetscInfo(pc, "A11 block is likely zero so the PC will build an auxiliary Mat (which was%s initially provided by the user)\n", data->aux ? "" : " not"));
2042                   PetscCall(MatDestroy(&data->aux));
2043                   flg = PETSC_TRUE;
2044                 }
2045               }
2046               if (!data->aux) { /* if A11 is near zero, e.g., Stokes equation, or diagonal, build an auxiliary (Neumann) Mat which is a (possibly slightly shifted) diagonal weighted by the inverse of the multiplicity */
2047                 PetscSF            scatter;
2048                 const PetscScalar *read;
2049                 PetscScalar       *write, *diagonal = nullptr;
2050 
2051                 PetscCall(MatDestroy(&data->aux));
2052                 PetscCall(ISGetLocalSize(data->is, &n));
2053                 PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)P), n, PETSC_DECIDE, &xin));
2054                 PetscCall(VecDuplicate(xin, &v));
2055                 PetscCall(VecScatterCreate(xin, data->is, v, nullptr, &scatter));
2056                 PetscCall(VecSet(v, 1.0));
2057                 PetscCall(VecSet(xin, 1.0));
2058                 PetscCall(VecScatterBegin(scatter, v, xin, ADD_VALUES, SCATTER_REVERSE));
2059                 PetscCall(VecScatterEnd(scatter, v, xin, ADD_VALUES, SCATTER_REVERSE)); /* v has the multiplicity of all unknowns on the overlap */
2060                 PetscCall(PetscSFDestroy(&scatter));
2061                 if (d) {
2062                   PetscCall(VecScatterCreate(d, data->is, v, nullptr, &scatter));
2063                   PetscCall(VecScatterBegin(scatter, d, v, INSERT_VALUES, SCATTER_FORWARD));
2064                   PetscCall(VecScatterEnd(scatter, d, v, INSERT_VALUES, SCATTER_FORWARD));
2065                   PetscCall(PetscSFDestroy(&scatter));
2066                   PetscCall(VecDestroy(&d));
2067                   PetscCall(PetscMalloc1(n, &diagonal));
2068                   PetscCall(VecGetArrayRead(v, &read));
2069                   PetscCallCXX(std::copy_n(read, n, diagonal));
2070                   PetscCall(VecRestoreArrayRead(v, &read));
2071                 }
2072                 PetscCall(VecDestroy(&v));
2073                 PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &v));
2074                 PetscCall(VecGetArrayRead(xin, &read));
2075                 PetscCall(VecGetArrayWrite(v, &write));
2076                 for (PetscInt i = 0; i < n; ++i) write[i] = (!diagonal || std::abs(diagonal[i]) < PETSC_MACHINE_EPSILON) ? PETSC_SMALL / (static_cast<PetscReal>(1000.0) * read[i]) : diagonal[i] / read[i];
2077                 PetscCall(PetscFree(diagonal));
2078                 PetscCall(VecRestoreArrayRead(xin, &read));
2079                 PetscCall(VecRestoreArrayWrite(v, &write));
2080                 PetscCall(VecDestroy(&xin));
2081                 PetscCall(MatCreateDiagonal(v, &data->aux));
2082                 PetscCall(VecDestroy(&v));
2083               }
2084               uis  = data->is;
2085               uaux = data->aux;
2086               PetscCall(PetscObjectReference((PetscObject)uis));
2087               PetscCall(PetscObjectReference((PetscObject)uaux));
2088               PetscCall(PetscStrallocpy(pcpre, &prefix));
2089               PetscCall(PCSetOptionsPrefix(pc, nullptr));
2090               PetscCall(PCSetType(pc, PCKSP));                                    /* replace the PC associated to the Schur complement by PCKSP */
2091               PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &inner_ksp)); /* new KSP that will be attached to the previously set PC */
2092               PetscCall(PetscObjectGetTabLevel((PetscObject)pc, &n));
2093               PetscCall(PetscObjectSetTabLevel((PetscObject)inner_ksp, n + 2));
2094               PetscCall(KSPSetOperators(inner_ksp, pc->mat, pc->pmat));
2095               PetscCall(KSPSetOptionsPrefix(inner_ksp, std::string(std::string(prefix) + "pc_hpddm_").c_str()));
2096               PetscCall(KSPSetSkipPCSetFromOptions(inner_ksp, PETSC_TRUE));
2097               PetscCall(KSPSetFromOptions(inner_ksp));
2098               PetscCall(KSPGetPC(inner_ksp, &inner));
2099               PetscCall(PCSetOptionsPrefix(inner, nullptr));
2100               PetscCall(PCSetType(inner, PCNONE)); /* no preconditioner since the action of M^-1 A or A M^-1 will be computed by the Amat */
2101               PetscCall(PCKSPSetKSP(pc, inner_ksp));
2102               std::get<0>(*ctx)[0] = pc_00; /* for coarse correction on the primal (e.g., velocity) space */
2103               PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &std::get<0>(*ctx)[1]));
2104               PetscCall(PCSetOptionsPrefix(pc, prefix)); /* both PC share the same prefix so that the outer PC can be reset with PCSetFromOptions() */
2105               PetscCall(PCSetOptionsPrefix(std::get<0>(*ctx)[1], prefix));
2106               PetscCall(PetscFree(prefix));
2107               PetscCall(PCSetOperators(std::get<0>(*ctx)[1], pc->mat, pc->pmat));
2108               PetscCall(PCSetType(std::get<0>(*ctx)[1], PCHPDDM));
2109               PetscCall(PCHPDDMSetAuxiliaryMat(std::get<0>(*ctx)[1], uis, uaux, nullptr, nullptr)); /* transfer ownership of the auxiliary inputs from the inner (PCKSP) to the inner-most (PCHPDDM) PC */
2110               if (flg) static_cast<PC_HPDDM *>(std::get<0>(*ctx)[1]->data)->Neumann = PETSC_BOOL3_TRUE;
2111               else if (PetscDefined(USE_DEBUG) && norm > PETSC_MACHINE_EPSILON * static_cast<PetscReal>(10.0)) {
2112                 /* no check when A11 is near zero */
2113                 PetscCall(MatCreateSubMatrices(A11, 1, &uis, &uis, MAT_INITIAL_MATRIX, &sub));
2114                 PetscCall(PCHPDDMCheckMatStructure_Private(pc, sub[0], uaux));
2115                 PetscCall(MatDestroySubMatrices(1, &sub));
2116               }
2117               PetscCall(PCSetFromOptions(std::get<0>(*ctx)[1]));
2118               PetscCall(PetscObjectDereference((PetscObject)uis));
2119               PetscCall(PetscObjectDereference((PetscObject)uaux));
2120               PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc), inner->mat->rmap->n, inner->mat->cmap->n, inner->mat->rmap->N, inner->mat->cmap->N, ctx, &S)); /* MatShell computing the action of M^-1 A or A M^-1 */
2121               PetscCall(MatShellSetOperation(S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_SchurCorrection));
2122               PetscCall(MatShellSetOperation(S, MATOP_VIEW, (PetscErrorCodeFn *)MatView_SchurCorrection));
2123               PetscCall(MatShellSetOperation(S, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_SchurCorrection));
2124               PetscCall(KSPGetPCSide(inner_ksp, &(std::get<2>(*ctx))));
2125               if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) {
2126                 PetscCall(KSPSetPreSolve(inner_ksp, KSPPreSolve_SchurCorrection, ctx));
2127               } else { /* no support for PC_SYMMETRIC */
2128                 PetscCheck(std::get<2>(*ctx) == PC_RIGHT, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "PCSide %s (!= %s or %s or %s)", PCSides[std::get<2>(*ctx)], PCSides[PC_SIDE_DEFAULT], PCSides[PC_LEFT], PCSides[PC_RIGHT]);
2129               }
2130               PetscCall(KSPSetPostSolve(inner_ksp, KSPPostSolve_SchurCorrection, ctx));
2131               PetscCall(PetscObjectContainerCompose((PetscObject)std::get<0>(*ctx)[1], "_PCHPDDM_Schur", ctx, nullptr));
2132               PetscCall(PCSetUp(std::get<0>(*ctx)[1]));
2133               PetscCall(KSPSetOperators(inner_ksp, S, S));
2134               PetscCall(MatCreateVecs(std::get<1>(*ctx)[0], std::get<3>(*ctx), std::get<3>(*ctx) + 1));
2135               PetscCall(VecDuplicate(std::get<3>(*ctx)[0], std::get<3>(*ctx) + 2));
2136               PetscCall(PetscObjectDereference((PetscObject)inner_ksp));
2137               PetscCall(PetscObjectDereference((PetscObject)S));
2138             } else {
2139               std::get<0>(*ctx)[0] = pc_00;
2140               PetscCall(PetscObjectContainerCompose((PetscObject)pc, "_PCHPDDM_Schur", ctx, nullptr));
2141               PetscCall(ISCreateStride(PetscObjectComm((PetscObject)data_00->is), A11->rmap->n, A11->rmap->rstart, 1, &data->is)); /* dummy variables in the case of a centralized Schur complement */
2142               PetscCall(MatGetDiagonalBlock(A11, &data->aux));
2143               PetscCall(PetscObjectReference((PetscObject)data->aux));
2144               PetscCall(PCSetUp(pc));
2145             }
2146             for (std::vector<Vec>::iterator it = initial.begin(); it != initial.end(); ++it) PetscCall(VecDestroy(&*it));
2147             PetscFunctionReturn(PETSC_SUCCESS);
2148           } else { /* second call to PCSetUp() on the PC associated to the Schur complement, retrieve previously set context */
2149             PetscCall(PetscContainerGetPointer(container, &ctx));
2150           }
2151         }
2152       }
2153     }
2154     if (!data->is && data->N > 1) {
2155       char type[256] = {}; /* same size as in src/ksp/pc/interface/pcset.c */
2156 
2157       PetscCall(PetscObjectTypeCompareAny((PetscObject)P, &flg, MATNORMAL, MATNORMALHERMITIAN, ""));
2158       if (flg || (A->rmap->N != A->cmap->N && P->rmap->N == P->cmap->N && P->rmap->N == A->cmap->N)) {
2159         Mat B;
2160 
2161         PetscCall(PCHPDDMSetAuxiliaryMatNormal_Private(pc, A, P, &B, pcpre));
2162         if (data->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED) data->correction = PC_HPDDM_COARSE_CORRECTION_BALANCED;
2163         PetscCall(MatDestroy(&B));
2164       } else {
2165         PetscCall(PetscObjectTypeCompare((PetscObject)P, MATSCHURCOMPLEMENT, &flg));
2166         if (flg) {
2167           Mat                 A00, P00, A01, A10, A11, B, N;
2168           PCHPDDMSchurPreType type = PC_HPDDM_SCHUR_PRE_LEAST_SQUARES;
2169 
2170           PetscCall(MatSchurComplementGetSubMatrices(P, &A00, &P00, &A01, &A10, &A11));
2171           PetscCall(PetscOptionsGetEnum(((PetscObject)pc)->options, pcpre, "-pc_hpddm_schur_precondition", PCHPDDMSchurPreTypes, (PetscEnum *)&type, &flg));
2172           if (type == PC_HPDDM_SCHUR_PRE_LEAST_SQUARES) {
2173             Mat                        B01;
2174             Vec                        diagonal = nullptr;
2175             const PetscScalar         *array;
2176             MatSchurComplementAinvType type;
2177 
2178             PetscCall(PCHPDDMCheckSymmetry_Private(pc, A01, A10, &B01));
2179             if (A11) {
2180               PetscCall(MatCreateVecs(A11, &diagonal, nullptr));
2181               PetscCall(MatGetDiagonal(A11, diagonal));
2182             }
2183             PetscCall(MatCreateVecs(P00, &v, nullptr));
2184             PetscCall(MatSchurComplementGetAinvType(P, &type));
2185             PetscCheck(type == MAT_SCHUR_COMPLEMENT_AINV_DIAG || type == MAT_SCHUR_COMPLEMENT_AINV_LUMP || type == MAT_SCHUR_COMPLEMENT_AINV_BLOCK_DIAG, PetscObjectComm((PetscObject)P), PETSC_ERR_SUP, "-%smat_schur_complement_ainv_type %s",
2186                        ((PetscObject)P)->prefix ? ((PetscObject)P)->prefix : "", MatSchurComplementAinvTypes[type]);
2187             if (type != MAT_SCHUR_COMPLEMENT_AINV_BLOCK_DIAG) {
2188               if (type == MAT_SCHUR_COMPLEMENT_AINV_LUMP) {
2189                 PetscCall(MatGetRowSum(P00, v));
2190                 if (A00 == P00) PetscCall(PetscObjectReference((PetscObject)A00));
2191                 PetscCall(MatDestroy(&P00));
2192                 PetscCall(VecGetArrayRead(v, &array));
2193                 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A00), A00->rmap->n, A00->cmap->n, A00->rmap->N, A00->cmap->N, 1, nullptr, 0, nullptr, &P00));
2194                 PetscCall(MatSetOption(P00, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2195                 for (n = A00->rmap->rstart; n < A00->rmap->rend; ++n) PetscCall(MatSetValue(P00, n, n, array[n - A00->rmap->rstart], INSERT_VALUES));
2196                 PetscCall(MatAssemblyBegin(P00, MAT_FINAL_ASSEMBLY));
2197                 PetscCall(MatAssemblyEnd(P00, MAT_FINAL_ASSEMBLY));
2198                 PetscCall(VecRestoreArrayRead(v, &array));
2199                 PetscCall(MatSchurComplementUpdateSubMatrices(P, A00, P00, A01, A10, A11)); /* replace P00 by diag(sum of each row of P00) */
2200                 PetscCall(MatDestroy(&P00));
2201               } else PetscCall(MatGetDiagonal(P00, v));
2202               PetscCall(VecReciprocal(v)); /* inv(diag(P00))       */
2203               PetscCall(VecSqrtAbs(v));    /* sqrt(inv(diag(P00))) */
2204               PetscCall(MatDuplicate(A01, MAT_COPY_VALUES, &B));
2205               PetscCall(MatDiagonalScale(B, v, nullptr));
2206               if (B01) PetscCall(MatDiagonalScale(B01, v, nullptr));
2207             } else {
2208               Mat     D00;
2209               MatType type;
2210 
2211               PetscCall(MatCreate(PetscObjectComm((PetscObject)A00), &D00));
2212               PetscCall(MatSetType(D00, MATAIJ));
2213               PetscCall(MatSetOptionsPrefix(D00, ((PetscObject)A00)->prefix));
2214               PetscCall(MatAppendOptionsPrefix(D00, "block_diagonal_"));
2215               PetscCall(MatSetFromOptions(D00));                          /* for setting -mat_block_size dynamically */
2216               PetscCall(MatConvert(A00, MATAIJ, MAT_INITIAL_MATRIX, &B)); /* not all MatTypes have a MatInvertBlockDiagonal() implementation, plus one may want to use a different block size than the one of A00 */
2217               PetscCall(MatSetBlockSizesFromMats(B, D00, D00));
2218               PetscCall(MatInvertBlockDiagonalMat(B, D00));
2219               PetscCall(MatDestroy(&B));
2220               PetscCall(MatGetType(A01, &type));                            /* cache MatType */
2221               PetscCall(MatConvert(A01, MATAIJ, MAT_INPLACE_MATRIX, &A01)); /* MatProduct is not versatile enough to fallback gracefully if no implementation found, so MatConvert() */
2222               PetscCall(MatMatMult(D00, A01, MAT_INITIAL_MATRIX, PETSC_CURRENT, &B));
2223               PetscCall(MatDestroy(&D00));
2224               PetscCall(MatConvert(A01, type, MAT_INPLACE_MATRIX, &A01)); /* reset to previous MatType */
2225               PetscCall(MatConvert(B, type, MAT_INPLACE_MATRIX, &B));
2226               if (!B01) { /* symmetric case */
2227                 B01 = A01;
2228                 PetscCall(PetscObjectReference((PetscObject)B01));
2229               }
2230             }
2231             if (B01 && B01 != A01) PetscCall(MatSetBlockSizesFromMats(B01, A01, A01)); /* TODO: remove this line once Firedrake is fixed */
2232             PetscCall(VecDestroy(&v));
2233             PetscCall(MatCreateNormalHermitian(B, &N));
2234             PetscCall(PCHPDDMSetAuxiliaryMatNormal_Private(pc, B, N, &P, pcpre, &diagonal, B01));
2235             PetscCall(PetscObjectTypeCompare((PetscObject)data->aux, MATSEQAIJ, &flg));
2236             if (!flg) {
2237               PetscCall(MatDestroy(&P));
2238               P = N;
2239               PetscCall(PetscObjectReference((PetscObject)P));
2240             }
2241             if (diagonal) {
2242               PetscCall(MatSetOption(P, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); /* may have missing diagonal entries */
2243               PetscCall(MatDiagonalSet(P, diagonal, ADD_VALUES));
2244               PetscCall(PCSetOperators(pc, P, P)); /* replace P by A01^T inv(diag(P00)) A01 - diag(P11) */
2245               PetscCall(VecDestroy(&diagonal));
2246             } else PetscCall(PCSetOperators(pc, B01 ? P : N, P));  /* replace P by A01^T inv(diag(P00)) A01                         */
2247             pc->ops->postsolve = PCPostSolve_SchurPreLeastSquares; /*  PCFIELDSPLIT expect a KSP for (P11 - A10 inv(diag(P00)) A01) */
2248             PetscCall(MatDestroy(&N));                             /*  but a PC for (A10 inv(diag(P00)) A10 - P11) is setup instead */
2249             PetscCall(MatDestroy(&P));                             /*  so the sign of the solution must be flipped                  */
2250             PetscCall(MatDestroy(&B));
2251           } else
2252             PetscCheck(type != PC_HPDDM_SCHUR_PRE_GENEO, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s without a prior call to PCHPDDMSetAuxiliaryMat() on the A11 block%s%s", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type], pcpre ? " -" : "", pcpre ? pcpre : "");
2253           for (std::vector<Vec>::iterator it = initial.begin(); it != initial.end(); ++it) PetscCall(VecDestroy(&*it));
2254           PetscFunctionReturn(PETSC_SUCCESS);
2255         } else {
2256           PetscCall(PetscOptionsGetString(((PetscObject)pc)->options, pcpre, "-pc_hpddm_levels_1_st_pc_type", type, sizeof(type), nullptr));
2257           PetscCall(PetscStrcmp(type, PCMAT, &algebraic));
2258           PetscCheck(!algebraic || !block, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_levels_1_st_pc_type mat and -%spc_hpddm_block_splitting", pcpre ? pcpre : "", pcpre ? pcpre : "");
2259           if (overlap != -1) {
2260             PetscCheck(!block && !algebraic, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_%s and -%spc_hpddm_harmonic_overlap", pcpre ? pcpre : "", block ? "block_splitting" : "levels_1_st_pc_type mat", pcpre ? pcpre : "");
2261             PetscCheck(overlap >= 1, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_WRONG, "-%spc_hpddm_harmonic_overlap %" PetscInt_FMT " < 1", pcpre ? pcpre : "", overlap);
2262           }
2263           if (block || overlap != -1) algebraic = PETSC_TRUE;
2264           if (algebraic) {
2265             PetscCall(ISCreateStride(PETSC_COMM_SELF, P->rmap->n, P->rmap->rstart, 1, &data->is));
2266             PetscCall(MatIncreaseOverlap(P, 1, &data->is, 1));
2267             PetscCall(ISSort(data->is));
2268           } else
2269             PetscCall(PetscInfo(pc, "Cannot assemble a fully-algebraic coarse operator with an assembled Pmat and -%spc_hpddm_levels_1_st_pc_type != mat and -%spc_hpddm_block_splitting != true and -%spc_hpddm_harmonic_overlap < 1\n", pcpre ? pcpre : "", pcpre ? pcpre : "", pcpre ? pcpre : ""));
2270         }
2271       }
2272     }
2273   }
2274 #if PetscDefined(USE_DEBUG)
2275   if (data->is) PetscCall(ISDuplicate(data->is, &dis));
2276   if (data->aux) PetscCall(MatDuplicate(data->aux, MAT_COPY_VALUES, &daux));
2277 #endif
2278   if (data->is || (ismatis && data->N > 1)) {
2279     if (ismatis) {
2280       std::initializer_list<std::string> list = {MATSEQBAIJ, MATSEQSBAIJ};
2281       PetscCall(MatISGetLocalMat(P, &N));
2282       std::initializer_list<std::string>::const_iterator it = std::find(list.begin(), list.end(), ((PetscObject)N)->type_name);
2283       PetscCall(MatISRestoreLocalMat(P, &N));
2284       switch (std::distance(list.begin(), it)) {
2285       case 0:
2286         PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &C));
2287         break;
2288       case 1:
2289         /* MatCreateSubMatrices() does not work with MATSBAIJ and unsorted ISes, so convert to MPIBAIJ */
2290         PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &C));
2291         break;
2292       default:
2293         PetscCall(MatConvert(P, MATMPIAIJ, MAT_INITIAL_MATRIX, &C));
2294       }
2295       PetscCall(MatISGetLocalToGlobalMapping(P, &l2g, nullptr));
2296       PetscCall(PetscObjectReference((PetscObject)P));
2297       PetscCall(KSPSetOperators(data->levels[0]->ksp, A, C));
2298       std::swap(C, P);
2299       PetscCall(ISLocalToGlobalMappingGetSize(l2g, &n));
2300       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &loc));
2301       PetscCall(ISLocalToGlobalMappingApplyIS(l2g, loc, &is[0]));
2302       PetscCall(ISDestroy(&loc));
2303       /* the auxiliary Mat is _not_ the local Neumann matrix                                */
2304       /* it is the local Neumann matrix augmented (with zeros) through MatIncreaseOverlap() */
2305       data->Neumann = PETSC_BOOL3_FALSE;
2306       structure     = SAME_NONZERO_PATTERN;
2307     } else {
2308       is[0] = data->is;
2309       if (algebraic || ctx) subdomains = PETSC_TRUE;
2310       PetscCall(PetscOptionsGetBool(((PetscObject)pc)->options, pcpre, "-pc_hpddm_define_subdomains", &subdomains, nullptr));
2311       if (ctx) PetscCheck(subdomains, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition geneo and -%spc_hpddm_define_subdomains false", pcpre, pcpre);
2312       if (PetscBool3ToBool(data->Neumann)) {
2313         PetscCheck(!block, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_block_splitting and -%spc_hpddm_has_neumann", pcpre ? pcpre : "", pcpre ? pcpre : "");
2314         PetscCheck(overlap == -1, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_harmonic_overlap %" PetscInt_FMT " and -%spc_hpddm_has_neumann", pcpre ? pcpre : "", overlap, pcpre ? pcpre : "");
2315         PetscCheck(!algebraic, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_levels_1_st_pc_type mat and -%spc_hpddm_has_neumann", pcpre ? pcpre : "", pcpre ? pcpre : "");
2316       }
2317       if (PetscBool3ToBool(data->Neumann) || block) structure = SAME_NONZERO_PATTERN;
2318       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)data->is), P->rmap->n, P->rmap->rstart, 1, &loc));
2319     }
2320     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_", pcpre ? pcpre : ""));
2321     PetscCall(PetscOptionsGetEnum(((PetscObject)pc)->options, prefix, "-st_matstructure", MatStructures, (PetscEnum *)&structure, &flg)); /* if not user-provided, force its value when possible */
2322     if (!flg && structure == SAME_NONZERO_PATTERN) { /* cannot call STSetMatStructure() yet, insert the appropriate option in the database, parsed by STSetFromOptions() */
2323       PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-%spc_hpddm_levels_1_st_matstructure", pcpre ? pcpre : ""));
2324       PetscCall(PetscOptionsSetValue(((PetscObject)pc)->options, prefix, MatStructures[structure]));
2325     }
2326     flg = PETSC_FALSE;
2327     if (data->share) {
2328       data->share = PETSC_FALSE; /* will be reset to PETSC_TRUE if none of the conditions below are true */
2329       if (!subdomains) PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since -%spc_hpddm_define_subdomains is not true\n", pcpre ? pcpre : ""));
2330       else if (data->deflation) PetscCall(PetscInfo(pc, "Nothing to share since PCHPDDMSetDeflationMat() has been called\n"));
2331       else if (ismatis) PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc with a Pmat of type MATIS\n"));
2332       else if (!algebraic && structure != SAME_NONZERO_PATTERN)
2333         PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since -%spc_hpddm_levels_1_st_matstructure %s (!= %s)\n", pcpre ? pcpre : "", MatStructures[structure], MatStructures[SAME_NONZERO_PATTERN]));
2334       else data->share = PETSC_TRUE;
2335     }
2336     if (!ismatis) {
2337       if (data->share || (!PetscBool3ToBool(data->Neumann) && subdomains)) PetscCall(ISDuplicate(is[0], &unsorted));
2338       else unsorted = is[0];
2339     }
2340     if ((ctx || data->N > 1) && (data->aux || ismatis || algebraic)) {
2341       PetscCheck(loadedSym, PETSC_COMM_SELF, PETSC_ERR_PLIB, "HPDDM library not loaded, cannot use more than one level");
2342       PetscCall(MatSetOption(P, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
2343       if (ismatis) {
2344         /* needed by HPDDM (currently) so that the partition of unity is 0 on subdomain interfaces */
2345         PetscCall(MatIncreaseOverlap(P, 1, is, 1));
2346         PetscCall(ISDestroy(&data->is));
2347         data->is = is[0];
2348       } else {
2349         if (PetscDefined(USE_DEBUG)) PetscCall(PCHPDDMCheckInclusion_Private(pc, data->is, loc, PETSC_TRUE));
2350         if (!ctx && overlap == -1) PetscCall(PetscObjectComposeFunction((PetscObject)pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_Private_C", PCHPDDMAlgebraicAuxiliaryMat_Private));
2351         if (!PetscBool3ToBool(data->Neumann) && (!algebraic || overlap != -1)) {
2352           PetscCall(PetscObjectTypeCompare((PetscObject)P, MATMPISBAIJ, &flg));
2353           if (flg) {
2354             /* maybe better to ISSort(is[0]), MatCreateSubMatrices(), and then MatPermute() */
2355             /* but there is no MatPermute_SeqSBAIJ(), so as before, just use MATMPIBAIJ     */
2356             PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &uaux));
2357             flg = PETSC_FALSE;
2358           }
2359         }
2360       }
2361       if (algebraic && overlap == -1) {
2362         PetscUseMethod(pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_Private_C", (Mat, IS *, Mat *[], PetscBool), (P, is, &sub, block));
2363         if (block) {
2364           PetscCall(PetscObjectQuery((PetscObject)sub[0], "_PCHPDDM_Neumann_Mat", (PetscObject *)&data->aux));
2365           PetscCall(PetscObjectCompose((PetscObject)sub[0], "_PCHPDDM_Neumann_Mat", nullptr));
2366         }
2367       } else if (!uaux || overlap != -1) {
2368         if (!ctx) {
2369           if (PetscBool3ToBool(data->Neumann)) sub = &data->aux;
2370           else {
2371             PetscBool flg;
2372             if (overlap != -1) {
2373               Harmonic              h;
2374               Mat                   A0, *a;                    /* with an SVD: [ A_00  A_01       ] */
2375               IS                    ov[2], rows, cols, stride; /*              [ A_10  A_11  A_12 ] */
2376               const PetscInt       *i[2], bs = P->cmap->bs;    /* with a GEVP: [ A_00  A_01       ] */
2377               PetscInt              n[2], location;            /*              [ A_10  A_11  A_12 ] */
2378               std::vector<PetscInt> v[2];                      /*              [       A_21  A_22 ] */
2379 
2380               do {
2381                 PetscCall(ISDuplicate(data->is, ov));
2382                 if (overlap > 1) PetscCall(MatIncreaseOverlap(P, 1, ov, overlap - 1));
2383                 PetscCall(ISDuplicate(ov[0], ov + 1));
2384                 PetscCall(MatIncreaseOverlap(P, 1, ov + 1, 1));
2385                 PetscCall(ISGetLocalSize(ov[0], n));
2386                 PetscCall(ISGetLocalSize(ov[1], n + 1));
2387                 flg = PetscBool(n[0] == n[1] && n[0] != P->rmap->n);
2388                 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
2389                 if (flg) {
2390                   PetscCall(ISDestroy(ov));
2391                   PetscCall(ISDestroy(ov + 1));
2392                   PetscCheck(--overlap, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "No oversampling possible");
2393                   PetscCall(PetscInfo(pc, "Supplied -%spc_hpddm_harmonic_overlap parameter is too large, it has been decreased to %" PetscInt_FMT "\n", pcpre ? pcpre : "", overlap));
2394                 } else break;
2395               } while (1);
2396               PetscCall(PetscNew(&h));
2397               h->ksp = nullptr;
2398               PetscCall(PetscCalloc1(2, &h->A));
2399               PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, prefix, "-eps_nev", &flg));
2400               if (!flg) {
2401                 PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, prefix, "-svd_nsv", &flg));
2402                 if (!flg) PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, prefix, "-svd_threshold_relative", &flg));
2403               } else flg = PETSC_FALSE;
2404               PetscCall(ISSort(ov[0]));
2405               if (!flg) PetscCall(ISSort(ov[1]));
2406               PetscCall(PetscCalloc1(5, &h->is));
2407               PetscCall(MatCreateSubMatrices(uaux ? uaux : P, 1, ov + !flg, ov + 1, MAT_INITIAL_MATRIX, &a)); /* submatrix from above, either square (!flg) or rectangular (flg) */
2408               for (PetscInt j = 0; j < 2; ++j) PetscCall(ISGetIndices(ov[j], i + j));
2409               v[1].reserve((n[1] - n[0]) / bs);
2410               for (PetscInt j = 0; j < n[1]; j += bs) { /* indices of the (2,2) block */
2411                 PetscCall(ISLocate(ov[0], i[1][j], &location));
2412                 if (location < 0) v[1].emplace_back(j / bs);
2413               }
2414               if (!flg) {
2415                 h->A[1] = a[0];
2416                 PetscCall(PetscObjectReference((PetscObject)h->A[1]));
2417                 v[0].reserve((n[0] - P->rmap->n) / bs);
2418                 for (PetscInt j = 0; j < n[1]; j += bs) { /* row indices of the (1,2) block */
2419                   PetscCall(ISLocate(loc, i[1][j], &location));
2420                   if (location < 0) {
2421                     PetscCall(ISLocate(ov[0], i[1][j], &location));
2422                     if (location >= 0) v[0].emplace_back(j / bs);
2423                   }
2424                 }
2425                 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[0].size(), v[0].data(), PETSC_USE_POINTER, &rows));
2426                 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[1].size(), v[1].data(), PETSC_COPY_VALUES, h->is + 4));
2427                 PetscCall(MatCreateSubMatrix(a[0], rows, h->is[4], MAT_INITIAL_MATRIX, h->A)); /* A_12 submatrix from above */
2428                 PetscCall(ISDestroy(&rows));
2429                 if (uaux) PetscCall(MatConvert(a[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, a)); /* initial Pmat was MATSBAIJ, convert back to the same format since the rectangular A_12 submatrix has been created */
2430                 PetscCall(ISEmbed(ov[0], ov[1], PETSC_TRUE, &rows));
2431                 PetscCall(MatCreateSubMatrix(a[0], rows, cols = rows, MAT_INITIAL_MATRIX, &A0)); /* [ A_00  A_01 ; A_10  A_11 ] submatrix from above */
2432                 PetscCall(ISDestroy(&rows));
2433                 v[0].clear();
2434                 PetscCall(ISEmbed(loc, ov[1], PETSC_TRUE, h->is + 3));
2435                 PetscCall(ISEmbed(data->is, ov[1], PETSC_TRUE, h->is + 2));
2436               }
2437               v[0].reserve((n[0] - P->rmap->n) / bs);
2438               for (PetscInt j = 0; j < n[0]; j += bs) {
2439                 PetscCall(ISLocate(loc, i[0][j], &location));
2440                 if (location < 0) v[0].emplace_back(j / bs);
2441               }
2442               PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[0].size(), v[0].data(), PETSC_USE_POINTER, &rows));
2443               for (PetscInt j = 0; j < 2; ++j) PetscCall(ISRestoreIndices(ov[j], i + j));
2444               if (flg) {
2445                 PetscCall(ISCreateStride(PETSC_COMM_SELF, a[0]->rmap->n, 0, 1, &stride));
2446                 PetscCall(ISEmbed(ov[0], ov[1], PETSC_TRUE, &cols));
2447                 PetscCall(MatCreateSubMatrix(a[0], stride, cols, MAT_INITIAL_MATRIX, &A0)); /* [ A_00  A_01 ; A_10  A_11 ] submatrix from above */
2448                 PetscCall(ISDestroy(&cols));
2449                 PetscCall(ISDestroy(&stride));
2450                 if (uaux) { /* initial Pmat was MATSBAIJ, convert back to the same format since this submatrix is square */
2451                   PetscCall(MatSetOption(A0, MAT_SYMMETRIC, PETSC_TRUE));
2452                   PetscCall(MatConvert(A0, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A0));
2453                 }
2454                 PetscCall(ISEmbed(loc, data->is, PETSC_TRUE, h->is + 2));
2455                 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[1].size(), v[1].data(), PETSC_USE_POINTER, &cols));
2456                 PetscCall(MatCreateSubMatrix(a[0], rows, cols, MAT_INITIAL_MATRIX, h->A)); /* A_12 submatrix from above */
2457                 PetscCall(ISDestroy(&cols));
2458               }
2459               PetscCall(ISCreateStride(PETSC_COMM_SELF, A0->rmap->n, 0, 1, &stride));
2460               PetscCall(ISEmbed(rows, stride, PETSC_TRUE, h->is));
2461               PetscCall(ISDestroy(&stride));
2462               PetscCall(ISDestroy(&rows));
2463               PetscCall(ISEmbed(loc, ov[0], PETSC_TRUE, h->is + 1));
2464               if (subdomains) {
2465                 if (!data->levels[0]->pc) {
2466                   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &data->levels[0]->pc));
2467                   PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_", pcpre ? pcpre : ""));
2468                   PetscCall(PCSetOptionsPrefix(data->levels[0]->pc, prefix));
2469                   PetscCall(PCSetOperators(data->levels[0]->pc, A, P));
2470                 }
2471                 PetscCall(PCSetType(data->levels[0]->pc, PCASM));
2472                 if (!data->levels[0]->pc->setupcalled) PetscCall(PCASMSetLocalSubdomains(data->levels[0]->pc, 1, ov + !flg, &loc));
2473                 PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(data->levels[0]->pc, flg ? A0 : a[0], PETSC_TRUE));
2474                 if (!flg) ++overlap;
2475                 if (data->share) {
2476                   PetscInt n = -1;
2477                   PetscTryMethod(data->levels[0]->pc, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (data->levels[0]->pc, &n, nullptr, &ksp));
2478                   PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", n);
2479                   if (flg) {
2480                     h->ksp = ksp[0];
2481                     PetscCall(PetscObjectReference((PetscObject)h->ksp));
2482                   }
2483                 }
2484               }
2485               if (!h->ksp) {
2486                 PetscBool share = data->share;
2487 
2488                 PetscCall(KSPCreate(PETSC_COMM_SELF, &h->ksp));
2489                 PetscCall(KSPSetType(h->ksp, KSPPREONLY));
2490                 PetscCall(KSPSetOperators(h->ksp, A0, A0));
2491                 do {
2492                   if (!data->share) {
2493                     share = PETSC_FALSE;
2494                     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_%s", pcpre ? pcpre : "", flg ? "svd_" : "eps_"));
2495                     PetscCall(KSPSetOptionsPrefix(h->ksp, prefix));
2496                     PetscCall(KSPSetFromOptions(h->ksp));
2497                   } else {
2498                     MatSolverType type;
2499 
2500                     PetscCall(PetscObjectTypeCompareAny((PetscObject)ksp[0]->pc, &data->share, PCLU, PCCHOLESKY, ""));
2501                     if (data->share) {
2502                       PetscCall(PCFactorGetMatSolverType(ksp[0]->pc, &type));
2503                       if (!type) {
2504                         if (PetscDefined(HAVE_MUMPS)) PetscCall(PCFactorSetMatSolverType(ksp[0]->pc, MATSOLVERMUMPS));
2505                         else if (PetscDefined(HAVE_MKL_PARDISO)) PetscCall(PCFactorSetMatSolverType(ksp[0]->pc, MATSOLVERMKL_PARDISO));
2506                         else data->share = PETSC_FALSE;
2507                         if (data->share) PetscCall(PCSetFromOptions(ksp[0]->pc));
2508                       } else {
2509                         PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &data->share));
2510                         if (!data->share) PetscCall(PetscStrcmp(type, MATSOLVERMKL_PARDISO, &data->share));
2511                       }
2512                       if (data->share) {
2513                         std::tuple<KSP, IS, Vec[2]> *p;
2514 
2515                         PetscCall(PCFactorGetMatrix(ksp[0]->pc, &A));
2516                         PetscCall(MatFactorSetSchurIS(A, h->is[4]));
2517                         PetscCall(KSPSetUp(ksp[0]));
2518                         PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_eps_shell_", pcpre ? pcpre : ""));
2519                         PetscCall(KSPSetOptionsPrefix(h->ksp, prefix));
2520                         PetscCall(KSPSetFromOptions(h->ksp));
2521                         PetscCall(PCSetType(h->ksp->pc, PCSHELL));
2522                         PetscCall(PetscNew(&p));
2523                         std::get<0>(*p) = ksp[0];
2524                         PetscCall(ISEmbed(ov[0], ov[1], PETSC_TRUE, &std::get<1>(*p)));
2525                         PetscCall(MatCreateVecs(A, std::get<2>(*p), std::get<2>(*p) + 1));
2526                         PetscCall(PCShellSetContext(h->ksp->pc, p));
2527                         PetscCall(PCShellSetApply(h->ksp->pc, PCApply_Schur));
2528                         PetscCall(PCShellSetApplyTranspose(h->ksp->pc, PCApply_Schur<Vec, true>));
2529                         PetscCall(PCShellSetMatApply(h->ksp->pc, PCApply_Schur<Mat>));
2530                         PetscCall(PCShellSetDestroy(h->ksp->pc, PCDestroy_Schur));
2531                       }
2532                     }
2533                     if (!data->share) PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since neither MUMPS nor MKL PARDISO is used\n"));
2534                   }
2535                 } while (!share != !data->share); /* if data->share is initially PETSC_TRUE, but then reset to PETSC_FALSE, then go back to the beginning of the do loop */
2536               }
2537               PetscCall(ISDestroy(ov));
2538               PetscCall(ISDestroy(ov + 1));
2539               if (overlap == 1 && subdomains && flg) {
2540                 *subA = A0;
2541                 sub   = subA;
2542                 if (uaux) PetscCall(MatDestroy(&uaux));
2543               } else PetscCall(MatDestroy(&A0));
2544               PetscCall(MatCreateShell(PETSC_COMM_SELF, P->rmap->n, n[1] - n[0], P->rmap->n, n[1] - n[0], h, &data->aux));
2545               PetscCall(KSPSetErrorIfNotConverged(h->ksp, PETSC_TRUE)); /* bail out as early as possible to avoid (apparently) unrelated error messages */
2546               PetscCall(MatCreateVecs(h->ksp->pc->pmat, &h->v, nullptr));
2547               PetscCall(MatShellSetOperation(data->aux, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Harmonic));
2548               PetscCall(MatShellSetOperation(data->aux, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Harmonic));
2549               PetscCall(MatShellSetMatProductOperation(data->aux, MATPRODUCT_AB, nullptr, MatProduct_AB_Harmonic, nullptr, MATDENSE, MATDENSE));
2550               PetscCall(MatShellSetMatProductOperation(data->aux, MATPRODUCT_AtB, nullptr, MatProduct_AtB_Harmonic, nullptr, MATDENSE, MATDENSE));
2551               PetscCall(MatShellSetOperation(data->aux, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_Harmonic));
2552               PetscCall(MatDestroySubMatrices(1, &a));
2553             }
2554             if (overlap != 1 || !subdomains) {
2555               PetscCall(MatCreateSubMatrices(uaux ? uaux : P, 1, is, is, MAT_INITIAL_MATRIX, &sub));
2556               if (ismatis) {
2557                 PetscCall(MatISGetLocalMat(C, &N));
2558                 PetscCall(PetscObjectTypeCompare((PetscObject)N, MATSEQSBAIJ, &flg));
2559                 if (flg) PetscCall(MatConvert(sub[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, sub));
2560                 PetscCall(MatISRestoreLocalMat(C, &N));
2561               }
2562             }
2563             if (uaux) {
2564               PetscCall(MatDestroy(&uaux));
2565               PetscCall(MatConvert(sub[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, sub));
2566             }
2567           }
2568         }
2569       } else if (!ctx) {
2570         PetscCall(MatCreateSubMatrices(uaux, 1, is, is, MAT_INITIAL_MATRIX, &sub));
2571         PetscCall(MatDestroy(&uaux));
2572         PetscCall(MatConvert(sub[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, sub));
2573       }
2574       if (data->N > 1) {
2575         /* Vec holding the partition of unity */
2576         if (!data->levels[0]->D) {
2577           PetscCall(ISGetLocalSize(data->is, &n));
2578           PetscCall(VecCreateMPI(PETSC_COMM_SELF, n, PETSC_DETERMINE, &data->levels[0]->D));
2579         }
2580         if (data->share && overlap == -1) {
2581           Mat      D;
2582           IS       perm = nullptr;
2583           PetscInt size = -1;
2584 
2585           if (!data->levels[0]->pc) {
2586             PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_", pcpre ? pcpre : ""));
2587             PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &data->levels[0]->pc));
2588             PetscCall(PCSetOptionsPrefix(data->levels[0]->pc, prefix));
2589             PetscCall(PCSetOperators(data->levels[0]->pc, A, P));
2590           }
2591           PetscCall(PCSetType(data->levels[0]->pc, PCASM));
2592           if (!ctx) {
2593             if (!data->levels[0]->pc->setupcalled) {
2594               IS sorted; /* PCASM will sort the input IS, duplicate it to return an unmodified (PCHPDDM) input IS */
2595               PetscCall(ISDuplicate(is[0], &sorted));
2596               PetscCall(PCASMSetLocalSubdomains(data->levels[0]->pc, 1, &sorted, &loc));
2597               PetscCall(PetscObjectDereference((PetscObject)sorted));
2598             }
2599             PetscCall(PCSetFromOptions(data->levels[0]->pc));
2600             if (block) {
2601               PetscCall(PCHPDDMPermute_Private(unsorted, data->is, &uis, sub[0], &C, &perm));
2602               PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(data->levels[0]->pc, C, algebraic));
2603             } else PetscCall(PCSetUp(data->levels[0]->pc));
2604             PetscTryMethod(data->levels[0]->pc, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (data->levels[0]->pc, &size, nullptr, &ksp));
2605             if (size != 1) {
2606               data->share = PETSC_FALSE;
2607               PetscCheck(size == -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", size);
2608               PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since PCASMGetSubKSP() not found in fine-level PC\n"));
2609               PetscCall(ISDestroy(&unsorted));
2610               unsorted = is[0];
2611             } else {
2612               const char *matpre;
2613               PetscBool   cmp[4];
2614 
2615               if (!block && !ctx) PetscCall(PCHPDDMPermute_Private(unsorted, data->is, &uis, PetscBool3ToBool(data->Neumann) ? sub[0] : data->aux, &C, &perm));
2616               if (perm) { /* unsorted input IS */
2617                 if (!PetscBool3ToBool(data->Neumann) && !block) {
2618                   PetscCall(MatPermute(sub[0], perm, perm, &D)); /* permute since PCASM will call ISSort() */
2619                   PetscCall(MatHeaderReplace(sub[0], &D));
2620                 }
2621                 if (data->B) { /* see PCHPDDMSetRHSMat() */
2622                   PetscCall(MatPermute(data->B, perm, perm, &D));
2623                   PetscCall(MatHeaderReplace(data->B, &D));
2624                 }
2625                 PetscCall(ISDestroy(&perm));
2626               }
2627               PetscCall(KSPGetOperators(ksp[0], subA, subA + 1));
2628               PetscCall(PetscObjectReference((PetscObject)subA[0]));
2629               PetscCall(MatDuplicate(subA[1], MAT_SHARE_NONZERO_PATTERN, &D));
2630               PetscCall(MatGetOptionsPrefix(subA[1], &matpre));
2631               PetscCall(MatSetOptionsPrefix(D, matpre));
2632               PetscCall(PetscObjectTypeCompare((PetscObject)D, MATNORMAL, cmp));
2633               PetscCall(PetscObjectTypeCompare((PetscObject)C, MATNORMAL, cmp + 1));
2634               if (!cmp[0]) PetscCall(PetscObjectTypeCompare((PetscObject)D, MATNORMALHERMITIAN, cmp + 2));
2635               else cmp[2] = PETSC_FALSE;
2636               if (!cmp[1]) PetscCall(PetscObjectTypeCompare((PetscObject)C, MATNORMALHERMITIAN, cmp + 3));
2637               else cmp[3] = PETSC_FALSE;
2638               PetscCheck(cmp[0] == cmp[1] && cmp[2] == cmp[3], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_levels_1_pc_asm_sub_mat_type %s and auxiliary Mat of type %s", pcpre ? pcpre : "", ((PetscObject)D)->type_name, ((PetscObject)C)->type_name);
2639               if (!cmp[0] && !cmp[2]) {
2640                 if (!block) {
2641                   if (PetscDefined(USE_DEBUG)) PetscCall(PCHPDDMCheckMatStructure_Private(pc, D, C));
2642                   PetscCall(MatAXPY(D, 1.0, C, SUBSET_NONZERO_PATTERN));
2643                 } else {
2644                   structure = DIFFERENT_NONZERO_PATTERN;
2645                   PetscCall(MatAXPY(D, 1.0, data->aux, structure));
2646                 }
2647               } else {
2648                 Mat mat[2];
2649 
2650                 if (cmp[0]) {
2651                   PetscCall(MatNormalGetMat(D, mat));
2652                   PetscCall(MatNormalGetMat(C, mat + 1));
2653                 } else {
2654                   PetscCall(MatNormalHermitianGetMat(D, mat));
2655                   PetscCall(MatNormalHermitianGetMat(C, mat + 1));
2656                 }
2657                 PetscCall(MatAXPY(mat[0], 1.0, mat[1], SUBSET_NONZERO_PATTERN));
2658               }
2659               PetscCall(MatPropagateSymmetryOptions(C, D));
2660               PetscCall(MatDestroy(&C));
2661               C = D;
2662               /* swap pointers so that variables stay consistent throughout PCSetUp() */
2663               std::swap(C, data->aux);
2664               std::swap(uis, data->is);
2665               swap = PETSC_TRUE;
2666             }
2667           }
2668         }
2669       }
2670       if (ctx) {
2671         PC_HPDDM              *data_00 = (PC_HPDDM *)std::get<0>(*ctx)[0]->data;
2672         PC                     s;
2673         Mat                    A00, P00, A01 = nullptr, A10, A11, N, b[4];
2674         IS                     sorted, is[2], *is_00;
2675         MatSolverType          type;
2676         std::pair<PC, Vec[2]> *p;
2677 
2678         n = -1;
2679         PetscTryMethod(data_00->levels[0]->pc, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (data_00->levels[0]->pc, &n, nullptr, &ksp));
2680         PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", n);
2681         PetscCall(KSPGetOperators(ksp[0], subA, subA + 1));
2682         PetscCall(ISGetLocalSize(data_00->is, &n));
2683         if (n != subA[0]->rmap->n || n != subA[0]->cmap->n) {
2684           PetscCall(PCASMGetLocalSubdomains(data_00->levels[0]->pc, &n, &is_00, nullptr));
2685           PetscCall(ISGetLocalSize(*is_00, &n));
2686           PetscCheck(n == subA[0]->rmap->n && n == subA[0]->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition geneo and -%spc_hpddm_define_subdomains false", pcpre ? pcpre : "", ((PetscObject)pc)->prefix);
2687         } else is_00 = &data_00->is;
2688         PetscCall(PCHPDDMPermute_Private(unsorted, data->is, &uis, data->aux, &C, nullptr)); /* permute since PCASM works with a sorted IS */
2689         std::swap(C, data->aux);
2690         std::swap(uis, data->is);
2691         swap = PETSC_TRUE;
2692         PetscCall(MatSchurComplementGetSubMatrices(P, &A00, &P00, std::get<1>(*ctx), &A10, &A11));
2693         std::get<1>(*ctx)[1] = A10;
2694         PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATTRANSPOSEVIRTUAL, &flg));
2695         if (flg) PetscCall(MatTransposeGetMat(A10, &A01));
2696         else {
2697           PetscBool flg;
2698 
2699           PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
2700           if (flg) PetscCall(MatHermitianTransposeGetMat(A10, &A01));
2701         }
2702         PetscCall(ISDuplicate(*is_00, &sorted)); /* during setup of the PC associated to the A00 block, this IS has already been sorted, but it's put back to its original state at the end of PCSetUp_HPDDM(), which may be unsorted */
2703         PetscCall(ISSort(sorted));               /* this is to avoid changing users inputs, but it requires a new call to ISSort() here                                                                                               */
2704         if (!A01) {
2705           PetscCall(MatSetOption(A10, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
2706           PetscCall(MatCreateSubMatrices(A10, 1, &data->is, &sorted, MAT_INITIAL_MATRIX, &sub));
2707           b[2] = sub[0];
2708           PetscCall(PetscObjectReference((PetscObject)sub[0]));
2709           PetscCall(MatDestroySubMatrices(1, &sub));
2710           PetscCall(PetscObjectTypeCompare((PetscObject)std::get<1>(*ctx)[0], MATTRANSPOSEVIRTUAL, &flg));
2711           A10 = nullptr;
2712           if (flg) PetscCall(MatTransposeGetMat(std::get<1>(*ctx)[0], &A10));
2713           else {
2714             PetscBool flg;
2715 
2716             PetscCall(PetscObjectTypeCompare((PetscObject)std::get<1>(*ctx)[0], MATHERMITIANTRANSPOSEVIRTUAL, &flg));
2717             if (flg) PetscCall(MatHermitianTransposeGetMat(std::get<1>(*ctx)[0], &A10));
2718           }
2719           if (!A10) PetscCall(MatCreateSubMatrices(std::get<1>(*ctx)[0], 1, &sorted, &data->is, MAT_INITIAL_MATRIX, &sub));
2720           else {
2721             if (flg) PetscCall(MatCreateTranspose(b[2], b + 1));
2722             else PetscCall(MatCreateHermitianTranspose(b[2], b + 1));
2723           }
2724         } else {
2725           PetscCall(MatSetOption(A01, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
2726           PetscCall(MatCreateSubMatrices(A01, 1, &sorted, &data->is, MAT_INITIAL_MATRIX, &sub));
2727           if (flg) PetscCall(MatCreateTranspose(*sub, b + 2));
2728           else PetscCall(MatCreateHermitianTranspose(*sub, b + 2));
2729         }
2730         if (A01 || !A10) {
2731           b[1] = sub[0];
2732           PetscCall(PetscObjectReference((PetscObject)sub[0]));
2733         }
2734         PetscCall(MatDestroySubMatrices(1, &sub));
2735         PetscCall(ISDestroy(&sorted));
2736         b[3] = data->aux;
2737         PetscCall(MatCreateSchurComplement(subA[0], subA[1], b[1], b[2], b[3], &S));
2738         PetscCall(MatSchurComplementSetKSP(S, ksp[0]));
2739         if (data->N != 1) {
2740           PetscCall(PCASMSetType(data->levels[0]->pc, PC_ASM_NONE)); /* "Neumann--Neumann" preconditioning with overlap and a Boolean partition of unity */
2741           PetscCall(PCASMSetLocalSubdomains(data->levels[0]->pc, 1, &data->is, &loc));
2742           PetscCall(PCSetFromOptions(data->levels[0]->pc)); /* action of eq. (15) of https://hal.science/hal-02343808v6/document (with a sign flip) */
2743           s = data->levels[0]->pc;
2744         } else {
2745           is[0] = data->is;
2746           PetscCall(PetscObjectReference((PetscObject)is[0]));
2747           PetscCall(PetscObjectReference((PetscObject)b[3]));
2748           PetscCall(PCSetType(pc, PCASM));                          /* change the type of the current PC */
2749           data = nullptr;                                           /* destroyed in the previous PCSetType(), so reset to NULL to avoid any faulty use */
2750           PetscCall(PCAppendOptionsPrefix(pc, "pc_hpddm_coarse_")); /* same prefix as when using PCHPDDM with a single level */
2751           PetscCall(PCASMSetLocalSubdomains(pc, 1, is, &loc));
2752           PetscCall(ISDestroy(is));
2753           PetscCall(ISDestroy(&loc));
2754           s = pc;
2755         }
2756         PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(s, S, PETSC_TRUE)); /* the subdomain Mat is already known and the input IS of PCASMSetLocalSubdomains() is already sorted */
2757         PetscTryMethod(s, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (s, &n, nullptr, &ksp));
2758         PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", n);
2759         PetscCall(KSPGetPC(ksp[0], &inner));
2760         PetscCall(PCSetType(inner, PCSHELL)); /* compute the action of the inverse of the local Schur complement with a PCSHELL */
2761         b[0] = subA[0];
2762         PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, nullptr, 2, nullptr, b, &N)); /* instead of computing inv(A11 - A10 inv(A00) A01), compute inv([A00, A01; A10, A11]) followed by a partial solution associated to the A11 block */
2763         if (!data) PetscCall(PetscObjectDereference((PetscObject)b[3]));
2764         PetscCall(PetscObjectDereference((PetscObject)b[1]));
2765         PetscCall(PetscObjectDereference((PetscObject)b[2]));
2766         PetscCall(PCCreate(PETSC_COMM_SELF, &s));
2767         PetscCall(PCSetOptionsPrefix(s, ((PetscObject)inner)->prefix));
2768         PetscCall(PCSetOptionsPrefix(inner, nullptr));
2769         PetscCall(KSPSetSkipPCSetFromOptions(ksp[0], PETSC_TRUE));
2770         PetscCall(PCSetType(s, PCLU));
2771         if (PetscDefined(HAVE_MUMPS)) PetscCall(PCFactorSetMatSolverType(s, MATSOLVERMUMPS)); /* only MATSOLVERMUMPS handles MATNEST, so for the others, e.g., MATSOLVERPETSC or MATSOLVERMKL_PARDISO, convert to plain MATAIJ */
2772         PetscCall(PCSetFromOptions(s));
2773         PetscCall(PCFactorGetMatSolverType(s, &type));
2774         PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
2775         PetscCall(MatGetLocalSize(A11, &n, nullptr));
2776         if (flg || n == 0) {
2777           PetscCall(PCSetOperators(s, N, N));
2778           if (n) {
2779             PetscCall(PCFactorGetMatrix(s, b));
2780             PetscCall(MatSetOptionsPrefix(*b, ((PetscObject)s)->prefix));
2781             n = -1;
2782             PetscCall(PetscOptionsGetInt(((PetscObject)pc)->options, ((PetscObject)s)->prefix, "-mat_mumps_icntl_26", &n, nullptr));
2783             if (n == 1) {                                /* allocates a square MatDense of size is[1]->map->n, so one */
2784               PetscCall(MatNestGetISs(N, is, nullptr));  /*  needs to be able to deactivate this path when dealing    */
2785               PetscCall(MatFactorSetSchurIS(*b, is[1])); /*  with a large constraint space in order to avoid OOM      */
2786             }
2787           } else PetscCall(PCSetType(s, PCNONE)); /* empty local Schur complement (e.g., centralized on another process) */
2788         } else {
2789           PetscCall(MatConvert(N, MATAIJ, MAT_INITIAL_MATRIX, b));
2790           PetscCall(PCSetOperators(s, N, *b));
2791           PetscCall(PetscObjectDereference((PetscObject)*b));
2792           PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &flg, PCLU, PCCHOLESKY, PCILU, PCICC, PCQR, ""));
2793           if (flg) PetscCall(PCFactorGetMatrix(s, b)); /* MATSOLVERMKL_PARDISO cannot compute in PETSc (yet) a partial solution associated to the A11 block, only partial solution associated to the A00 block or full solution */
2794         }
2795         PetscCall(PetscNew(&p));
2796         p->first = s;
2797         if (n != 0) PetscCall(MatCreateVecs(*b, p->second, p->second + 1));
2798         else p->second[0] = p->second[1] = nullptr;
2799         PetscCall(PCShellSetContext(inner, p));
2800         PetscCall(PCShellSetApply(inner, PCApply_Nest));
2801         PetscCall(PCShellSetView(inner, PCView_Nest));
2802         PetscCall(PCShellSetDestroy(inner, PCDestroy_Nest));
2803         PetscCall(PetscObjectDereference((PetscObject)N));
2804         if (!data) {
2805           PetscCall(MatDestroy(&S));
2806           PetscCall(ISDestroy(&unsorted));
2807           PetscCall(MatDestroy(&C));
2808           PetscCall(ISDestroy(&uis));
2809           PetscCall(PetscFree(ctx));
2810 #if PetscDefined(USE_DEBUG)
2811           PetscCall(ISDestroy(&dis));
2812           PetscCall(MatDestroy(&daux));
2813 #endif
2814           PetscFunctionReturn(PETSC_SUCCESS);
2815         }
2816       }
2817       if (!data->levels[0]->scatter) {
2818         PetscCall(MatCreateVecs(P, &xin, nullptr));
2819         if (ismatis) PetscCall(MatDestroy(&P));
2820         PetscCall(VecScatterCreate(xin, data->is, data->levels[0]->D, nullptr, &data->levels[0]->scatter));
2821         PetscCall(VecDestroy(&xin));
2822       }
2823       if (data->levels[0]->P) {
2824         /* if the pattern is the same and PCSetUp() has previously succeeded, reuse HPDDM buffers and connectivity */
2825         PetscCall(HPDDM::Schwarz<PetscScalar>::destroy(data->levels[0], !pc->setupcalled || pc->flag == DIFFERENT_NONZERO_PATTERN ? PETSC_TRUE : PETSC_FALSE));
2826       }
2827       if (!data->levels[0]->P) data->levels[0]->P = new HPDDM::Schwarz<PetscScalar>();
2828       if (data->log_separate) PetscCall(PetscLogEventBegin(PC_HPDDM_SetUp[0], data->levels[0]->ksp, nullptr, nullptr, nullptr));
2829       else PetscCall(PetscLogEventBegin(PC_HPDDM_Strc, data->levels[0]->ksp, nullptr, nullptr, nullptr));
2830       /* HPDDM internal data structure */
2831       PetscCall(data->levels[0]->P->structure(loc, data->is, !ctx ? sub[0] : nullptr, ismatis ? C : data->aux, data->levels));
2832       if (!data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_Strc, data->levels[0]->ksp, nullptr, nullptr, nullptr));
2833       /* matrix pencil of the generalized eigenvalue problem on the overlap (GenEO) */
2834       if (!ctx) {
2835         if (data->deflation || overlap != -1) weighted = data->aux;
2836         else if (!data->B) {
2837           PetscBool cmp;
2838 
2839           PetscCall(MatDuplicate(sub[0], MAT_COPY_VALUES, &weighted));
2840           PetscCall(PetscObjectTypeCompareAny((PetscObject)weighted, &cmp, MATNORMAL, MATNORMALHERMITIAN, ""));
2841           if (cmp) flg = PETSC_FALSE;
2842           PetscCall(MatDiagonalScale(weighted, data->levels[0]->D, data->levels[0]->D));
2843           /* neither MatDuplicate() nor MatDiagonalScale() handles the symmetry options, so propagate the options explicitly */
2844           /* only useful for -mat_type baij -pc_hpddm_levels_1_st_pc_type cholesky (no problem with MATAIJ or MATSBAIJ)      */
2845           PetscCall(MatPropagateSymmetryOptions(sub[0], weighted));
2846           if (PetscDefined(USE_DEBUG) && PetscBool3ToBool(data->Neumann)) {
2847             Mat      *sub, A[3];
2848             PetscReal norm[2];
2849             PetscBool flg;
2850 
2851             PetscCall(PetscObjectTypeCompare((PetscObject)P, MATMPISBAIJ, &flg)); /* MatCreateSubMatrices() does not work with MATSBAIJ and unsorted ISes, so convert to MPIAIJ */
2852             if (flg) PetscCall(MatConvert(P, MATMPIAIJ, MAT_INITIAL_MATRIX, A));
2853             else {
2854               A[0] = P;
2855               PetscCall(PetscObjectReference((PetscObject)P));
2856             }
2857             PetscCall(MatCreateSubMatrices(A[0], 1, &data->is, &data->is, MAT_INITIAL_MATRIX, &sub));
2858             PetscCall(MatDiagonalScale(sub[0], data->levels[0]->D, data->levels[0]->D));
2859             PetscCall(MatConvert(sub[0], MATSEQAIJ, MAT_INITIAL_MATRIX, A + 1)); /* too many corner cases to handle (MATNORMAL, MATNORMALHERMITIAN, MATBAIJ with different block sizes...), so just MatConvert() to MATSEQAIJ since this is just for debugging */
2860             PetscCall(MatConvert(weighted, MATSEQAIJ, MAT_INITIAL_MATRIX, A + 2));
2861             PetscCall(MatAXPY(A[1], -1.0, A[2], UNKNOWN_NONZERO_PATTERN));
2862             PetscCall(MatNorm(A[1], NORM_FROBENIUS, norm));
2863             if (norm[0]) {
2864               PetscCall(MatNorm(A[2], NORM_FROBENIUS, norm + 1));
2865               PetscCheck(PetscAbsReal(norm[0] / norm[1]) < PetscSqrtReal(PETSC_SMALL), PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Auxiliary Mat is different from the (assembled) subdomain Mat for the interior unknowns, so it cannot be the Neumann matrix, remove -%spc_hpddm_has_neumann", pcpre ? pcpre : "");
2866             }
2867             PetscCall(MatDestroySubMatrices(1, &sub));
2868             for (PetscInt i = 0; i < 3; ++i) PetscCall(MatDestroy(A + i));
2869           }
2870         } else weighted = data->B;
2871       } else weighted = nullptr;
2872       /* SLEPc is used inside the loaded symbol */
2873       PetscCall((*loadedSym)(data->levels[0]->P, data->is, ismatis ? C : (algebraic && !block && overlap == -1 ? sub[0] : (!ctx ? data->aux : S)), weighted, data->B, initial, data->levels));
2874       if (!ctx && data->share && overlap == -1) {
2875         Mat st[2];
2876 
2877         PetscCall(KSPGetOperators(ksp[0], st, st + 1));
2878         PetscCall(MatCopy(subA[0], st[0], structure));
2879         if (subA[1] != subA[0] || st[1] != st[0]) PetscCall(MatCopy(subA[1], st[1], SAME_NONZERO_PATTERN));
2880         PetscCall(PetscObjectDereference((PetscObject)subA[0]));
2881       }
2882       if (data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_SetUp[0], data->levels[0]->ksp, nullptr, nullptr, nullptr));
2883       if (ismatis) PetscCall(MatISGetLocalMat(C, &N));
2884       else N = data->aux;
2885       if (!ctx) P = sub[0];
2886       else P = S;
2887       /* going through the grid hierarchy */
2888       for (n = 1; n < data->N; ++n) {
2889         if (data->log_separate) PetscCall(PetscLogEventBegin(PC_HPDDM_SetUp[n], data->levels[n]->ksp, nullptr, nullptr, nullptr));
2890         /* method composed in the loaded symbol since there, SLEPc is used as well */
2891         PetscTryMethod(data->levels[0]->ksp, "PCHPDDMSetUp_Private_C", (Mat *, Mat *, PetscInt, PetscInt *const, PC_HPDDM_Level **const), (&P, &N, n, &data->N, data->levels));
2892         if (data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_SetUp[n], data->levels[n]->ksp, nullptr, nullptr, nullptr));
2893       }
2894       /* reset to NULL to avoid any faulty use */
2895       PetscCall(PetscObjectComposeFunction((PetscObject)data->levels[0]->ksp, "PCHPDDMSetUp_Private_C", nullptr));
2896       if (!ismatis) PetscCall(PetscObjectComposeFunction((PetscObject)pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_C", nullptr));
2897       else PetscCall(PetscObjectDereference((PetscObject)C)); /* matching PetscObjectReference() above */
2898       for (n = 0; n < data->N - 1; ++n)
2899         if (data->levels[n]->P) {
2900           /* HPDDM internal work buffers */
2901           data->levels[n]->P->setBuffer();
2902           data->levels[n]->P->super::start();
2903         }
2904       if (ismatis || !subdomains) PetscCall(PCHPDDMDestroySubMatrices_Private(PetscBool3ToBool(data->Neumann), PetscBool(algebraic && !block && overlap == -1), sub));
2905       if (ismatis) data->is = nullptr;
2906       for (n = 0; n < data->N - 1 + (reused > 0); ++n) {
2907         if (data->levels[n]->P) {
2908           PC spc;
2909 
2910           /* force the PC to be PCSHELL to do the coarse grid corrections */
2911           PetscCall(KSPSetSkipPCSetFromOptions(data->levels[n]->ksp, PETSC_TRUE));
2912           PetscCall(KSPGetPC(data->levels[n]->ksp, &spc));
2913           PetscCall(PCSetType(spc, PCSHELL));
2914           PetscCall(PCShellSetContext(spc, data->levels[n]));
2915           PetscCall(PCShellSetSetUp(spc, PCSetUp_HPDDMShell));
2916           PetscCall(PCShellSetApply(spc, PCApply_HPDDMShell));
2917           PetscCall(PCShellSetMatApply(spc, PCMatApply_HPDDMShell));
2918           PetscCall(PCShellSetApplyTranspose(spc, PCApplyTranspose_HPDDMShell));
2919           PetscCall(PCShellSetMatApplyTranspose(spc, PCMatApplyTranspose_HPDDMShell));
2920           if (ctx && n == 0) {
2921             Mat                               Amat, Pmat;
2922             PetscInt                          m, M;
2923             std::tuple<Mat, PetscSF, Vec[2]> *ctx;
2924 
2925             PetscCall(KSPGetOperators(data->levels[n]->ksp, nullptr, &Pmat));
2926             PetscCall(MatGetLocalSize(Pmat, &m, nullptr));
2927             PetscCall(MatGetSize(Pmat, &M, nullptr));
2928             PetscCall(PetscNew(&ctx));
2929             std::get<0>(*ctx) = S;
2930             std::get<1>(*ctx) = data->levels[n]->scatter;
2931             PetscCall(MatCreateShell(PetscObjectComm((PetscObject)data->levels[n]->ksp), m, m, M, M, ctx, &Amat));
2932             PetscCall(MatShellSetOperation(Amat, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Schur<false>));
2933             PetscCall(MatShellSetOperation(Amat, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMult_Schur<true>));
2934             PetscCall(MatShellSetOperation(Amat, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_Schur));
2935             PetscCall(MatCreateVecs(S, std::get<2>(*ctx), std::get<2>(*ctx) + 1));
2936             PetscCall(KSPSetOperators(data->levels[n]->ksp, Amat, Pmat));
2937             PetscCall(PetscObjectDereference((PetscObject)Amat));
2938           }
2939           PetscCall(PCShellSetDestroy(spc, PCDestroy_HPDDMShell));
2940           if (!data->levels[n]->pc) PetscCall(PCCreate(PetscObjectComm((PetscObject)data->levels[n]->ksp), &data->levels[n]->pc));
2941           if (n < reused) {
2942             PetscCall(PCSetReusePreconditioner(spc, PETSC_TRUE));
2943             PetscCall(PCSetReusePreconditioner(data->levels[n]->pc, PETSC_TRUE));
2944           }
2945           PetscCall(PCSetUp(spc));
2946         }
2947       }
2948       if (ctx) PetscCall(MatDestroy(&S));
2949       if (overlap == -1) PetscCall(PetscObjectComposeFunction((PetscObject)pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_Private_C", nullptr));
2950     } else flg = reused ? PETSC_FALSE : PETSC_TRUE;
2951     if (!ismatis && subdomains) {
2952       if (flg) PetscCall(KSPGetPC(data->levels[0]->ksp, &inner));
2953       else inner = data->levels[0]->pc;
2954       if (inner) {
2955         if (!inner->setupcalled) PetscCall(PCSetType(inner, PCASM));
2956         PetscCall(PCSetFromOptions(inner));
2957         PetscCall(PetscStrcmp(((PetscObject)inner)->type_name, PCASM, &flg));
2958         if (flg) {
2959           if (!inner->setupcalled) { /* evaluates to PETSC_FALSE when -pc_hpddm_block_splitting */
2960             IS sorted;               /* PCASM will sort the input IS, duplicate it to return an unmodified (PCHPDDM) input IS */
2961 
2962             PetscCall(ISDuplicate(is[0], &sorted));
2963             PetscCall(PCASMSetLocalSubdomains(inner, 1, &sorted, &loc));
2964             PetscCall(PetscObjectDereference((PetscObject)sorted));
2965           }
2966           if (!PetscBool3ToBool(data->Neumann) && data->N > 1) { /* subdomain matrices are already created for the eigenproblem, reuse them for the fine-level PC */
2967             PetscCall(PCHPDDMPermute_Private(*is, nullptr, nullptr, sub[0], &P, nullptr));
2968             PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(inner, P, algebraic));
2969             PetscCall(PetscObjectDereference((PetscObject)P));
2970           }
2971         }
2972       }
2973       if (data->N > 1) {
2974         if (overlap != 1) PetscCall(PCHPDDMDestroySubMatrices_Private(PetscBool3ToBool(data->Neumann), PetscBool(algebraic && !block && overlap == -1), sub));
2975         if (overlap == 1) PetscCall(MatDestroy(subA));
2976       }
2977     }
2978     PetscCall(ISDestroy(&loc));
2979   } else data->N = 1 + reused; /* enforce this value to 1 + reused if there is no way to build another level */
2980   if (requested != data->N + reused) {
2981     PetscCall(PetscInfo(pc, "%" PetscInt_FMT " levels requested, only %" PetscInt_FMT " built + %" PetscInt_FMT " reused. Options for level(s) > %" PetscInt_FMT ", including -%spc_hpddm_coarse_ will not be taken into account\n", requested, data->N, reused,
2982                         data->N, pcpre ? pcpre : ""));
2983     PetscCall(PetscInfo(pc, "It is best to tune parameters, e.g., a higher value for -%spc_hpddm_levels_%" PetscInt_FMT "_eps_threshold_absolute or a lower value for -%spc_hpddm_levels_%" PetscInt_FMT "_svd_threshold_relative, so that at least one local deflation vector will be selected\n", pcpre ? pcpre : "",
2984                         data->N, pcpre ? pcpre : "", data->N));
2985     /* cannot use PCDestroy_HPDDMShell() because PCSHELL not set for unassembled levels */
2986     for (n = data->N - 1; n < requested - 1; ++n) {
2987       if (data->levels[n]->P) {
2988         PetscCall(HPDDM::Schwarz<PetscScalar>::destroy(data->levels[n], PETSC_TRUE));
2989         PetscCall(VecDestroyVecs(1, &data->levels[n]->v[0]));
2990         PetscCall(VecDestroyVecs(2, &data->levels[n]->v[1]));
2991         PetscCall(MatDestroy(data->levels[n]->V));
2992         PetscCall(MatDestroy(data->levels[n]->V + 1));
2993         PetscCall(MatDestroy(data->levels[n]->V + 2));
2994         PetscCall(VecDestroy(&data->levels[n]->D));
2995         PetscCall(PetscSFDestroy(&data->levels[n]->scatter));
2996       }
2997     }
2998     if (reused) {
2999       for (n = reused; n < PETSC_PCHPDDM_MAXLEVELS && data->levels[n]; ++n) {
3000         PetscCall(KSPDestroy(&data->levels[n]->ksp));
3001         PetscCall(PCDestroy(&data->levels[n]->pc));
3002       }
3003     }
3004     PetscCheck(!PetscDefined(USE_DEBUG), PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " levels requested, only %" PetscInt_FMT " built + %" PetscInt_FMT " reused. Options for level(s) > %" PetscInt_FMT ", including -%spc_hpddm_coarse_ will not be taken into account. It is best to tune parameters, e.g., a higher value for -%spc_hpddm_levels_%" PetscInt_FMT "_eps_threshold or a lower value for -%spc_hpddm_levels_%" PetscInt_FMT "_svd_threshold_relative, so that at least one local deflation vector will be selected. If you don't want this to error out, compile --with-debugging=0", requested,
3005                data->N, reused, data->N, pcpre ? pcpre : "", pcpre ? pcpre : "", data->N, pcpre ? pcpre : "", data->N);
3006   }
3007   /* these solvers are created after PCSetFromOptions() is called */
3008   if (pc->setfromoptionscalled) {
3009     for (n = 0; n < data->N; ++n) {
3010       if (data->levels[n]->ksp) PetscCall(KSPSetFromOptions(data->levels[n]->ksp));
3011       if (data->levels[n]->pc) PetscCall(PCSetFromOptions(data->levels[n]->pc));
3012     }
3013     pc->setfromoptionscalled = 0;
3014   }
3015   data->N += reused;
3016   if (data->share && swap) {
3017     /* swap back pointers so that variables follow the user-provided numbering */
3018     std::swap(C, data->aux);
3019     std::swap(uis, data->is);
3020     PetscCall(MatDestroy(&C));
3021     PetscCall(ISDestroy(&uis));
3022   }
3023   if (algebraic) PetscCall(MatDestroy(&data->aux));
3024   if (unsorted && unsorted != is[0]) {
3025     PetscCall(ISCopy(unsorted, data->is));
3026     PetscCall(ISDestroy(&unsorted));
3027   }
3028 #if PetscDefined(USE_DEBUG)
3029   PetscCheck((data->is && dis) || (!data->is && !dis), PETSC_COMM_SELF, PETSC_ERR_PLIB, "An IS pointer is NULL but not the other: input IS pointer (%p) v. output IS pointer (%p)", (void *)dis, (void *)data->is);
3030   if (data->is) {
3031     PetscCall(ISEqualUnsorted(data->is, dis, &flg));
3032     PetscCall(ISDestroy(&dis));
3033     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Input IS and output IS are not equal");
3034   }
3035   PetscCheck((data->aux && daux) || (!data->aux && !daux), PETSC_COMM_SELF, PETSC_ERR_PLIB, "A Mat pointer is NULL but not the other: input Mat pointer (%p) v. output Mat pointer (%p)", (void *)daux, (void *)data->aux);
3036   if (data->aux) {
3037     PetscCall(MatMultEqual(data->aux, daux, 20, &flg));
3038     PetscCall(MatDestroy(&daux));
3039     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Input Mat and output Mat are not equal");
3040   }
3041 #endif
3042   PetscFunctionReturn(PETSC_SUCCESS);
3043 }
3044 
3045 /*@
3046   PCHPDDMSetCoarseCorrectionType - Sets the coarse correction type.
3047 
3048   Collective
3049 
3050   Input Parameters:
3051 + pc   - preconditioner context
3052 - type - `PC_HPDDM_COARSE_CORRECTION_DEFLATED`, `PC_HPDDM_COARSE_CORRECTION_ADDITIVE`, `PC_HPDDM_COARSE_CORRECTION_BALANCED`, or `PC_HPDDM_COARSE_CORRECTION_NONE`
3053 
3054   Options Database Key:
3055 . -pc_hpddm_coarse_correction <deflated, additive, balanced, none> - type of coarse correction to apply
3056 
3057   Level: intermediate
3058 
3059 .seealso: [](ch_ksp), `PCHPDDMGetCoarseCorrectionType()`, `PCHPDDM`, `PCHPDDMCoarseCorrectionType`
3060 @*/
3061 PetscErrorCode PCHPDDMSetCoarseCorrectionType(PC pc, PCHPDDMCoarseCorrectionType type)
3062 {
3063   PetscFunctionBegin;
3064   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3065   PetscValidLogicalCollectiveEnum(pc, type, 2);
3066   PetscTryMethod(pc, "PCHPDDMSetCoarseCorrectionType_C", (PC, PCHPDDMCoarseCorrectionType), (pc, type));
3067   PetscFunctionReturn(PETSC_SUCCESS);
3068 }
3069 
3070 /*@
3071   PCHPDDMGetCoarseCorrectionType - Gets the coarse correction type.
3072 
3073   Input Parameter:
3074 . pc - preconditioner context
3075 
3076   Output Parameter:
3077 . type - `PC_HPDDM_COARSE_CORRECTION_DEFLATED`, `PC_HPDDM_COARSE_CORRECTION_ADDITIVE`, `PC_HPDDM_COARSE_CORRECTION_BALANCED`, or `PC_HPDDM_COARSE_CORRECTION_NONE`
3078 
3079   Level: intermediate
3080 
3081 .seealso: [](ch_ksp), `PCHPDDMSetCoarseCorrectionType()`, `PCHPDDM`, `PCHPDDMCoarseCorrectionType`
3082 @*/
3083 PetscErrorCode PCHPDDMGetCoarseCorrectionType(PC pc, PCHPDDMCoarseCorrectionType *type)
3084 {
3085   PetscFunctionBegin;
3086   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3087   if (type) {
3088     PetscAssertPointer(type, 2);
3089     PetscUseMethod(pc, "PCHPDDMGetCoarseCorrectionType_C", (PC, PCHPDDMCoarseCorrectionType *), (pc, type));
3090   }
3091   PetscFunctionReturn(PETSC_SUCCESS);
3092 }
3093 
3094 static PetscErrorCode PCHPDDMSetCoarseCorrectionType_HPDDM(PC pc, PCHPDDMCoarseCorrectionType type)
3095 {
3096   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3097 
3098   PetscFunctionBegin;
3099   data->correction = type;
3100   PetscFunctionReturn(PETSC_SUCCESS);
3101 }
3102 
3103 static PetscErrorCode PCHPDDMGetCoarseCorrectionType_HPDDM(PC pc, PCHPDDMCoarseCorrectionType *type)
3104 {
3105   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3106 
3107   PetscFunctionBegin;
3108   *type = data->correction;
3109   PetscFunctionReturn(PETSC_SUCCESS);
3110 }
3111 
3112 /*@
3113   PCHPDDMSetSTShareSubKSP - Sets whether the `KSP` in SLEPc `ST` and the fine-level subdomain solver should be shared.
3114 
3115   Input Parameters:
3116 + pc    - preconditioner context
3117 - share - whether the `KSP` should be shared or not
3118 
3119   Note:
3120   This is not the same as `PCSetReusePreconditioner()`. Given certain conditions (visible using -info), a symbolic factorization can be skipped
3121   when using a subdomain `PCType` such as `PCLU` or `PCCHOLESKY`.
3122 
3123   Level: advanced
3124 
3125 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMGetSTShareSubKSP()`
3126 @*/
3127 PetscErrorCode PCHPDDMSetSTShareSubKSP(PC pc, PetscBool share)
3128 {
3129   PetscFunctionBegin;
3130   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3131   PetscTryMethod(pc, "PCHPDDMSetSTShareSubKSP_C", (PC, PetscBool), (pc, share));
3132   PetscFunctionReturn(PETSC_SUCCESS);
3133 }
3134 
3135 /*@
3136   PCHPDDMGetSTShareSubKSP - Gets whether the `KSP` in SLEPc `ST` and the fine-level subdomain solver is shared.
3137 
3138   Input Parameter:
3139 . pc - preconditioner context
3140 
3141   Output Parameter:
3142 . share - whether the `KSP` is shared or not
3143 
3144   Note:
3145   This is not the same as `PCGetReusePreconditioner()`. The return value is unlikely to be true, but when it is, a symbolic factorization can be skipped
3146   when using a subdomain `PCType` such as `PCLU` or `PCCHOLESKY`.
3147 
3148   Level: advanced
3149 
3150 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMSetSTShareSubKSP()`
3151 @*/
3152 PetscErrorCode PCHPDDMGetSTShareSubKSP(PC pc, PetscBool *share)
3153 {
3154   PetscFunctionBegin;
3155   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3156   if (share) {
3157     PetscAssertPointer(share, 2);
3158     PetscUseMethod(pc, "PCHPDDMGetSTShareSubKSP_C", (PC, PetscBool *), (pc, share));
3159   }
3160   PetscFunctionReturn(PETSC_SUCCESS);
3161 }
3162 
3163 static PetscErrorCode PCHPDDMSetSTShareSubKSP_HPDDM(PC pc, PetscBool share)
3164 {
3165   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3166 
3167   PetscFunctionBegin;
3168   data->share = share;
3169   PetscFunctionReturn(PETSC_SUCCESS);
3170 }
3171 
3172 static PetscErrorCode PCHPDDMGetSTShareSubKSP_HPDDM(PC pc, PetscBool *share)
3173 {
3174   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3175 
3176   PetscFunctionBegin;
3177   *share = data->share;
3178   PetscFunctionReturn(PETSC_SUCCESS);
3179 }
3180 
3181 /*@
3182   PCHPDDMSetDeflationMat - Sets the deflation space used to assemble a coarser operator.
3183 
3184   Input Parameters:
3185 + pc - preconditioner context
3186 . is - index set of the local deflation matrix
3187 - U  - deflation sequential matrix stored as a `MATSEQDENSE`
3188 
3189   Level: advanced
3190 
3191 .seealso: [](ch_ksp), `PCHPDDM`, `PCDeflationSetSpace()`, `PCMGSetRestriction()`
3192 @*/
3193 PetscErrorCode PCHPDDMSetDeflationMat(PC pc, IS is, Mat U)
3194 {
3195   PetscFunctionBegin;
3196   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3197   PetscValidHeaderSpecific(is, IS_CLASSID, 2);
3198   PetscValidHeaderSpecific(U, MAT_CLASSID, 3);
3199   PetscTryMethod(pc, "PCHPDDMSetDeflationMat_C", (PC, IS, Mat), (pc, is, U));
3200   PetscFunctionReturn(PETSC_SUCCESS);
3201 }
3202 
3203 static PetscErrorCode PCHPDDMSetDeflationMat_HPDDM(PC pc, IS is, Mat U)
3204 {
3205   PetscFunctionBegin;
3206   PetscCall(PCHPDDMSetAuxiliaryMat_Private(pc, is, U, PETSC_TRUE));
3207   PetscFunctionReturn(PETSC_SUCCESS);
3208 }
3209 
3210 PetscErrorCode HPDDMLoadDL_Private(PetscBool *found)
3211 {
3212   PetscBool flg;
3213   char      lib[PETSC_MAX_PATH_LEN], dlib[PETSC_MAX_PATH_LEN], dir[PETSC_MAX_PATH_LEN];
3214 
3215   PetscFunctionBegin;
3216   PetscAssertPointer(found, 1);
3217   PetscCall(PetscStrncpy(dir, "${PETSC_LIB_DIR}", sizeof(dir)));
3218   PetscCall(PetscOptionsGetString(nullptr, nullptr, "-hpddm_dir", dir, sizeof(dir), nullptr));
3219   PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/libhpddm_petsc", dir));
3220   PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3221 #if defined(SLEPC_LIB_DIR) /* this variable is passed during SLEPc ./configure when PETSc has not been configured   */
3222   if (!*found) {           /* with --download-hpddm since slepcconf.h is not yet built (and thus can't be included) */
3223     PetscCall(PetscStrncpy(dir, HPDDM_STR(SLEPC_LIB_DIR), sizeof(dir)));
3224     PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/libhpddm_petsc", dir));
3225     PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3226   }
3227 #endif
3228   if (!*found) { /* probable options for this to evaluate to PETSC_TRUE: system inconsistency (libhpddm_petsc moved by user?) or PETSc configured without --download-slepc */
3229     PetscCall(PetscOptionsGetenv(PETSC_COMM_SELF, "SLEPC_DIR", dir, sizeof(dir), &flg));
3230     if (flg) { /* if both PETSc and SLEPc are configured with --download-hpddm but PETSc has been configured without --download-slepc, one must ensure that libslepc is loaded before libhpddm_petsc */
3231       PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/lib/libslepc", dir));
3232       PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3233       PetscCheck(*found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s not found but SLEPC_DIR=%s", lib, dir);
3234       PetscCall(PetscDLLibraryAppend(PETSC_COMM_SELF, &PetscDLLibrariesLoaded, dlib));
3235       PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/lib/libhpddm_petsc", dir)); /* libhpddm_petsc is always in the same directory as libslepc */
3236       PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3237     }
3238   }
3239   PetscCheck(*found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s not found", lib);
3240   PetscCall(PetscDLLibraryAppend(PETSC_COMM_SELF, &PetscDLLibrariesLoaded, dlib));
3241   PetscFunctionReturn(PETSC_SUCCESS);
3242 }
3243 
3244 /*MC
3245    PCHPDDM - Interface with the HPDDM library.
3246 
3247    This `PC` may be used to build multilevel spectral domain decomposition methods based on the GenEO framework {cite}`spillane2011robust` {cite}`al2021multilevel`.
3248    It may be viewed as an alternative to spectral
3249    AMGe or `PCBDDC` with adaptive selection of constraints. The interface is explained in details in {cite}`jolivetromanzampini2020`
3250 
3251    The matrix used for building the preconditioner (Pmat) may be unassembled (`MATIS`), assembled (`MATAIJ`, `MATBAIJ`, or `MATSBAIJ`), hierarchical (`MATHTOOL`), `MATNORMAL`, `MATNORMALHERMITIAN`, or `MATSCHURCOMPLEMENT` (when `PCHPDDM` is used as part of an outer `PCFIELDSPLIT`).
3252 
3253    For multilevel preconditioning, when using an assembled or hierarchical Pmat, one must provide an auxiliary local `Mat` (unassembled local operator for GenEO) using
3254    `PCHPDDMSetAuxiliaryMat()`. Calling this routine is not needed when using a `MATIS` Pmat, assembly is done internally using `MatConvert()`.
3255 
3256    Options Database Keys:
3257 +   -pc_hpddm_define_subdomains <true, default=false>    - on the finest level, calls `PCASMSetLocalSubdomains()` with the `IS` supplied in `PCHPDDMSetAuxiliaryMat()`
3258                                                          (not relevant with an unassembled Pmat)
3259 .   -pc_hpddm_has_neumann <true, default=false>          - on the finest level, informs the `PC` that the local Neumann matrix is supplied in `PCHPDDMSetAuxiliaryMat()`
3260 -   -pc_hpddm_coarse_correction <type, default=deflated> - determines the `PCHPDDMCoarseCorrectionType` when calling `PCApply()`
3261 
3262    Options for subdomain solvers, subdomain eigensolvers (for computing deflation vectors), and the coarse solver can be set using the following options database prefixes.
3263 .vb
3264       -pc_hpddm_levels_%d_pc_
3265       -pc_hpddm_levels_%d_ksp_
3266       -pc_hpddm_levels_%d_eps_
3267       -pc_hpddm_levels_%d_p
3268       -pc_hpddm_levels_%d_mat_type
3269       -pc_hpddm_coarse_
3270       -pc_hpddm_coarse_p
3271       -pc_hpddm_coarse_mat_type
3272       -pc_hpddm_coarse_mat_filter
3273 .ve
3274 
3275    E.g., -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 10 -pc_hpddm_levels_2_p 4 -pc_hpddm_levels_2_sub_pc_type lu -pc_hpddm_levels_2_eps_nev 10
3276     -pc_hpddm_coarse_p 2 -pc_hpddm_coarse_mat_type baij will use 10 deflation vectors per subdomain on the fine "level 1",
3277     aggregate the fine subdomains into 4 "level 2" subdomains, then use 10 deflation vectors per subdomain on "level 2",
3278     and assemble the coarse matrix (of dimension 4 x 10 = 40) on two processes as a `MATBAIJ` (default is `MATSBAIJ`).
3279 
3280    In order to activate a "level N+1" coarse correction, it is mandatory to call -pc_hpddm_levels_N_eps_nev <nu> or -pc_hpddm_levels_N_eps_threshold_absolute <val>. The default -pc_hpddm_coarse_p value is 1, meaning that the coarse operator is aggregated on a single process.
3281 
3282    Level: intermediate
3283 
3284    Notes:
3285    This preconditioner requires that PETSc is built with SLEPc (``--download-slepc``).
3286 
3287    By default, the underlying concurrent eigenproblems
3288    are solved using SLEPc shift-and-invert spectral transformation. This is usually what gives the best performance for GenEO, cf.
3289    {cite}`spillane2011robust` {cite}`jolivet2013scalabledd`. As
3290    stated above, SLEPc options are available through -pc_hpddm_levels_%d_, e.g., -pc_hpddm_levels_1_eps_type arpack -pc_hpddm_levels_1_eps_nev 10
3291    -pc_hpddm_levels_1_st_type sinvert. There are furthermore three options related to the (subdomain-wise local) eigensolver that are not described in
3292    SLEPc documentation since they are specific to `PCHPDDM`.
3293 .vb
3294       -pc_hpddm_levels_1_st_share_sub_ksp
3295       -pc_hpddm_levels_%d_eps_threshold_absolute
3296       -pc_hpddm_levels_1_eps_use_inertia
3297 .ve
3298 
3299    The first option from the list only applies to the fine-level eigensolver, see `PCHPDDMSetSTShareSubKSP()`. The second option from the list is
3300    used to filter eigenmodes retrieved after convergence of `EPSSolve()` at "level N" such that eigenvectors used to define a "level N+1" coarse
3301    correction are associated to eigenvalues whose magnitude are lower or equal than -pc_hpddm_levels_N_eps_threshold_absolute. When using an `EPS` which cannot
3302    determine a priori the proper -pc_hpddm_levels_N_eps_nev such that all wanted eigenmodes are retrieved, it is possible to get an estimation of the
3303    correct value using the third option from the list, -pc_hpddm_levels_1_eps_use_inertia, see `MatGetInertia()`. In that case, there is no need
3304    to supply -pc_hpddm_levels_1_eps_nev. This last option also only applies to the fine-level (N = 1) eigensolver.
3305 
3306    See also {cite}`dolean2015introduction`, {cite}`al2022robust`, {cite}`al2022robustpd`, and {cite}`nataf2022recent`
3307 
3308 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCHPDDMSetAuxiliaryMat()`, `MATIS`, `PCBDDC`, `PCDEFLATION`, `PCTELESCOPE`, `PCASM`,
3309           `PCHPDDMSetCoarseCorrectionType()`, `PCHPDDMHasNeumannMat()`, `PCHPDDMSetRHSMat()`, `PCHPDDMSetDeflationMat()`, `PCHPDDMSetSTShareSubKSP()`,
3310           `PCHPDDMGetSTShareSubKSP()`, `PCHPDDMGetCoarseCorrectionType()`, `PCHPDDMGetComplexities()`
3311 M*/
3312 PETSC_EXTERN PetscErrorCode PCCreate_HPDDM(PC pc)
3313 {
3314   PC_HPDDM *data;
3315   PetscBool found;
3316 
3317   PetscFunctionBegin;
3318   if (!loadedSym) {
3319     PetscCall(HPDDMLoadDL_Private(&found));
3320     if (found) PetscCall(PetscDLLibrarySym(PETSC_COMM_SELF, &PetscDLLibrariesLoaded, nullptr, "PCHPDDM_Internal", (void **)&loadedSym));
3321   }
3322   PetscCheck(loadedSym, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCHPDDM_Internal symbol not found in loaded libhpddm_petsc");
3323   PetscCall(PetscNew(&data));
3324   pc->data                   = data;
3325   data->Neumann              = PETSC_BOOL3_UNKNOWN;
3326   pc->ops->reset             = PCReset_HPDDM;
3327   pc->ops->destroy           = PCDestroy_HPDDM;
3328   pc->ops->setfromoptions    = PCSetFromOptions_HPDDM;
3329   pc->ops->setup             = PCSetUp_HPDDM;
3330   pc->ops->apply             = PCApply_HPDDM<false>;
3331   pc->ops->matapply          = PCMatApply_HPDDM<false>;
3332   pc->ops->applytranspose    = PCApply_HPDDM<true>;
3333   pc->ops->matapplytranspose = PCMatApply_HPDDM<true>;
3334   pc->ops->view              = PCView_HPDDM;
3335   pc->ops->presolve          = PCPreSolve_HPDDM;
3336 
3337   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetAuxiliaryMat_C", PCHPDDMSetAuxiliaryMat_HPDDM));
3338   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMHasNeumannMat_C", PCHPDDMHasNeumannMat_HPDDM));
3339   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetRHSMat_C", PCHPDDMSetRHSMat_HPDDM));
3340   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetCoarseCorrectionType_C", PCHPDDMSetCoarseCorrectionType_HPDDM));
3341   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetCoarseCorrectionType_C", PCHPDDMGetCoarseCorrectionType_HPDDM));
3342   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetSTShareSubKSP_C", PCHPDDMSetSTShareSubKSP_HPDDM));
3343   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetSTShareSubKSP_C", PCHPDDMGetSTShareSubKSP_HPDDM));
3344   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetDeflationMat_C", PCHPDDMSetDeflationMat_HPDDM));
3345   PetscFunctionReturn(PETSC_SUCCESS);
3346 }
3347 
3348 /*@C
3349   PCHPDDMInitializePackage - This function initializes everything in the `PCHPDDM` package. It is called from `PCInitializePackage()`.
3350 
3351   Level: developer
3352 
3353 .seealso: [](ch_ksp), `PetscInitialize()`
3354 @*/
3355 PetscErrorCode PCHPDDMInitializePackage(void)
3356 {
3357   char ename[32];
3358 
3359   PetscFunctionBegin;
3360   if (PCHPDDMPackageInitialized) PetscFunctionReturn(PETSC_SUCCESS);
3361   PCHPDDMPackageInitialized = PETSC_TRUE;
3362   PetscCall(PetscRegisterFinalize(PCHPDDMFinalizePackage));
3363   /* general events registered once during package initialization */
3364   /* some of these events are not triggered in libpetsc,          */
3365   /* but rather directly in libhpddm_petsc,                       */
3366   /* which is in charge of performing the following operations    */
3367 
3368   /* domain decomposition structure from Pmat sparsity pattern    */
3369   PetscCall(PetscLogEventRegister("PCHPDDMStrc", PC_CLASSID, &PC_HPDDM_Strc));
3370   /* Galerkin product, redistribution, and setup (not triggered in libpetsc)                */
3371   PetscCall(PetscLogEventRegister("PCHPDDMPtAP", PC_CLASSID, &PC_HPDDM_PtAP));
3372   /* Galerkin product with summation, redistribution, and setup (not triggered in libpetsc) */
3373   PetscCall(PetscLogEventRegister("PCHPDDMPtBP", PC_CLASSID, &PC_HPDDM_PtBP));
3374   /* next level construction using PtAP and PtBP (not triggered in libpetsc)                */
3375   PetscCall(PetscLogEventRegister("PCHPDDMNext", PC_CLASSID, &PC_HPDDM_Next));
3376   static_assert(PETSC_PCHPDDM_MAXLEVELS <= 9, "PETSC_PCHPDDM_MAXLEVELS value is too high");
3377   for (PetscInt i = 1; i < PETSC_PCHPDDM_MAXLEVELS; ++i) {
3378     PetscCall(PetscSNPrintf(ename, sizeof(ename), "PCHPDDMSetUp L%1" PetscInt_FMT, i));
3379     /* events during a PCSetUp() at level #i _except_ the assembly */
3380     /* of the Galerkin operator of the coarser level #(i + 1)      */
3381     PetscCall(PetscLogEventRegister(ename, PC_CLASSID, &PC_HPDDM_SetUp[i - 1]));
3382     PetscCall(PetscSNPrintf(ename, sizeof(ename), "PCHPDDMSolve L%1" PetscInt_FMT, i));
3383     /* events during a PCApply() at level #i _except_              */
3384     /* the KSPSolve() of the coarser level #(i + 1)                */
3385     PetscCall(PetscLogEventRegister(ename, PC_CLASSID, &PC_HPDDM_Solve[i - 1]));
3386   }
3387   PetscFunctionReturn(PETSC_SUCCESS);
3388 }
3389 
3390 /*@C
3391   PCHPDDMFinalizePackage - This function frees everything from the `PCHPDDM` package. It is called from `PetscFinalize()`.
3392 
3393   Level: developer
3394 
3395 .seealso: [](ch_ksp), `PetscFinalize()`
3396 @*/
3397 PetscErrorCode PCHPDDMFinalizePackage(void)
3398 {
3399   PetscFunctionBegin;
3400   PCHPDDMPackageInitialized = PETSC_FALSE;
3401   PetscFunctionReturn(PETSC_SUCCESS);
3402 }
3403 
3404 static PetscErrorCode MatMult_Harmonic(Mat A, Vec x, Vec y)
3405 {
3406   Harmonic h; /* [ A_00  A_01       ], furthermore, A_00 = [ A_loc,loc  A_loc,ovl ], thus, A_01 = [         ] */
3407               /* [ A_10  A_11  A_12 ]                      [ A_ovl,loc  A_ovl,ovl ]               [ A_ovl,1 ] */
3408   Vec sub;    /*  y = A x = R_loc R_0 [ A_00  A_01 ]^-1                                   R_loc = [  I_loc  ] */
3409               /*                      [ A_10  A_11 ]    R_1^T A_12 x                              [         ] */
3410   PetscFunctionBegin;
3411   PetscCall(MatShellGetContext(A, &h));
3412   PetscCall(VecSet(h->v, 0.0));
3413   PetscCall(VecGetSubVector(h->v, h->is[0], &sub));
3414   PetscCall(MatMult(h->A[0], x, sub));
3415   PetscCall(VecRestoreSubVector(h->v, h->is[0], &sub));
3416   PetscCall(KSPSolve(h->ksp, h->v, h->v));
3417   PetscCall(VecISCopy(h->v, h->is[1], SCATTER_REVERSE, y));
3418   PetscFunctionReturn(PETSC_SUCCESS);
3419 }
3420 
3421 static PetscErrorCode MatMultTranspose_Harmonic(Mat A, Vec y, Vec x)
3422 {
3423   Harmonic h;   /* x = A^T y =            [ A_00  A_01 ]^-T R_0^T R_loc^T y */
3424   Vec      sub; /*             A_12^T R_1 [ A_10  A_11 ]                    */
3425 
3426   PetscFunctionBegin;
3427   PetscCall(MatShellGetContext(A, &h));
3428   PetscCall(VecSet(h->v, 0.0));
3429   PetscCall(VecISCopy(h->v, h->is[1], SCATTER_FORWARD, y));
3430   PetscCall(KSPSolveTranspose(h->ksp, h->v, h->v));
3431   PetscCall(VecGetSubVector(h->v, h->is[0], &sub));
3432   PetscCall(MatMultTranspose(h->A[0], sub, x));
3433   PetscCall(VecRestoreSubVector(h->v, h->is[0], &sub));
3434   PetscFunctionReturn(PETSC_SUCCESS);
3435 }
3436 
3437 static PetscErrorCode MatProduct_AB_Harmonic(Mat S, Mat X, Mat Y, void *)
3438 {
3439   Harmonic h;
3440   Mat      A, B;
3441   Vec      a, b;
3442 
3443   PetscFunctionBegin;
3444   PetscCall(MatShellGetContext(S, &h));
3445   PetscCall(MatMatMult(h->A[0], X, MAT_INITIAL_MATRIX, PETSC_CURRENT, &A));
3446   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, A->cmap->n, nullptr, &B));
3447   for (PetscInt i = 0; i < A->cmap->n; ++i) {
3448     PetscCall(MatDenseGetColumnVecRead(A, i, &a));
3449     PetscCall(MatDenseGetColumnVecWrite(B, i, &b));
3450     PetscCall(VecISCopy(b, h->is[0], SCATTER_FORWARD, a));
3451     PetscCall(MatDenseRestoreColumnVecWrite(B, i, &b));
3452     PetscCall(MatDenseRestoreColumnVecRead(A, i, &a));
3453   }
3454   PetscCall(MatDestroy(&A));
3455   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, B->cmap->n, nullptr, &A));
3456   PetscCall(KSPMatSolve(h->ksp, B, A));
3457   PetscCall(MatDestroy(&B));
3458   for (PetscInt i = 0; i < A->cmap->n; ++i) {
3459     PetscCall(MatDenseGetColumnVecRead(A, i, &a));
3460     PetscCall(MatDenseGetColumnVecWrite(Y, i, &b));
3461     PetscCall(VecISCopy(a, h->is[1], SCATTER_REVERSE, b));
3462     PetscCall(MatDenseRestoreColumnVecWrite(Y, i, &b));
3463     PetscCall(MatDenseRestoreColumnVecRead(A, i, &a));
3464   }
3465   PetscCall(MatDestroy(&A));
3466   PetscFunctionReturn(PETSC_SUCCESS);
3467 }
3468 
3469 static PetscErrorCode MatProduct_AtB_Harmonic(Mat S, Mat Y, Mat X, void *)
3470 {
3471   Harmonic h;
3472   Mat      A, B;
3473   Vec      a, b;
3474 
3475   PetscFunctionBegin;
3476   PetscCall(MatShellGetContext(S, &h));
3477   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, Y->cmap->n, nullptr, &A));
3478   for (PetscInt i = 0; i < A->cmap->n; ++i) {
3479     PetscCall(MatDenseGetColumnVecRead(Y, i, &b));
3480     PetscCall(MatDenseGetColumnVecWrite(A, i, &a));
3481     PetscCall(VecISCopy(a, h->is[1], SCATTER_FORWARD, b));
3482     PetscCall(MatDenseRestoreColumnVecWrite(A, i, &a));
3483     PetscCall(MatDenseRestoreColumnVecRead(Y, i, &b));
3484   }
3485   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, A->cmap->n, nullptr, &B));
3486   PetscCall(KSPMatSolveTranspose(h->ksp, A, B));
3487   PetscCall(MatDestroy(&A));
3488   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->A[0]->rmap->n, B->cmap->n, nullptr, &A));
3489   for (PetscInt i = 0; i < A->cmap->n; ++i) {
3490     PetscCall(MatDenseGetColumnVecRead(B, i, &b));
3491     PetscCall(MatDenseGetColumnVecWrite(A, i, &a));
3492     PetscCall(VecISCopy(b, h->is[0], SCATTER_REVERSE, a));
3493     PetscCall(MatDenseRestoreColumnVecWrite(A, i, &a));
3494     PetscCall(MatDenseRestoreColumnVecRead(B, i, &b));
3495   }
3496   PetscCall(MatDestroy(&B));
3497   PetscCall(MatTransposeMatMult(h->A[0], A, MAT_REUSE_MATRIX, PETSC_CURRENT, &X));
3498   PetscCall(MatDestroy(&A));
3499   PetscFunctionReturn(PETSC_SUCCESS);
3500 }
3501 
3502 static PetscErrorCode MatDestroy_Harmonic(Mat A)
3503 {
3504   Harmonic h;
3505 
3506   PetscFunctionBegin;
3507   PetscCall(MatShellGetContext(A, &h));
3508   for (PetscInt i = 0; i < 5; ++i) PetscCall(ISDestroy(h->is + i));
3509   PetscCall(PetscFree(h->is));
3510   PetscCall(VecDestroy(&h->v));
3511   for (PetscInt i = 0; i < 2; ++i) PetscCall(MatDestroy(h->A + i));
3512   PetscCall(PetscFree(h->A));
3513   PetscCall(KSPDestroy(&h->ksp));
3514   PetscCall(PetscFree(h));
3515   PetscFunctionReturn(PETSC_SUCCESS);
3516 }
3517