xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 47c5ace7c527cf22e00c96fd8a252003364de5ff)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <petsc/private/pcbddcimpl.h>
3 #include <petsc/private/pcbddcprivateimpl.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <../src/mat/impls/dense/seq/dense.h>
6 #include <petscdmplex.h>
7 #include <petscblaslapack.h>
8 #include <petsc/private/sfimpl.h>
9 #include <petsc/private/dmpleximpl.h>
10 #include <petscdmda.h>
11 
12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13 
14 /* if range is true,  it returns B s.t. span{B} = range(A)
15    if range is false, it returns B s.t. range(B) _|_ range(A) */
16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
17 {
18   PetscScalar *uwork, *data, *U, ds = 0.;
19   PetscReal   *sing;
20   PetscBLASInt bM, bN, lwork, lierr, di = 1;
21   PetscInt     ulw, i, nr, nc, n;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal *rwork2;
24 #endif
25 
26   PetscFunctionBegin;
27   PetscCall(MatGetSize(A, &nr, &nc));
28   if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
29 
30   /* workspace */
31   if (!work) {
32     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
33     PetscCall(PetscMalloc1(ulw, &uwork));
34   } else {
35     ulw   = lw;
36     uwork = work;
37   }
38   n = PetscMin(nr, nc);
39   if (!rwork) {
40     PetscCall(PetscMalloc1(n, &sing));
41   } else {
42     sing = rwork;
43   }
44 
45   /* SVD */
46   PetscCall(PetscMalloc1(nr * nr, &U));
47   PetscCall(PetscBLASIntCast(nr, &bM));
48   PetscCall(PetscBLASIntCast(nc, &bN));
49   PetscCall(PetscBLASIntCast(ulw, &lwork));
50   PetscCall(MatDenseGetArray(A, &data));
51   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
52 #if !defined(PETSC_USE_COMPLEX)
53   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
54 #else
55   PetscCall(PetscMalloc1(5 * n, &rwork2));
56   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
57   PetscCall(PetscFree(rwork2));
58 #endif
59   PetscCall(PetscFPTrapPop());
60   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
61   PetscCall(MatDenseRestoreArray(A, &data));
62   for (i = 0; i < n; i++)
63     if (sing[i] < PETSC_SMALL) break;
64   if (!rwork) PetscCall(PetscFree(sing));
65   if (!work) PetscCall(PetscFree(uwork));
66   /* create B */
67   if (!range) {
68     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
69     PetscCall(MatDenseGetArray(*B, &data));
70     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
71   } else {
72     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
73     PetscCall(MatDenseGetArray(*B, &data));
74     PetscCall(PetscArraycpy(data, U, i * nr));
75   }
76   PetscCall(MatDenseRestoreArray(*B, &data));
77   PetscCall(PetscFree(U));
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 /* TODO REMOVE */
82 #if defined(PRINT_GDET)
83 static int inc = 0;
84 static int lev = 0;
85 #endif
86 
87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
88 {
89   Mat          GE, GEd;
90   PetscInt     rsize, csize, esize;
91   PetscScalar *ptr;
92 
93   PetscFunctionBegin;
94   PetscCall(ISGetSize(edge, &esize));
95   if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
96   PetscCall(ISGetSize(extrow, &rsize));
97   PetscCall(ISGetSize(extcol, &csize));
98 
99   /* gradients */
100   ptr = work + 5 * esize;
101   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
103   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104   PetscCall(MatDestroy(&GE));
105 
106   /* constants */
107   ptr += rsize * csize;
108   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111   PetscCall(MatDestroy(&GE));
112   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113   PetscCall(MatDestroy(&GEd));
114 
115   if (corners) {
116     Mat                GEc;
117     const PetscScalar *vals;
118     PetscScalar        v;
119 
120     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
121     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122     PetscCall(MatDenseGetArrayRead(GEd, &vals));
123     /* v       = PetscAbsScalar(vals[0]); */
124     v        = 1.;
125     cvals[0] = vals[0] / v;
126     cvals[1] = vals[1] / v;
127     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128     PetscCall(MatScale(*GKins, 1. / v));
129 #if defined(PRINT_GDET)
130     {
131       PetscViewer viewer;
132       char        filename[256];
133       PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137       PetscCall(MatView(GEc, viewer));
138       PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139       PetscCall(MatView(*GKins, viewer));
140       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141       PetscCall(MatView(GEd, viewer));
142       PetscCall(PetscViewerDestroy(&viewer));
143     }
144 #endif
145     PetscCall(MatDestroy(&GEd));
146     PetscCall(MatDestroy(&GEc));
147   }
148   PetscFunctionReturn(PETSC_SUCCESS);
149 }
150 
151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *);
152 
153 PetscErrorCode PCBDDCNedelecSupport(PC pc)
154 {
155   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
156   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
157   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158   PetscSF                sfv;
159   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160   MPI_Comm               comm;
161   IS                     lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162   IS                    *eedges, *extrows, *extcols, *alleedges;
163   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164   PetscScalar           *vals, *work;
165   PetscReal             *rwork;
166   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
167   PetscInt               ne, nv, Lv, order, n, field;
168   PetscInt               i, j, extmem, cum, maxsize, nee;
169   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170   PetscInt              *sfvleaves, *sfvroots;
171   PetscInt              *corners, *cedges;
172   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173   PetscInt              *emarks;
174   PetscBool              print, eerr, done, lrc[2], conforming, global, setprimal;
175 
176   PetscFunctionBegin;
177   /* If the discrete gradient is defined for a subset of dofs and global is true,
178      it assumes G is given in global ordering for all the dofs.
179      Otherwise, the ordering is global for the Nedelec field */
180   order      = pcbddc->nedorder;
181   conforming = pcbddc->conforming;
182   field      = pcbddc->nedfield;
183   global     = pcbddc->nedglobal;
184   setprimal  = PETSC_FALSE;
185   print      = PETSC_FALSE;
186 
187   /* Command line customization */
188   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190   /* print debug info and adaptive order TODO: to be removed */
191   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193   PetscOptionsEnd();
194 
195   /* Return if there are no edges in the decomposition */
196   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199   PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200   lrc[0] = PETSC_FALSE;
201   for (i = 0; i < n; i++) {
202     if (PetscRealPart(vals[i]) > 2.) {
203       lrc[0] = PETSC_TRUE;
204       break;
205     }
206   }
207   PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208   PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm));
209   if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
210 
211   /* Get Nedelec field */
212   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213   if (pcbddc->n_ISForDofsLocal && field >= 0) {
214     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215     nedfieldlocal = pcbddc->ISForDofsLocal[field];
216     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218     ne            = n;
219     nedfieldlocal = NULL;
220     global        = PETSC_TRUE;
221   } else if (field == PETSC_DECIDE) {
222     PetscInt rst, ren, *idx;
223 
224     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227     for (i = rst; i < ren; i++) {
228       PetscInt nc;
229 
230       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233     }
234     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236     PetscCall(PetscMalloc1(n, &idx));
237     for (i = 0, ne = 0; i < n; i++)
238       if (matis->sf_leafdata[i]) idx[ne++] = i;
239     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240   } else {
241     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242   }
243 
244   /* Sanity checks */
245   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
248 
249   /* Just set primal dofs and return */
250   if (setprimal) {
251     IS        enedfieldlocal;
252     PetscInt *eidxs;
253 
254     PetscCall(PetscMalloc1(ne, &eidxs));
255     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256     if (nedfieldlocal) {
257       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258       for (i = 0, cum = 0; i < ne; i++) {
259         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260       }
261       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262     } else {
263       for (i = 0, cum = 0; i < ne; i++) {
264         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265       }
266     }
267     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270     PetscCall(PetscFree(eidxs));
271     PetscCall(ISDestroy(&nedfieldlocal));
272     PetscCall(ISDestroy(&enedfieldlocal));
273     PetscFunctionReturn(PETSC_SUCCESS);
274   }
275 
276   /* Compute some l2g maps */
277   if (nedfieldlocal) {
278     IS is;
279 
280     /* need to map from the local Nedelec field to local numbering */
281     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286     if (global) {
287       PetscCall(PetscObjectReference((PetscObject)al2g));
288       el2g = al2g;
289     } else {
290       IS gis;
291 
292       PetscCall(ISRenumber(is, NULL, NULL, &gis));
293       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294       PetscCall(ISDestroy(&gis));
295     }
296     PetscCall(ISDestroy(&is));
297   } else {
298     /* one ref for the destruction of al2g, one for el2g */
299     PetscCall(PetscObjectReference((PetscObject)al2g));
300     PetscCall(PetscObjectReference((PetscObject)al2g));
301     el2g = al2g;
302     fl2g = NULL;
303   }
304 
305   /* Start communication to drop connections for interior edges (for cc analysis only) */
306   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308   if (nedfieldlocal) {
309     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312   } else {
313     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314   }
315   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
317 
318   /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319      Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320   if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));
321 
322   /* drop connections with interior edges to avoid unneeded communications and memory movements */
323   PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324   PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325   PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326   if (global) {
327     PetscInt rst;
328 
329     PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331       if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332     }
333     PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334     PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335   } else {
336     PetscInt *tbz;
337 
338     PetscCall(PetscMalloc1(ne, &tbz));
339     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342     for (i = 0, cum = 0; i < ne; i++)
343       if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345     PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346     PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347     PetscCall(PetscFree(tbz));
348   }
349 
350   /* Extract subdomain relevant rows of G  */
351   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353   PetscCall(MatAIJExtractRows(G, lned, &lGall));
354   /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356   PetscCall(ISDestroy(&lned));
357   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358   PetscCall(MatDestroy(&lGall));
359   PetscCall(MatISGetLocalMat(lGis, &lG));
360   if (matis->allow_repeated) { /* multi-element support */
361     Mat                   *lGn, B;
362     IS                    *is_rows, *tcols, tmap, nmap;
363     PetscInt               subnv;
364     const PetscInt        *subvidxs;
365     ISLocalToGlobalMapping mapn;
366 
367     PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371       if (fl2g) {
372         PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373       } else {
374         PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375         is_rows[i] = pcbddc->local_subs[i];
376       }
377       PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379       PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380       PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382       PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383       PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384     }
385 
386     /* Create new MATIS with repeated vertices */
387     PetscCall(MatCreate(comm, &B));
388     PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389     PetscCall(MatSetType(B, MATIS));
390     PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391     PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392     PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393     PetscCall(ISDestroy(&tmap));
394     PetscCall(ISGetLocalSize(nmap, &subnv));
395     PetscCall(ISGetIndices(nmap, &subvidxs));
396     PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397     PetscCall(ISRestoreIndices(nmap, &subvidxs));
398     PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399     PetscCall(ISDestroy(&tmap));
400     PetscCall(ISDestroy(&nmap));
401     PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402     PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403     PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405       PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406       PetscCall(ISDestroy(&is_rows[i]));
407       PetscCall(ISDestroy(&tcols[i]));
408     }
409     PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410     PetscCall(PetscFree(lGn));
411     PetscCall(PetscFree(is_rows));
412     PetscCall(PetscFree(tcols));
413     PetscCall(MatISSetLocalMat(B, lG));
414     PetscCall(MatDestroy(&lG));
415 
416     PetscCall(MatDestroy(&lGis));
417     lGis = B;
418 
419     lGis->assembled = PETSC_TRUE;
420   }
421   PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));
422 
423   /* SF for nodal dofs communications */
424   PetscCall(MatGetLocalSize(G, NULL, &Lv));
425   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426   PetscCall(PetscObjectReference((PetscObject)vl2g));
427   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428   PetscCall(PetscSFCreate(comm, &sfv));
429   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
432 
433   if (elements_corners) {
434     IS      tmp;
435     Vec     global, local;
436     Mat_IS *tGis = (Mat_IS *)lGis->data;
437 
438     PetscCall(MatCreateVecs(lGis, &global, NULL));
439     PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440     PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441     PetscCall(VecDestroy(&global));
442     PetscCall(VecDestroy(&local));
443     elements_corners = tmp;
444   }
445 
446   /* Destroy temporary G */
447   PetscCall(MatISGetLocalMat(lGis, &lG));
448   PetscCall(PetscObjectReference((PetscObject)lG));
449   PetscCall(MatDestroy(&G));
450   PetscCall(MatDestroy(&lGis));
451 
452   if (print) {
453     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454     PetscCall(MatView(lG, NULL));
455   }
456 
457   /* Save lG for values insertion in change of basis */
458   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
459 
460   /* Analyze the edge-nodes connections (duplicate lG) */
461   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463   PetscCall(PetscBTCreate(nv, &btv));
464   PetscCall(PetscBTCreate(ne, &bte));
465   PetscCall(PetscBTCreate(ne, &btb));
466   PetscCall(PetscBTCreate(ne, &btbd));
467   /* need to import the boundary specification to ensure the
468      proper detection of coarse edges' endpoints */
469   if (pcbddc->DirichletBoundariesLocal) {
470     IS is;
471 
472     if (fl2g) {
473       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474     } else {
475       is = pcbddc->DirichletBoundariesLocal;
476     }
477     PetscCall(ISGetLocalSize(is, &cum));
478     PetscCall(ISGetIndices(is, &idxs));
479     for (i = 0; i < cum; i++) {
480       if (idxs[i] >= 0 && idxs[i] < ne) {
481         PetscCall(PetscBTSet(btb, idxs[i]));
482         PetscCall(PetscBTSet(btbd, idxs[i]));
483       }
484     }
485     PetscCall(ISRestoreIndices(is, &idxs));
486     if (fl2g) PetscCall(ISDestroy(&is));
487   }
488   if (pcbddc->NeumannBoundariesLocal) {
489     IS is;
490 
491     if (fl2g) {
492       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493     } else {
494       is = pcbddc->NeumannBoundariesLocal;
495     }
496     PetscCall(ISGetLocalSize(is, &cum));
497     PetscCall(ISGetIndices(is, &idxs));
498     for (i = 0; i < cum; i++) {
499       if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500     }
501     PetscCall(ISRestoreIndices(is, &idxs));
502     if (fl2g) PetscCall(ISDestroy(&is));
503   }
504 
505   /* Count neighs per dof */
506   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));
508 
509   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510      for proper detection of coarse edges' endpoints */
511   PetscCall(PetscBTCreate(ne, &btee));
512   for (i = 0; i < ne; i++) {
513     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514   }
515   PetscCall(PetscMalloc1(ne, &marks));
516   if (!conforming) {
517     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519   }
520   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521   PetscCall(MatSeqAIJGetArray(lGe, &vals));
522   cum = 0;
523   for (i = 0; i < ne; i++) {
524     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525     if (!PetscBTLookup(btee, i)) {
526       marks[cum++] = i;
527       continue;
528     }
529     /* set badly connected edge dofs as primal */
530     if (!conforming) {
531       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532         marks[cum++] = i;
533         PetscCall(PetscBTSet(bte, i));
534         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535       } else {
536         /* every edge dofs should be connected through a certain number of nodal dofs
537            to other edge dofs belonging to coarse edges
538            - at most 2 endpoints
539            - order-1 interior nodal dofs
540            - no undefined nodal dofs (nconn < order)
541         */
542         PetscInt ends = 0, ints = 0, undef = 0;
543         for (j = ii[i]; j < ii[i + 1]; j++) {
544           PetscInt v     = jj[j], k;
545           PetscInt nconn = iit[v + 1] - iit[v];
546           for (k = iit[v]; k < iit[v + 1]; k++)
547             if (!PetscBTLookup(btee, jjt[k])) nconn--;
548           if (nconn > order) ends++;
549           else if (nconn == order) ints++;
550           else undef++;
551         }
552         if (undef || ends > 2 || ints != order - 1) {
553           marks[cum++] = i;
554           PetscCall(PetscBTSet(bte, i));
555           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556         }
557       }
558     }
559     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560     if (!order && ii[i + 1] != ii[i]) {
561       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563     }
564   }
565   PetscCall(PetscBTDestroy(&btee));
566   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568   if (!conforming) {
569     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570     PetscCall(MatDestroy(&lGt));
571   }
572   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
573 
574   /* identify splitpoints and corner candidates */
575   PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576   PetscCall(PetscBTCreate(nv, &btvcand));
577   if (elements_corners) {
578     PetscCall(ISGetLocalSize(elements_corners, &cum));
579     PetscCall(ISGetIndices(elements_corners, &idxs));
580     for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581     PetscCall(ISRestoreIndices(elements_corners, &idxs));
582   }
583 
584   if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585     PetscSF   emlsf, vmlsf;
586     PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587     PetscInt  cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;
588 
589     PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591     PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
593 
594     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));
596 
597     PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598     for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599     PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600     PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);
601 
602     PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603     for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604     PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605     PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);
606 
607     PetscCall(PetscMalloc1(ne, &eleaves));
608     PetscCall(PetscMalloc1(nv, &vleaves));
609     for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX;
610     for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX;
611     PetscCall(PetscMalloc1(emnl, &meleaves));
612     PetscCall(PetscMalloc1(vmnl, &mvleaves));
613 
614     PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615     PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616     for (i = 0; i < n_subs; i++) {
617       const PetscInt *idxs;
618       const PetscInt  subid = cum_subs + i;
619       PetscInt        ns;
620 
621       PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622       PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623       for (j = 0; j < ns; j++) {
624         const PetscInt e = idxs[j];
625 
626         eleaves[e] = subid;
627         for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628       }
629       PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630     }
631     PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632     PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633     PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634     PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635     PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636     PetscCall(PetscFree(eleaves));
637     PetscCall(PetscFree(vleaves));
638 
639     PetscCall(PetscMalloc1(ne + 1, &eneighs));
640     eneighs[0] = meleaves;
641     for (i = 0; i < ne; i++) {
642       PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643       eneighs[i + 1] = eneighs[i] + ecount[i];
644     }
645     PetscCall(PetscMalloc1(nv + 1, &vneighs));
646     vneighs[0] = mvleaves;
647     for (i = 0; i < nv; i++) {
648       PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649       vneighs[i + 1] = vneighs[i] + vcount[i];
650     }
651   } else {
652     PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653     PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654   }
655 
656   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657   if (print) {
658     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659     PetscCall(MatView(lGe, NULL));
660     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661     PetscCall(MatView(lGt, NULL));
662   }
663   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664   PetscCall(MatSeqAIJGetArray(lGt, &vals));
665   for (i = 0; i < nv; i++) {
666     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668     if (!order) { /* variable order */
669       PetscReal vorder = 0.;
670 
671       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674       ord = 1;
675     }
676     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677       const PetscInt e = jj[j];
678 
679       if (PetscBTLookup(btbd, e)) {
680         bdir = PETSC_TRUE;
681         break;
682       }
683       if (vc != ecount[e]) {
684         sneighs = PETSC_FALSE;
685       } else {
686         const PetscInt *vn = vneighs[i], *en = eneighs[e];
687 
688         for (PetscInt k = 0; k < vc; k++) {
689           if (vn[k] != en[k]) {
690             sneighs = PETSC_FALSE;
691             break;
692           }
693         }
694       }
695     }
696     if (elements_corners) test = 0;
697     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699       PetscCall(PetscBTSet(btv, i));
700     } else if (test == ord) {
701       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703         PetscCall(PetscBTSet(btv, i));
704       } else if (!elements_corners) {
705         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706         PetscCall(PetscBTSet(btvcand, i));
707       }
708     }
709   }
710   PetscCall(PetscBTDestroy(&btbd));
711 
712   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713   if (order != 1) {
714     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716     for (i = 0; i < nv; i++) {
717       if (PetscBTLookup(btvcand, i)) {
718         PetscBool found = PETSC_FALSE;
719         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720           PetscInt k, e = jj[j];
721           if (PetscBTLookup(bte, e)) continue;
722           for (k = iit[e]; k < iit[e + 1]; k++) {
723             PetscInt v = jjt[k];
724             if (v != i && PetscBTLookup(btvcand, v)) {
725               found = PETSC_TRUE;
726               break;
727             }
728           }
729         }
730         if (!found) {
731           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732           PetscCall(PetscBTClear(btvcand, i));
733         } else {
734           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735         }
736       }
737     }
738     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739   }
740   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742   PetscCall(MatDestroy(&lGe));
743 
744   /* Get the local G^T explicitly */
745   PetscCall(MatDestroy(&lGt));
746   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
748 
749   /* Mark shared nodal dofs */
750   PetscCall(PetscBTCreate(nv, &btvi));
751   for (i = 0; i < nv; i++) {
752     if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753   }
754 
755   if (matis->allow_repeated) {
756     PetscCall(PetscFree(eneighs[0]));
757     PetscCall(PetscFree(vneighs[0]));
758     PetscCall(PetscFree(eneighs));
759     PetscCall(PetscFree(vneighs));
760   }
761   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
763 
764   /* communicate corners and splitpoints */
765   PetscCall(PetscMalloc1(nv, &vmarks));
766   PetscCall(PetscArrayzero(sfvleaves, nv));
767   PetscCall(PetscArrayzero(sfvroots, Lv));
768   for (i = 0; i < nv; i++)
769     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
770 
771   if (print) {
772     IS tbz;
773 
774     cum = 0;
775     for (i = 0; i < nv; i++)
776       if (sfvleaves[i]) vmarks[cum++] = i;
777 
778     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780     PetscCall(ISView(tbz, NULL));
781     PetscCall(ISDestroy(&tbz));
782   }
783 
784   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
788 
789   /* Zero rows of lGt corresponding to identified corners
790      and interior nodal dofs */
791   cum = 0;
792   for (i = 0; i < nv; i++) {
793     if (sfvleaves[i]) {
794       vmarks[cum++] = i;
795       PetscCall(PetscBTSet(btv, i));
796     } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797   }
798   PetscCall(PetscBTDestroy(&btvi));
799   if (print) {
800     IS tbz;
801 
802     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804     PetscCall(ISView(tbz, NULL));
805     PetscCall(ISDestroy(&tbz));
806   }
807   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808   PetscCall(PetscFree(vmarks));
809   PetscCall(PetscSFDestroy(&sfv));
810   PetscCall(PetscFree2(sfvleaves, sfvroots));
811 
812   /* Recompute G */
813   PetscCall(MatDestroy(&lG));
814   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815   if (print) {
816     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817     PetscCall(MatView(lG, NULL));
818     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819     PetscCall(MatView(lGt, NULL));
820   }
821 
822   /* Get primal dofs (if any) */
823   cum = 0;
824   for (i = 0; i < ne; i++) {
825     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826   }
827   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829   if (print) {
830     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831     PetscCall(ISView(primals, NULL));
832   }
833   PetscCall(PetscBTDestroy(&bte));
834   /* TODO: what if the user passed in some of them ?  */
835   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836   PetscCall(ISDestroy(&primals));
837 
838   /* Compute edge connectivity */
839   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
840 
841   /* Symbolic conn = lG*lGt */
842   if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843     PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844     PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845     PetscCall(MatProductSetAlgorithm(conn, "default"));
846     PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848     PetscCall(MatProductSetFromOptions(conn));
849     PetscCall(MatProductSymbolic(conn));
850     PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851     if (fl2g) {
852       PetscBT   btf;
853       PetscInt *iia, *jja, *iiu, *jju;
854       PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
855 
856       /* create CSR for all local dofs */
857       PetscCall(PetscMalloc1(n + 1, &iia));
858       if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859         PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860         iiu = pcbddc->mat_graph->xadj;
861         jju = pcbddc->mat_graph->adjncy;
862       } else if (pcbddc->use_local_adj) {
863         rest = PETSC_TRUE;
864         PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865       } else {
866         free = PETSC_TRUE;
867         PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868         iiu[0] = 0;
869         for (i = 0; i < n; i++) {
870           iiu[i + 1] = i + 1;
871           jju[i]     = -1;
872         }
873       }
874 
875       /* import sizes of CSR */
876       iia[0] = 0;
877       for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
878 
879       /* overwrite entries corresponding to the Nedelec field */
880       PetscCall(PetscBTCreate(n, &btf));
881       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882       for (i = 0; i < ne; i++) {
883         PetscCall(PetscBTSet(btf, idxs[i]));
884         iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885       }
886 
887       /* iia in CSR */
888       for (i = 0; i < n; i++) iia[i + 1] += iia[i];
889 
890       /* jja in CSR */
891       PetscCall(PetscMalloc1(iia[n], &jja));
892       for (i = 0; i < n; i++)
893         if (!PetscBTLookup(btf, i))
894           for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
895 
896       /* map edge dofs connectivity */
897       if (jj) {
898         PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899         for (i = 0; i < ne; i++) {
900           PetscInt e = idxs[i];
901           for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902         }
903       }
904       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906       if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907       if (free) PetscCall(PetscFree2(iiu, jju));
908       PetscCall(PetscBTDestroy(&btf));
909     } else {
910       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911     }
912     PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913     PetscCall(MatDestroy(&conn));
914   }
915 
916   /* Analyze interface for edge dofs */
917   PetscCall(PCBDDCAnalyzeInterface(pc));
918   pcbddc->mat_graph->twodim = PETSC_FALSE;
919 
920   /* Get coarse edges in the edge space */
921   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
922 
923   if (fl2g) {
924     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925     PetscCall(PetscMalloc1(nee, &eedges));
926     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927   } else {
928     eedges  = alleedges;
929     primals = allprimals;
930   }
931 
932   /* Mark fine edge dofs with their coarse edge id */
933   PetscCall(PetscArrayzero(marks, ne));
934   PetscCall(ISGetLocalSize(primals, &cum));
935   PetscCall(ISGetIndices(primals, &idxs));
936   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937   PetscCall(ISRestoreIndices(primals, &idxs));
938   if (print) {
939     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940     PetscCall(ISView(primals, NULL));
941   }
942 
943   maxsize = 0;
944   for (i = 0; i < nee; i++) {
945     PetscInt size, mark = i + 1;
946 
947     PetscCall(ISGetLocalSize(eedges[i], &size));
948     PetscCall(ISGetIndices(eedges[i], &idxs));
949     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950     PetscCall(ISRestoreIndices(eedges[i], &idxs));
951     maxsize = PetscMax(maxsize, size);
952   }
953 
954   /* Find coarse edge endpoints */
955   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957   for (i = 0; i < nee; i++) {
958     PetscInt mark = i + 1, size;
959 
960     PetscCall(ISGetLocalSize(eedges[i], &size));
961     if (!size && nedfieldlocal) continue;
962     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963     PetscCall(ISGetIndices(eedges[i], &idxs));
964     if (print) {
965       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966       PetscCall(ISView(eedges[i], NULL));
967     }
968     for (j = 0; j < size; j++) {
969       PetscInt k, ee = idxs[j];
970       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee));
971       for (k = ii[ee]; k < ii[ee + 1]; k++) {
972         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]));
973         if (PetscBTLookup(btv, jj[k])) {
974           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976           PetscInt  k2;
977           PetscBool corner = PETSC_FALSE;
978           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980             /* it's a corner if either is connected with an edge dof belonging to a different cc or
981                if the edge dof lie on the natural part of the boundary */
982             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983               corner = PETSC_TRUE;
984               break;
985             }
986           }
987           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]));
989             PetscCall(PetscBTSet(btv, jj[k]));
990           } else {
991             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        no corners found\n"));
992           }
993         }
994       }
995     }
996     PetscCall(ISRestoreIndices(eedges[i], &idxs));
997   }
998   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000   PetscCall(PetscBTDestroy(&btb));
1001 
1002   /* Reset marked primal dofs */
1003   PetscCall(ISGetLocalSize(primals, &cum));
1004   PetscCall(ISGetIndices(primals, &idxs));
1005   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006   PetscCall(ISRestoreIndices(primals, &idxs));
1007 
1008   /* Now use the initial lG */
1009   PetscCall(MatDestroy(&lG));
1010   PetscCall(MatDestroy(&lGt));
1011   lG = lGinit;
1012   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
1013 
1014   /* Compute extended cols indices */
1015   PetscCall(PetscBTCreate(nv, &btvc));
1016   PetscCall(PetscBTCreate(nee, &bter));
1017   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019   i *= maxsize;
1020   PetscCall(PetscCalloc1(nee, &extcols));
1021   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022   eerr = PETSC_FALSE;
1023   for (i = 0; i < nee; i++) {
1024     PetscInt size, found = 0;
1025 
1026     cum = 0;
1027     PetscCall(ISGetLocalSize(eedges[i], &size));
1028     if (!size && nedfieldlocal) continue;
1029     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030     PetscCall(ISGetIndices(eedges[i], &idxs));
1031     PetscCall(PetscBTMemzero(nv, btvc));
1032     for (j = 0; j < size; j++) {
1033       PetscInt k, ee = idxs[j];
1034       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035         PetscInt vv = jj[k];
1036         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037         else if (!PetscBTLookupSet(btvc, vv)) found++;
1038       }
1039     }
1040     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045     /* it may happen that endpoints are not defined at this point
1046        if it is the case, mark this edge for a second pass */
1047     if (cum != size - 1 || found != 2) {
1048       PetscCall(PetscBTSet(bter, i));
1049       if (print) {
1050         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051         PetscCall(ISView(eedges[i], NULL));
1052         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053         PetscCall(ISView(extcols[i], NULL));
1054       }
1055       eerr = PETSC_TRUE;
1056     }
1057   }
1058   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059   PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm));
1060   if (done) {
1061     PetscInt *newprimals;
1062 
1063     PetscCall(PetscMalloc1(ne, &newprimals));
1064     PetscCall(ISGetLocalSize(primals, &cum));
1065     PetscCall(ISGetIndices(primals, &idxs));
1066     PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067     PetscCall(ISRestoreIndices(primals, &idxs));
1068     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070     for (i = 0; i < nee; i++) {
1071       PetscBool has_candidates = PETSC_FALSE;
1072       if (PetscBTLookup(bter, i)) {
1073         PetscInt size, mark = i + 1;
1074 
1075         PetscCall(ISGetLocalSize(eedges[i], &size));
1076         PetscCall(ISGetIndices(eedges[i], &idxs));
1077         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078         for (j = 0; j < size; j++) {
1079           PetscInt k, ee = idxs[j];
1080           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082             /* set all candidates located on the edge as corners */
1083             if (PetscBTLookup(btvcand, jj[k])) {
1084               PetscInt k2, vv = jj[k];
1085               has_candidates = PETSC_TRUE;
1086               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087               PetscCall(PetscBTSet(btv, vv));
1088               /* set all edge dofs connected to candidate as primals */
1089               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090                 if (marks[jjt[k2]] == mark) {
1091                   PetscInt k3, ee2 = jjt[k2];
1092                   if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093                   newprimals[cum++] = ee2;
1094                   /* finally set the new corners */
1095                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096                     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097                     PetscCall(PetscBTSet(btv, jj[k3]));
1098                   }
1099                 }
1100               }
1101             } else {
1102               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103             }
1104           }
1105         }
1106         if (!has_candidates) { /* circular edge */
1107           PetscInt k, ee = idxs[0], *tmarks;
1108 
1109           PetscCall(PetscCalloc1(ne, &tmarks));
1110           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i));
1111           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112             PetscInt k2;
1113             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]));
1114             PetscCall(PetscBTSet(btv, jj[k]));
1115             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116           }
1117           for (j = 0; j < size; j++) {
1118             if (tmarks[idxs[j]] > 1) {
1119               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120               newprimals[cum++] = idxs[j];
1121             }
1122           }
1123           PetscCall(PetscFree(tmarks));
1124         }
1125         PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126       }
1127       PetscCall(ISDestroy(&extcols[i]));
1128     }
1129     PetscCall(PetscFree(extcols));
1130     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132     if (fl2g) {
1133       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134       PetscCall(ISDestroy(&primals));
1135       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136       PetscCall(PetscFree(eedges));
1137     }
1138     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140     PetscCall(PetscFree(newprimals));
1141     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142     PetscCall(ISDestroy(&primals));
1143     PetscCall(PCBDDCAnalyzeInterface(pc));
1144     pcbddc->mat_graph->twodim = PETSC_FALSE;
1145     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146     if (fl2g) {
1147       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148       PetscCall(PetscMalloc1(nee, &eedges));
1149       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150     } else {
1151       eedges  = alleedges;
1152       primals = allprimals;
1153     }
1154     PetscCall(PetscCalloc1(nee, &extcols));
1155 
1156     /* Mark again */
1157     PetscCall(PetscArrayzero(marks, ne));
1158     for (i = 0; i < nee; i++) {
1159       PetscInt size, mark = i + 1;
1160 
1161       PetscCall(ISGetLocalSize(eedges[i], &size));
1162       PetscCall(ISGetIndices(eedges[i], &idxs));
1163       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165     }
1166     if (print) {
1167       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168       PetscCall(ISView(primals, NULL));
1169     }
1170 
1171     /* Recompute extended cols */
1172     eerr = PETSC_FALSE;
1173     for (i = 0; i < nee; i++) {
1174       PetscInt size;
1175 
1176       cum = 0;
1177       PetscCall(ISGetLocalSize(eedges[i], &size));
1178       if (!size && nedfieldlocal) continue;
1179       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180       PetscCall(ISGetIndices(eedges[i], &idxs));
1181       for (j = 0; j < size; j++) {
1182         PetscInt k, ee = idxs[j];
1183         for (k = ii[ee]; k < ii[ee + 1]; k++)
1184           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185       }
1186       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191       if (cum != size - 1) {
1192         if (print) {
1193           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194           PetscCall(ISView(eedges[i], NULL));
1195           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196           PetscCall(ISView(extcols[i], NULL));
1197         }
1198         eerr = PETSC_TRUE;
1199       }
1200     }
1201   }
1202   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203   PetscCall(PetscFree2(extrow, gidxs));
1204   PetscCall(PetscBTDestroy(&bter));
1205   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206   /* an error should not occur at this point */
1207   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1208 
1209   /* Check the number of endpoints */
1210   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211   PetscCall(PetscMalloc1(2 * nee, &corners));
1212   PetscCall(PetscMalloc1(nee, &cedges));
1213   for (i = 0; i < nee; i++) {
1214     PetscInt size, found = 0, gc[2];
1215 
1216     /* init with defaults */
1217     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218     PetscCall(ISGetLocalSize(eedges[i], &size));
1219     if (!size && nedfieldlocal) continue;
1220     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221     PetscCall(ISGetIndices(eedges[i], &idxs));
1222     PetscCall(PetscBTMemzero(nv, btvc));
1223     for (j = 0; j < size; j++) {
1224       PetscInt k, ee = idxs[j];
1225       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226         PetscInt vv = jj[k];
1227         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229           corners[i * 2 + found++] = vv;
1230         }
1231       }
1232     }
1233     if (found != 2) {
1234       PetscInt e;
1235       if (fl2g) {
1236         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237       } else {
1238         e = idxs[0];
1239       }
1240       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241     }
1242 
1243     /* get primal dof index on this coarse edge */
1244     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245     if (gc[0] > gc[1]) {
1246       PetscInt swap      = corners[2 * i];
1247       corners[2 * i]     = corners[2 * i + 1];
1248       corners[2 * i + 1] = swap;
1249     }
1250     cedges[i] = idxs[size - 1];
1251     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253   }
1254   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255   PetscCall(PetscBTDestroy(&btvc));
1256 
1257   if (PetscDefined(USE_DEBUG)) {
1258     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259      not interfere with neighbouring coarse edges */
1260     PetscCall(PetscMalloc1(nee + 1, &emarks));
1261     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262     for (i = 0; i < nv; i++) {
1263       PetscInt emax = 0, eemax = 0;
1264 
1265       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266       PetscCall(PetscArrayzero(emarks, nee + 1));
1267       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268       for (j = 1; j < nee + 1; j++) {
1269         if (emax < emarks[j]) {
1270           emax  = emarks[j];
1271           eemax = j;
1272         }
1273       }
1274       /* not relevant for edges */
1275       if (!eemax) continue;
1276 
1277       for (j = ii[i]; j < ii[i + 1]; j++) {
1278         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279       }
1280     }
1281     PetscCall(PetscFree(emarks));
1282     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283   }
1284 
1285   /* Compute extended rows indices for edge blocks of the change of basis */
1286   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288   extmem *= maxsize;
1289   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290   PetscCall(PetscMalloc1(nee, &extrows));
1291   PetscCall(PetscCalloc1(nee, &extrowcum));
1292   for (i = 0; i < nv; i++) {
1293     PetscInt mark = 0, size, start;
1294 
1295     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296     for (j = ii[i]; j < ii[i + 1]; j++)
1297       if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1298 
1299     /* not relevant */
1300     if (!mark) continue;
1301 
1302     /* import extended row */
1303     mark--;
1304     start = mark * extmem + extrowcum[mark];
1305     size  = ii[i + 1] - ii[i];
1306     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308     extrowcum[mark] += size;
1309   }
1310   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311   PetscCall(MatDestroy(&lGt));
1312   PetscCall(PetscFree(marks));
1313 
1314   /* Compress extrows */
1315   cum = 0;
1316   for (i = 0; i < nee; i++) {
1317     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318     PetscCall(PetscSortRemoveDupsInt(&size, start));
1319     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320     cum = PetscMax(cum, size);
1321   }
1322   PetscCall(PetscFree(extrowcum));
1323   PetscCall(PetscBTDestroy(&btv));
1324   PetscCall(PetscBTDestroy(&btvcand));
1325 
1326   /* Workspace for lapack inner calls and VecSetValues */
1327   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1328 
1329   /* Create change of basis matrix (preallocation can be improved) */
1330   PetscCall(MatCreate(comm, &T));
1331   PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332   PetscCall(MatSetType(T, MATAIJ));
1333   PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1334   PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1335   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1336   PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1337   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1338   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1339 
1340   /* Defaults to identity */
1341   for (i = pc->mat->rmap->rstart; i < pc->mat->rmap->rend; i++) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1342 
1343   /* Create discrete gradient for the coarser level if needed */
1344   PetscCall(MatDestroy(&pcbddc->nedcG));
1345   PetscCall(ISDestroy(&pcbddc->nedclocal));
1346   if (pcbddc->current_level < pcbddc->max_levels) {
1347     ISLocalToGlobalMapping cel2g, cvl2g;
1348     IS                     wis, gwis;
1349     PetscInt               cnv, cne;
1350 
1351     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1352     if (fl2g) {
1353       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1354     } else {
1355       PetscCall(PetscObjectReference((PetscObject)wis));
1356       pcbddc->nedclocal = wis;
1357     }
1358     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1359     PetscCall(ISDestroy(&wis));
1360     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1361     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1362     PetscCall(ISDestroy(&wis));
1363     PetscCall(ISDestroy(&gwis));
1364 
1365     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1366     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1367     PetscCall(ISDestroy(&wis));
1368     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1369     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1370     PetscCall(ISDestroy(&wis));
1371     PetscCall(ISDestroy(&gwis));
1372 
1373     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1374     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1375     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1376     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1377     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1378     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1379     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1380     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1381   }
1382   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1383 
1384 #if defined(PRINT_GDET)
1385   inc = 0;
1386   lev = pcbddc->current_level;
1387 #endif
1388 
1389   /* Insert values in the change of basis matrix */
1390   for (i = 0; i < nee; i++) {
1391     Mat         Gins = NULL, GKins = NULL;
1392     IS          cornersis = NULL;
1393     PetscScalar cvals[2];
1394 
1395     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1396     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1397     if (Gins && GKins) {
1398       const PetscScalar *data;
1399       const PetscInt    *rows, *cols;
1400       PetscInt           nrh, nch, nrc, ncc;
1401 
1402       PetscCall(ISGetIndices(eedges[i], &cols));
1403       /* H1 */
1404       PetscCall(ISGetIndices(extrows[i], &rows));
1405       PetscCall(MatGetSize(Gins, &nrh, &nch));
1406       PetscCall(MatDenseGetArrayRead(Gins, &data));
1407       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1408       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1409       PetscCall(ISRestoreIndices(extrows[i], &rows));
1410       /* complement */
1411       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1412       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1413       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1414       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1415       PetscCall(MatDenseGetArrayRead(GKins, &data));
1416       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1417       PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1418 
1419       /* coarse discrete gradient */
1420       if (pcbddc->nedcG) {
1421         PetscInt cols[2];
1422 
1423         cols[0] = 2 * i;
1424         cols[1] = 2 * i + 1;
1425         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1426       }
1427       PetscCall(ISRestoreIndices(eedges[i], &cols));
1428     }
1429     PetscCall(ISDestroy(&extrows[i]));
1430     PetscCall(ISDestroy(&extcols[i]));
1431     PetscCall(ISDestroy(&cornersis));
1432     PetscCall(MatDestroy(&Gins));
1433     PetscCall(MatDestroy(&GKins));
1434   }
1435 
1436   /* for FDM element-by-element: first dof on the edge only constraint. Why? */
1437   if (elements_corners && pcbddc->mat_graph->multi_element) {
1438     ISLocalToGlobalMapping map;
1439     MatNullSpace           nnsp;
1440     Vec                    quad_vec;
1441 
1442     PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL));
1443     PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp));
1444     PetscCall(VecLockReadPop(quad_vec));
1445     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
1446     PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1447     for (i = 0; i < nee; i++) {
1448       const PetscInt *idxs;
1449       PetscScalar     one = 1.0;
1450 
1451       PetscCall(ISGetLocalSize(alleedges[i], &cum));
1452       if (!cum) continue;
1453       PetscCall(ISGetIndices(alleedges[i], &idxs));
1454       PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES));
1455       PetscCall(ISRestoreIndices(alleedges[i], &idxs));
1456     }
1457     PetscCall(VecLockReadPush(quad_vec));
1458     PetscCall(VecDestroy(&quad_vec));
1459     PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1460     PetscCall(MatNullSpaceDestroy(&nnsp));
1461   }
1462   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1463 
1464   /* Start assembling */
1465   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1466   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1467 
1468   /* Free */
1469   if (fl2g) {
1470     PetscCall(ISDestroy(&primals));
1471     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1472     PetscCall(PetscFree(eedges));
1473   }
1474 
1475   /* hack mat_graph with primal dofs on the coarse edges */
1476   {
1477     PCBDDCGraph graph  = pcbddc->mat_graph;
1478     PetscInt   *oqueue = graph->queue;
1479     PetscInt   *ocptr  = graph->cptr;
1480     PetscInt    ncc, *idxs;
1481 
1482     /* find first primal edge */
1483     if (pcbddc->nedclocal) {
1484       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1485     } else {
1486       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1487       idxs = cedges;
1488     }
1489     cum = 0;
1490     while (cum < nee && cedges[cum] < 0) cum++;
1491 
1492     /* adapt connected components */
1493     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1494     graph->cptr[0] = 0;
1495     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1496       PetscInt lc = ocptr[i + 1] - ocptr[i];
1497       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1498         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1499         graph->queue[graph->cptr[ncc]] = cedges[cum];
1500         ncc++;
1501         lc--;
1502         cum++;
1503         while (cum < nee && cedges[cum] < 0) cum++;
1504       }
1505       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1506       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1507       ncc++;
1508     }
1509     graph->ncc = ncc;
1510     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1511     PetscCall(PetscFree2(ocptr, oqueue));
1512   }
1513   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1514   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1515   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1516 
1517   PetscCall(ISDestroy(&nedfieldlocal));
1518   PetscCall(PetscFree(extrow));
1519   PetscCall(PetscFree2(work, rwork));
1520   PetscCall(PetscFree(corners));
1521   PetscCall(PetscFree(cedges));
1522   PetscCall(PetscFree(extrows));
1523   PetscCall(PetscFree(extcols));
1524   PetscCall(MatDestroy(&lG));
1525 
1526   /* Complete assembling */
1527   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1528   PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1529   if (pcbddc->nedcG) {
1530     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1531     PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1532   }
1533 
1534   PetscCall(ISDestroy(&elements_corners));
1535 
1536   /* set change of basis */
1537   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1538   PetscCall(MatDestroy(&T));
1539   PetscFunctionReturn(PETSC_SUCCESS);
1540 }
1541 
1542 /* the near-null space of BDDC carries information on quadrature weights,
1543    and these can be collinear -> so cheat with MatNullSpaceCreate
1544    and create a suitable set of basis vectors first */
1545 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1546 {
1547   PetscInt i;
1548 
1549   PetscFunctionBegin;
1550   for (i = 0; i < nvecs; i++) {
1551     PetscInt first, last;
1552 
1553     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1554     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1555     if (i >= first && i < last) {
1556       PetscScalar *data;
1557       PetscCall(VecGetArray(quad_vecs[i], &data));
1558       if (!has_const) {
1559         data[i - first] = 1.;
1560       } else {
1561         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1562         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1563       }
1564       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1565     }
1566     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1567   }
1568   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1569   for (i = 0; i < nvecs; i++) { /* reset vectors */
1570     PetscInt first, last;
1571     PetscCall(VecLockReadPop(quad_vecs[i]));
1572     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1573     if (i >= first && i < last) {
1574       PetscScalar *data;
1575       PetscCall(VecGetArray(quad_vecs[i], &data));
1576       if (!has_const) {
1577         data[i - first] = 0.;
1578       } else {
1579         data[2 * i - first]     = 0.;
1580         data[2 * i - first + 1] = 0.;
1581       }
1582       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1583     }
1584     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1585     PetscCall(VecLockReadPush(quad_vecs[i]));
1586   }
1587   PetscFunctionReturn(PETSC_SUCCESS);
1588 }
1589 
1590 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1591 {
1592   Mat                    loc_divudotp;
1593   Vec                    p, v, quad_vec;
1594   ISLocalToGlobalMapping map;
1595   PetscScalar           *array;
1596 
1597   PetscFunctionBegin;
1598   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1599   if (!transpose) {
1600     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1601   } else {
1602     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1603   }
1604   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1605   PetscCall(VecLockReadPop(quad_vec));
1606   PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1607 
1608   /* compute local quad vec */
1609   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1610   if (!transpose) {
1611     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1612   } else {
1613     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1614   }
1615   /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1616   PetscCall(VecSet(p, 1.));
1617   if (!transpose) {
1618     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1619   } else {
1620     PetscCall(MatMult(loc_divudotp, p, v));
1621   }
1622   PetscCall(VecDestroy(&p));
1623   if (vl2l) {
1624     Mat        lA;
1625     VecScatter sc;
1626     Vec        vins;
1627 
1628     PetscCall(MatISGetLocalMat(A, &lA));
1629     PetscCall(MatCreateVecs(lA, &vins, NULL));
1630     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1631     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1632     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1633     PetscCall(VecScatterDestroy(&sc));
1634     PetscCall(VecDestroy(&v));
1635     v = vins;
1636   }
1637 
1638   /* mask summation of interface values */
1639   PetscInt        n, *mmask, *mask, *idxs, nmr, nr;
1640   const PetscInt *degree;
1641   PetscSF         msf;
1642 
1643   PetscCall(VecGetLocalSize(v, &n));
1644   PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1645   PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1646   PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1647   PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1648   PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, &degree));
1649   PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, &degree));
1650   for (PetscInt i = 0, c = 0; i < nr; i++) {
1651     mmask[c] = 1;
1652     c += degree[i];
1653   }
1654   PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1655   PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1656   PetscCall(VecGetArray(v, &array));
1657   for (PetscInt i = 0; i < n; i++) {
1658     array[i] *= mask[i];
1659     idxs[i] = i;
1660   }
1661   PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1662   PetscCall(VecRestoreArray(v, &array));
1663   PetscCall(PetscFree3(mmask, mask, idxs));
1664   PetscCall(VecDestroy(&v));
1665   PetscCall(VecAssemblyBegin(quad_vec));
1666   PetscCall(VecAssemblyEnd(quad_vec));
1667   PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1668   PetscCall(VecLockReadPush(quad_vec));
1669   PetscCall(VecDestroy(&quad_vec));
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1674 {
1675   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1676 
1677   PetscFunctionBegin;
1678   if (primalv) {
1679     if (pcbddc->user_primal_vertices_local) {
1680       IS list[2], newp;
1681 
1682       list[0] = primalv;
1683       list[1] = pcbddc->user_primal_vertices_local;
1684       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1685       PetscCall(ISSortRemoveDups(newp));
1686       PetscCall(ISDestroy(&list[1]));
1687       pcbddc->user_primal_vertices_local = newp;
1688     } else {
1689       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1690     }
1691   }
1692   PetscFunctionReturn(PETSC_SUCCESS);
1693 }
1694 
1695 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1696 {
1697   PetscInt f, *comp = (PetscInt *)ctx;
1698 
1699   PetscFunctionBegin;
1700   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1701   PetscFunctionReturn(PETSC_SUCCESS);
1702 }
1703 
1704 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1705 {
1706   Vec       local, global;
1707   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1708   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1709   PetscBool monolithic = PETSC_FALSE;
1710 
1711   PetscFunctionBegin;
1712   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1713   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1714   PetscOptionsEnd();
1715   /* need to convert from global to local topology information and remove references to information in global ordering */
1716   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1717   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1718   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1719   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1720   if (monolithic) { /* just get block size to properly compute vertices */
1721     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1722     goto boundary;
1723   }
1724 
1725   if (pcbddc->user_provided_isfordofs) {
1726     if (pcbddc->n_ISForDofs) {
1727       PetscInt i;
1728 
1729       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1730       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1731         PetscInt bs;
1732 
1733         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1734         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1735         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1736         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1737       }
1738       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1739       pcbddc->n_ISForDofs      = 0;
1740       PetscCall(PetscFree(pcbddc->ISForDofs));
1741     }
1742   } else {
1743     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1744       DM dm;
1745 
1746       PetscCall(MatGetDM(pc->pmat, &dm));
1747       if (!dm) PetscCall(PCGetDM(pc, &dm));
1748       if (dm) {
1749         IS      *fields;
1750         PetscInt nf, i;
1751 
1752         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1753         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1754         for (i = 0; i < nf; i++) {
1755           PetscInt bs;
1756 
1757           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1758           PetscCall(ISGetBlockSize(fields[i], &bs));
1759           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1760           PetscCall(ISDestroy(&fields[i]));
1761         }
1762         PetscCall(PetscFree(fields));
1763         pcbddc->n_ISForDofsLocal = nf;
1764       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1765         PetscContainer c;
1766 
1767         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1768         if (c) {
1769           MatISLocalFields lf;
1770           PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1771           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1772         } else { /* fallback, create the default fields if bs > 1 */
1773           PetscInt i, n = matis->A->rmap->n;
1774           PetscCall(MatGetBlockSize(pc->pmat, &i));
1775           if (i > 1) {
1776             pcbddc->n_ISForDofsLocal = i;
1777             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1778             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1779           }
1780         }
1781       }
1782     } else {
1783       PetscInt i;
1784       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1785     }
1786   }
1787 
1788 boundary:
1789   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1790     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1791   } else if (pcbddc->DirichletBoundariesLocal) {
1792     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1793   }
1794   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1795     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1796   } else if (pcbddc->NeumannBoundariesLocal) {
1797     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1798   }
1799   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1800   PetscCall(VecDestroy(&global));
1801   PetscCall(VecDestroy(&local));
1802   /* detect local disconnected subdomains if requested or needed */
1803   if (pcbddc->detect_disconnected || matis->allow_repeated) {
1804     IS        primalv = NULL;
1805     PetscInt  nel;
1806     PetscBool filter = pcbddc->detect_disconnected_filter;
1807 
1808     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1809     PetscCall(PetscFree(pcbddc->local_subs));
1810     PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1811     if (matis->allow_repeated && nel) {
1812       const PetscInt *elsizes;
1813 
1814       pcbddc->n_local_subs = nel;
1815       PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1816       PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1817       for (PetscInt i = 0, c = 0; i < nel; i++) {
1818         PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1819         c += elsizes[i];
1820       }
1821     } else {
1822       PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1823     }
1824     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1825     PetscCall(ISDestroy(&primalv));
1826   }
1827   /* early stage corner detection */
1828   {
1829     DM dm;
1830 
1831     PetscCall(MatGetDM(pc->pmat, &dm));
1832     if (!dm) PetscCall(PCGetDM(pc, &dm));
1833     if (dm) {
1834       PetscBool isda;
1835 
1836       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1837       if (isda) {
1838         ISLocalToGlobalMapping l2l;
1839         IS                     corners;
1840         Mat                    lA;
1841         PetscBool              gl, lo;
1842 
1843         {
1844           Vec                cvec;
1845           const PetscScalar *coords;
1846           PetscInt           dof, n, cdim;
1847           PetscBool          memc = PETSC_TRUE;
1848 
1849           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1850           PetscCall(DMGetCoordinates(dm, &cvec));
1851           PetscCall(VecGetLocalSize(cvec, &n));
1852           PetscCall(VecGetBlockSize(cvec, &cdim));
1853           n /= cdim;
1854           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1855           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1856           PetscCall(VecGetArrayRead(cvec, &coords));
1857 #if defined(PETSC_USE_COMPLEX)
1858           memc = PETSC_FALSE;
1859 #endif
1860           if (dof != 1) memc = PETSC_FALSE;
1861           if (memc) {
1862             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1863           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1864             PetscReal *bcoords = pcbddc->mat_graph->coords;
1865             PetscInt   i, b, d;
1866 
1867             for (i = 0; i < n; i++) {
1868               for (b = 0; b < dof; b++) {
1869                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1870               }
1871             }
1872           }
1873           PetscCall(VecRestoreArrayRead(cvec, &coords));
1874           pcbddc->mat_graph->cdim  = cdim;
1875           pcbddc->mat_graph->cnloc = dof * n;
1876           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1877         }
1878         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1879         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1880         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1881         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
1882         lo = (PetscBool)(l2l && corners);
1883         PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1884         if (gl) { /* From PETSc's DMDA */
1885           const PetscInt *idx;
1886           PetscInt        dof, bs, *idxout, n;
1887 
1888           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1889           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
1890           PetscCall(ISGetLocalSize(corners, &n));
1891           PetscCall(ISGetIndices(corners, &idx));
1892           if (bs == dof) {
1893             PetscCall(PetscMalloc1(n, &idxout));
1894             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
1895           } else { /* the original DMDA local-to-local map have been modified */
1896             PetscInt i, d;
1897 
1898             PetscCall(PetscMalloc1(dof * n, &idxout));
1899             for (i = 0; i < n; i++)
1900               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1901             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
1902 
1903             bs = 1;
1904             n *= dof;
1905           }
1906           PetscCall(ISRestoreIndices(corners, &idx));
1907           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1908           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
1909           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
1910           PetscCall(ISDestroy(&corners));
1911           pcbddc->corner_selected  = PETSC_TRUE;
1912           pcbddc->corner_selection = PETSC_TRUE;
1913         }
1914         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1915       }
1916     }
1917   }
1918   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1919     DM dm;
1920 
1921     PetscCall(MatGetDM(pc->pmat, &dm));
1922     if (!dm) PetscCall(PCGetDM(pc, &dm));
1923     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1924       Vec          vcoords;
1925       PetscSection section;
1926       PetscReal   *coords;
1927       PetscInt     d, cdim, nl, nf, **ctxs;
1928       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1929       /* debug coordinates */
1930       PetscViewer       viewer;
1931       PetscBool         flg;
1932       PetscViewerFormat format;
1933       const char       *prefix;
1934 
1935       PetscCall(DMGetCoordinateDim(dm, &cdim));
1936       PetscCall(DMGetLocalSection(dm, &section));
1937       PetscCall(PetscSectionGetNumFields(section, &nf));
1938       PetscCall(DMCreateGlobalVector(dm, &vcoords));
1939       PetscCall(VecGetLocalSize(vcoords, &nl));
1940       PetscCall(PetscMalloc1(nl * cdim, &coords));
1941       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
1942       PetscCall(PetscMalloc1(nf, &ctxs[0]));
1943       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1944       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
1945 
1946       /* debug coordinates */
1947       PetscCall(PCGetOptionsPrefix(pc, &prefix));
1948       PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
1949       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
1950       for (d = 0; d < cdim; d++) {
1951         PetscInt           i;
1952         const PetscScalar *v;
1953         char               name[16];
1954 
1955         for (i = 0; i < nf; i++) ctxs[i][0] = d;
1956         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d));
1957         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
1958         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
1959         if (flg) PetscCall(VecView(vcoords, viewer));
1960         PetscCall(VecGetArrayRead(vcoords, &v));
1961         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1962         PetscCall(VecRestoreArrayRead(vcoords, &v));
1963       }
1964       PetscCall(VecDestroy(&vcoords));
1965       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
1966       PetscCall(PetscFree(coords));
1967       PetscCall(PetscFree(ctxs[0]));
1968       PetscCall(PetscFree2(funcs, ctxs));
1969       if (flg) {
1970         PetscCall(PetscViewerPopFormat(viewer));
1971         PetscCall(PetscViewerDestroy(&viewer));
1972       }
1973     }
1974   }
1975   PetscFunctionReturn(PETSC_SUCCESS);
1976 }
1977 
1978 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1979 {
1980   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
1981   IS              nis;
1982   const PetscInt *idxs;
1983   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;
1984 
1985   PetscFunctionBegin;
1986   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
1987   if (mop == MPI_LAND) {
1988     /* init rootdata with true */
1989     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
1990   } else {
1991     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
1992   }
1993   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
1994   PetscCall(ISGetLocalSize(*is, &nd));
1995   PetscCall(ISGetIndices(*is, &idxs));
1996   for (i = 0; i < nd; i++)
1997     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
1998   PetscCall(ISRestoreIndices(*is, &idxs));
1999   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2000   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2001   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2002   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2003   if (mop == MPI_LAND) {
2004     PetscCall(PetscMalloc1(nd, &nidxs));
2005   } else {
2006     PetscCall(PetscMalloc1(n, &nidxs));
2007   }
2008   for (i = 0, nnd = 0; i < n; i++)
2009     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2010   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2011   PetscCall(ISDestroy(is));
2012   *is = nis;
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2017 {
2018   PC_IS   *pcis   = (PC_IS *)pc->data;
2019   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2020 
2021   PetscFunctionBegin;
2022   if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2023   if (pcbddc->ChangeOfBasisMatrix) {
2024     Vec swap;
2025 
2026     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2027     swap                = pcbddc->work_change;
2028     pcbddc->work_change = r;
2029     r                   = swap;
2030   }
2031   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2032   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2033   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2034   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2035   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2036   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2037   PetscCall(VecSet(z, 0.));
2038   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2039   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2040   if (pcbddc->ChangeOfBasisMatrix) {
2041     pcbddc->work_change = r;
2042     PetscCall(VecCopy(z, pcbddc->work_change));
2043     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2044   }
2045   PetscFunctionReturn(PETSC_SUCCESS);
2046 }
2047 
2048 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2049 {
2050   PCBDDCBenignMatMult_ctx ctx;
2051   PetscBool               apply_right, apply_left, reset_x;
2052 
2053   PetscFunctionBegin;
2054   PetscCall(MatShellGetContext(A, &ctx));
2055   if (transpose) {
2056     apply_right = ctx->apply_left;
2057     apply_left  = ctx->apply_right;
2058   } else {
2059     apply_right = ctx->apply_right;
2060     apply_left  = ctx->apply_left;
2061   }
2062   reset_x = PETSC_FALSE;
2063   if (apply_right) {
2064     const PetscScalar *ax;
2065     PetscInt           nl, i;
2066 
2067     PetscCall(VecGetLocalSize(x, &nl));
2068     PetscCall(VecGetArrayRead(x, &ax));
2069     PetscCall(PetscArraycpy(ctx->work, ax, nl));
2070     PetscCall(VecRestoreArrayRead(x, &ax));
2071     for (i = 0; i < ctx->benign_n; i++) {
2072       PetscScalar     sum, val;
2073       const PetscInt *idxs;
2074       PetscInt        nz, j;
2075       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2076       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2077       sum = 0.;
2078       if (ctx->apply_p0) {
2079         val = ctx->work[idxs[nz - 1]];
2080         for (j = 0; j < nz - 1; j++) {
2081           sum += ctx->work[idxs[j]];
2082           ctx->work[idxs[j]] += val;
2083         }
2084       } else {
2085         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2086       }
2087       ctx->work[idxs[nz - 1]] -= sum;
2088       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2089     }
2090     PetscCall(VecPlaceArray(x, ctx->work));
2091     reset_x = PETSC_TRUE;
2092   }
2093   if (transpose) {
2094     PetscCall(MatMultTranspose(ctx->A, x, y));
2095   } else {
2096     PetscCall(MatMult(ctx->A, x, y));
2097   }
2098   if (reset_x) PetscCall(VecResetArray(x));
2099   if (apply_left) {
2100     PetscScalar *ay;
2101     PetscInt     i;
2102 
2103     PetscCall(VecGetArray(y, &ay));
2104     for (i = 0; i < ctx->benign_n; i++) {
2105       PetscScalar     sum, val;
2106       const PetscInt *idxs;
2107       PetscInt        nz, j;
2108       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2109       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2110       val = -ay[idxs[nz - 1]];
2111       if (ctx->apply_p0) {
2112         sum = 0.;
2113         for (j = 0; j < nz - 1; j++) {
2114           sum += ay[idxs[j]];
2115           ay[idxs[j]] += val;
2116         }
2117         ay[idxs[nz - 1]] += sum;
2118       } else {
2119         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2120         ay[idxs[nz - 1]] = 0.;
2121       }
2122       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2123     }
2124     PetscCall(VecRestoreArray(y, &ay));
2125   }
2126   PetscFunctionReturn(PETSC_SUCCESS);
2127 }
2128 
2129 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2130 {
2131   PetscFunctionBegin;
2132   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2133   PetscFunctionReturn(PETSC_SUCCESS);
2134 }
2135 
2136 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2137 {
2138   PetscFunctionBegin;
2139   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2140   PetscFunctionReturn(PETSC_SUCCESS);
2141 }
2142 
2143 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2144 {
2145   PC_IS                  *pcis   = (PC_IS *)pc->data;
2146   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
2147   PCBDDCBenignMatMult_ctx ctx;
2148 
2149   PetscFunctionBegin;
2150   if (!restore) {
2151     Mat                A_IB, A_BI;
2152     PetscScalar       *work;
2153     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2154 
2155     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2156     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2157     PetscCall(PetscMalloc1(pcis->n, &work));
2158     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2159     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2160     PetscCall(MatSetType(A_IB, MATSHELL));
2161     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private));
2162     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private));
2163     PetscCall(PetscNew(&ctx));
2164     PetscCall(MatShellSetContext(A_IB, ctx));
2165     ctx->apply_left  = PETSC_TRUE;
2166     ctx->apply_right = PETSC_FALSE;
2167     ctx->apply_p0    = PETSC_FALSE;
2168     ctx->benign_n    = pcbddc->benign_n;
2169     if (reuse) {
2170       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2171       ctx->free                 = PETSC_FALSE;
2172     } else { /* TODO: could be optimized for successive solves */
2173       ISLocalToGlobalMapping N_to_D;
2174       PetscInt               i;
2175 
2176       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2177       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2178       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2179       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2180       ctx->free = PETSC_TRUE;
2181     }
2182     ctx->A    = pcis->A_IB;
2183     ctx->work = work;
2184     PetscCall(MatSetUp(A_IB));
2185     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2186     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2187     pcis->A_IB = A_IB;
2188 
2189     /* A_BI as A_IB^T */
2190     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2191     pcbddc->benign_original_mat = pcis->A_BI;
2192     pcis->A_BI                  = A_BI;
2193   } else {
2194     if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2195     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2196     PetscCall(MatDestroy(&pcis->A_IB));
2197     pcis->A_IB = ctx->A;
2198     ctx->A     = NULL;
2199     PetscCall(MatDestroy(&pcis->A_BI));
2200     pcis->A_BI                  = pcbddc->benign_original_mat;
2201     pcbddc->benign_original_mat = NULL;
2202     if (ctx->free) {
2203       PetscInt i;
2204       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2205       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2206     }
2207     PetscCall(PetscFree(ctx->work));
2208     PetscCall(PetscFree(ctx));
2209   }
2210   PetscFunctionReturn(PETSC_SUCCESS);
2211 }
2212 
2213 /* used just in bddc debug mode */
2214 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2215 {
2216   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2217   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2218   Mat      An;
2219 
2220   PetscFunctionBegin;
2221   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2222   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2223   if (is1) {
2224     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2225     PetscCall(MatDestroy(&An));
2226   } else {
2227     *B = An;
2228   }
2229   PetscFunctionReturn(PETSC_SUCCESS);
2230 }
2231 
2232 /* TODO: add reuse flag */
2233 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2234 {
2235   Mat             Bt;
2236   PetscScalar    *a, *bdata;
2237   const PetscInt *ii, *ij;
2238   PetscInt        m, n, i, nnz, *bii, *bij;
2239   PetscBool       flg_row;
2240 
2241   PetscFunctionBegin;
2242   PetscCall(MatGetSize(A, &n, &m));
2243   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2244   PetscCall(MatSeqAIJGetArray(A, &a));
2245   nnz = n;
2246   for (i = 0; i < ii[n]; i++) {
2247     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2248   }
2249   PetscCall(PetscMalloc1(n + 1, &bii));
2250   PetscCall(PetscMalloc1(nnz, &bij));
2251   PetscCall(PetscMalloc1(nnz, &bdata));
2252   nnz    = 0;
2253   bii[0] = 0;
2254   for (i = 0; i < n; i++) {
2255     PetscInt j;
2256     for (j = ii[i]; j < ii[i + 1]; j++) {
2257       PetscScalar entry = a[j];
2258       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2259         bij[nnz]   = ij[j];
2260         bdata[nnz] = entry;
2261         nnz++;
2262       }
2263     }
2264     bii[i + 1] = nnz;
2265   }
2266   PetscCall(MatSeqAIJRestoreArray(A, &a));
2267   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2268   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2269   {
2270     Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2271     b->free_a     = PETSC_TRUE;
2272     b->free_ij    = PETSC_TRUE;
2273   }
2274   if (*B == A) PetscCall(MatDestroy(&A));
2275   *B = Bt;
2276   PetscFunctionReturn(PETSC_SUCCESS);
2277 }
2278 
2279 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2280 {
2281   Mat                    B = NULL;
2282   DM                     dm;
2283   IS                     is_dummy, *cc_n;
2284   ISLocalToGlobalMapping l2gmap_dummy;
2285   PCBDDCGraph            graph;
2286   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2287   PetscInt               i, n;
2288   PetscInt              *xadj, *adjncy;
2289   PetscBool              isplex = PETSC_FALSE;
2290 
2291   PetscFunctionBegin;
2292   if (ncc) *ncc = 0;
2293   if (cc) *cc = NULL;
2294   if (primalv) *primalv = NULL;
2295   PetscCall(PCBDDCGraphCreate(&graph));
2296   PetscCall(MatGetDM(pc->pmat, &dm));
2297   if (!dm) PetscCall(PCGetDM(pc, &dm));
2298   if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2299   if (filter) isplex = PETSC_FALSE;
2300 
2301   if (isplex) { /* this code has been modified from plexpartition.c */
2302     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2303     PetscInt       *adj = NULL;
2304     IS              cellNumbering;
2305     const PetscInt *cellNum;
2306     PetscBool       useCone, useClosure;
2307     PetscSection    section;
2308     PetscSegBuffer  adjBuffer;
2309     PetscSF         sfPoint;
2310 
2311     PetscCall(DMConvert(dm, DMPLEX, &dm));
2312     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2313     PetscCall(DMGetPointSF(dm, &sfPoint));
2314     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2315     /* Build adjacency graph via a section/segbuffer */
2316     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2317     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2318     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2319     /* Always use FVM adjacency to create partitioner graph */
2320     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2321     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2322     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2323     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2324     for (n = 0, p = pStart; p < pEnd; p++) {
2325       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2326       if (nroots > 0) {
2327         if (cellNum[p] < 0) continue;
2328       }
2329       adjSize = PETSC_DETERMINE;
2330       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2331       for (a = 0; a < adjSize; ++a) {
2332         const PetscInt point = adj[a];
2333         if (pStart <= point && point < pEnd) {
2334           PetscInt *PETSC_RESTRICT pBuf;
2335           PetscCall(PetscSectionAddDof(section, p, 1));
2336           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2337           *pBuf = point;
2338         }
2339       }
2340       n++;
2341     }
2342     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2343     /* Derive CSR graph from section/segbuffer */
2344     PetscCall(PetscSectionSetUp(section));
2345     PetscCall(PetscSectionGetStorageSize(section, &size));
2346     PetscCall(PetscMalloc1(n + 1, &xadj));
2347     for (idx = 0, p = pStart; p < pEnd; p++) {
2348       if (nroots > 0) {
2349         if (cellNum[p] < 0) continue;
2350       }
2351       PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2352     }
2353     xadj[n] = size;
2354     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2355     /* Clean up */
2356     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2357     PetscCall(PetscSectionDestroy(&section));
2358     PetscCall(PetscFree(adj));
2359     graph->xadj   = xadj;
2360     graph->adjncy = adjncy;
2361   } else {
2362     Mat       A;
2363     PetscBool isseqaij, flg_row;
2364 
2365     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2366     if (!A->rmap->N || !A->cmap->N) {
2367       PetscCall(PCBDDCGraphDestroy(&graph));
2368       PetscFunctionReturn(PETSC_SUCCESS);
2369     }
2370     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2371     if (!isseqaij && filter) {
2372       PetscBool isseqdense;
2373 
2374       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2375       if (!isseqdense) {
2376         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2377       } else { /* TODO: rectangular case and LDA */
2378         PetscScalar *array;
2379         PetscReal    chop = 1.e-6;
2380 
2381         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2382         PetscCall(MatDenseGetArray(B, &array));
2383         PetscCall(MatGetSize(B, &n, NULL));
2384         for (i = 0; i < n; i++) {
2385           PetscInt j;
2386           for (j = i + 1; j < n; j++) {
2387             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2388             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2389             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2390           }
2391         }
2392         PetscCall(MatDenseRestoreArray(B, &array));
2393         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2394       }
2395     } else {
2396       PetscCall(PetscObjectReference((PetscObject)A));
2397       B = A;
2398     }
2399     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2400 
2401     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2402     if (filter) {
2403       PetscScalar *data;
2404       PetscInt     j, cum;
2405 
2406       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2407       PetscCall(MatSeqAIJGetArray(B, &data));
2408       cum = 0;
2409       for (i = 0; i < n; i++) {
2410         PetscInt t;
2411 
2412         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2413           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2414           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2415         }
2416         t                = xadj_filtered[i];
2417         xadj_filtered[i] = cum;
2418         cum += t;
2419       }
2420       PetscCall(MatSeqAIJRestoreArray(B, &data));
2421       graph->xadj   = xadj_filtered;
2422       graph->adjncy = adjncy_filtered;
2423     } else {
2424       graph->xadj   = xadj;
2425       graph->adjncy = adjncy;
2426     }
2427   }
2428   /* compute local connected components using PCBDDCGraph */
2429   graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2430   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2431   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2432   PetscCall(ISDestroy(&is_dummy));
2433   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX));
2434   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2435   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2436   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2437 
2438   /* partial clean up */
2439   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2440   if (B) {
2441     PetscBool flg_row;
2442     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2443     PetscCall(MatDestroy(&B));
2444   }
2445   if (isplex) {
2446     PetscCall(PetscFree(xadj));
2447     PetscCall(PetscFree(adjncy));
2448   }
2449 
2450   /* get back data */
2451   if (isplex) {
2452     if (ncc) *ncc = graph->ncc;
2453     if (cc || primalv) {
2454       Mat          A;
2455       PetscBT      btv, btvt, btvc;
2456       PetscSection subSection;
2457       PetscInt    *ids, cum, cump, *cids, *pids;
2458       PetscInt     dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;
2459 
2460       PetscCall(DMGetDimension(dm, &dim));
2461       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2462       PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2463       PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2464       PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2465       PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2466       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2467       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2468       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2469       PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2470       PetscCall(PetscBTCreate(pEnd - pStart, &btvc));
2471 
2472       /* First see if we find corners for the subdomains, i.e. a vertex
2473          shared by at least dim subdomain boundary faces. This does not
2474          cover all the possible cases with simplices but it is enough
2475          for tensor cells */
2476       if (vStart != fStart && dim <= 3) {
2477         for (PetscInt c = cStart; c < cEnd; c++) {
2478           PetscInt        nf, cnt = 0, mcnt = dim, *cfaces;
2479           const PetscInt *faces;
2480 
2481           PetscCall(DMPlexGetConeSize(dm, c, &nf));
2482           PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2483           PetscCall(DMPlexGetCone(dm, c, &faces));
2484           for (PetscInt f = 0; f < nf; f++) {
2485             PetscInt nc, ff;
2486 
2487             PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2488             PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2489             if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2490           }
2491           if (cnt >= mcnt) {
2492             PetscInt size, *closure = NULL;
2493 
2494             PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2495             for (PetscInt k = 0; k < 2 * size; k += 2) {
2496               PetscInt v = closure[k];
2497               if (v >= vStart && v < vEnd) {
2498                 PetscInt vsize, *vclosure = NULL;
2499 
2500                 cnt = 0;
2501                 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2502                 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2503                   PetscInt f = vclosure[vk];
2504                   if (f >= fStart && f < fEnd) {
2505                     PetscInt  nc, ff;
2506                     PetscBool valid = PETSC_FALSE;
2507 
2508                     for (PetscInt fk = 0; fk < nf; fk++)
2509                       if (f == cfaces[fk]) valid = PETSC_TRUE;
2510                     if (!valid) continue;
2511                     PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2512                     PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2513                     if (nc == 1 && f == ff) cnt++;
2514                   }
2515                 }
2516                 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2517                 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2518               }
2519             }
2520             PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2521           }
2522           PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2523         }
2524       }
2525 
2526       cids[0] = 0;
2527       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2528         PetscInt j;
2529 
2530         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2531         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2532           PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2533 
2534           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2535           for (k = 0; k < 2 * size; k += 2) {
2536             PetscInt s, pp, p = closure[k], off, dof, cdof;
2537 
2538             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2539             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2540             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2541             for (s = 0; s < dof - cdof; s++) {
2542               if (PetscBTLookupSet(btvt, off + s)) continue;
2543               if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2544               else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2545               else pids[cump++] = off + s; /* cross-vertex */
2546             }
2547             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2548             if (pp != p) {
2549               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2550               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2551               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2552               for (s = 0; s < dof - cdof; s++) {
2553                 if (PetscBTLookupSet(btvt, off + s)) continue;
2554                 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2555                 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2556                 else pids[cump++] = off + s; /* cross-vertex */
2557               }
2558             }
2559           }
2560           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2561         }
2562         cids[i + 1] = cum;
2563         /* mark dofs as already assigned */
2564         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2565       }
2566       if (cc) {
2567         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2568         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2569         *cc = cc_n;
2570       }
2571       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2572       PetscCall(PetscFree3(ids, cids, pids));
2573       PetscCall(PetscBTDestroy(&btv));
2574       PetscCall(PetscBTDestroy(&btvt));
2575       PetscCall(PetscBTDestroy(&btvc));
2576       PetscCall(DMDestroy(&dm));
2577     }
2578   } else {
2579     if (ncc) *ncc = graph->ncc;
2580     if (cc) {
2581       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2582       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2583       *cc = cc_n;
2584     }
2585   }
2586   /* clean up graph */
2587   graph->xadj   = NULL;
2588   graph->adjncy = NULL;
2589   PetscCall(PCBDDCGraphDestroy(&graph));
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2594 {
2595   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2596   PC_IS   *pcis   = (PC_IS *)pc->data;
2597   IS       dirIS  = NULL;
2598   PetscInt i;
2599 
2600   PetscFunctionBegin;
2601   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2602   if (zerodiag) {
2603     Mat             A;
2604     Vec             vec3_N;
2605     PetscScalar    *vals;
2606     const PetscInt *idxs;
2607     PetscInt        nz, *count;
2608 
2609     /* p0 */
2610     PetscCall(VecSet(pcis->vec1_N, 0.));
2611     PetscCall(PetscMalloc1(pcis->n, &vals));
2612     PetscCall(ISGetLocalSize(zerodiag, &nz));
2613     PetscCall(ISGetIndices(zerodiag, &idxs));
2614     for (i = 0; i < nz; i++) vals[i] = 1.;
2615     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2616     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2617     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2618     /* v_I */
2619     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2620     for (i = 0; i < nz; i++) vals[i] = 0.;
2621     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2622     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2623     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2624     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2625     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2626     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2627     if (dirIS) {
2628       PetscInt n;
2629 
2630       PetscCall(ISGetLocalSize(dirIS, &n));
2631       PetscCall(ISGetIndices(dirIS, &idxs));
2632       for (i = 0; i < n; i++) vals[i] = 0.;
2633       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2634       PetscCall(ISRestoreIndices(dirIS, &idxs));
2635     }
2636     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2637     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2638     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2639     PetscCall(VecSet(vec3_N, 0.));
2640     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2641     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2642     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2643     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2644     PetscCall(PetscFree(vals));
2645     PetscCall(VecDestroy(&vec3_N));
2646 
2647     /* there should not be any pressure dofs lying on the interface */
2648     PetscCall(PetscCalloc1(pcis->n, &count));
2649     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2650     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2651     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2652     PetscCall(ISGetIndices(zerodiag, &idxs));
2653     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2654     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2655     PetscCall(PetscFree(count));
2656   }
2657   PetscCall(ISDestroy(&dirIS));
2658 
2659   /* check PCBDDCBenignGetOrSetP0 */
2660   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2661   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2662   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2663   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2664   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2665   for (i = 0; i < pcbddc->benign_n; i++) {
2666     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2667     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2668   }
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2673 {
2674   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2675   Mat_IS   *matis     = (Mat_IS *)pc->pmat->data;
2676   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2677   PetscInt  nz, n, benign_n, bsp = 1;
2678   PetscInt *interior_dofs, n_interior_dofs, nneu;
2679   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2680 
2681   PetscFunctionBegin;
2682   if (reuse) goto project_b0;
2683   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2684   PetscCall(MatDestroy(&pcbddc->benign_B0));
2685   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2686   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2687   has_null_pressures = PETSC_TRUE;
2688   have_null          = PETSC_TRUE;
2689   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2690      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2691      Checks if all the pressure dofs in each subdomain have a zero diagonal
2692      If not, a change of basis on pressures is not needed
2693      since the local Schur complements are already SPD
2694   */
2695   if (pcbddc->n_ISForDofsLocal) {
2696     IS        iP = NULL;
2697     PetscInt  p, *pp;
2698     PetscBool flg, blocked = PETSC_FALSE;
2699 
2700     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2701     n = pcbddc->n_ISForDofsLocal;
2702     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2703     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2704     PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL));
2705     PetscOptionsEnd();
2706     if (!flg) {
2707       n     = 1;
2708       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2709     }
2710 
2711     bsp = 0;
2712     for (p = 0; p < n; p++) {
2713       PetscInt bs = 1;
2714 
2715       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2716       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2717       bsp += bs;
2718     }
2719     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2720     bsp = 0;
2721     for (p = 0; p < n; p++) {
2722       const PetscInt *idxs;
2723       PetscInt        b, bs = 1, npl, *bidxs;
2724 
2725       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2726       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2727       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2728       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2729       for (b = 0; b < bs; b++) {
2730         PetscInt i;
2731 
2732         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2733         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2734         bsp++;
2735       }
2736       PetscCall(PetscFree(bidxs));
2737       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2738     }
2739     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2740 
2741     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2742     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2743     if (iP) {
2744       IS newpressures;
2745 
2746       PetscCall(ISDifference(pressures, iP, &newpressures));
2747       PetscCall(ISDestroy(&pressures));
2748       pressures = newpressures;
2749     }
2750     PetscCall(ISSorted(pressures, &sorted));
2751     if (!sorted) PetscCall(ISSort(pressures));
2752     PetscCall(PetscFree(pp));
2753   }
2754 
2755   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2756   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2757   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2758   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2759   PetscCall(ISSorted(zerodiag, &sorted));
2760   if (!sorted) PetscCall(ISSort(zerodiag));
2761   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2762   zerodiag_save = zerodiag;
2763   PetscCall(ISGetLocalSize(zerodiag, &nz));
2764   if (!nz) {
2765     if (n) have_null = PETSC_FALSE;
2766     has_null_pressures = PETSC_FALSE;
2767     PetscCall(ISDestroy(&zerodiag));
2768   }
2769   recompute_zerodiag = PETSC_FALSE;
2770 
2771   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2772   zerodiag_subs   = NULL;
2773   benign_n        = 0;
2774   n_interior_dofs = 0;
2775   interior_dofs   = NULL;
2776   nneu            = 0;
2777   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2778   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2779   if (checkb) { /* need to compute interior nodes */
2780     PetscInt               n, i;
2781     PetscInt              *count;
2782     ISLocalToGlobalMapping mapping;
2783 
2784     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2785     PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2786     PetscCall(PetscMalloc1(n, &interior_dofs));
2787     for (i = 0; i < n; i++)
2788       if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2789     PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2790   }
2791   if (has_null_pressures) {
2792     IS             *subs;
2793     PetscInt        nsubs, i, j, nl;
2794     const PetscInt *idxs;
2795     PetscScalar    *array;
2796     Vec            *work;
2797 
2798     subs  = pcbddc->local_subs;
2799     nsubs = pcbddc->n_local_subs;
2800     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2801     if (checkb) {
2802       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2803       PetscCall(ISGetLocalSize(zerodiag, &nl));
2804       PetscCall(ISGetIndices(zerodiag, &idxs));
2805       /* work[0] = 1_p */
2806       PetscCall(VecSet(work[0], 0.));
2807       PetscCall(VecGetArray(work[0], &array));
2808       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2809       PetscCall(VecRestoreArray(work[0], &array));
2810       /* work[0] = 1_v */
2811       PetscCall(VecSet(work[1], 1.));
2812       PetscCall(VecGetArray(work[1], &array));
2813       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2814       PetscCall(VecRestoreArray(work[1], &array));
2815       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2816     }
2817 
2818     if (nsubs > 1 || bsp > 1) {
2819       IS      *is;
2820       PetscInt b, totb;
2821 
2822       totb  = bsp;
2823       is    = bsp > 1 ? bzerodiag : &zerodiag;
2824       nsubs = PetscMax(nsubs, 1);
2825       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2826       for (b = 0; b < totb; b++) {
2827         for (i = 0; i < nsubs; i++) {
2828           ISLocalToGlobalMapping l2g;
2829           IS                     t_zerodiag_subs;
2830           PetscInt               nl;
2831 
2832           if (subs) {
2833             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2834           } else {
2835             IS tis;
2836 
2837             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2838             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2839             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2840             PetscCall(ISDestroy(&tis));
2841           }
2842           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2843           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2844           if (nl) {
2845             PetscBool valid = PETSC_TRUE;
2846 
2847             if (checkb) {
2848               PetscCall(VecSet(matis->x, 0));
2849               PetscCall(ISGetLocalSize(subs[i], &nl));
2850               PetscCall(ISGetIndices(subs[i], &idxs));
2851               PetscCall(VecGetArray(matis->x, &array));
2852               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2853               PetscCall(VecRestoreArray(matis->x, &array));
2854               PetscCall(ISRestoreIndices(subs[i], &idxs));
2855               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2856               PetscCall(MatMult(matis->A, matis->x, matis->y));
2857               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2858               PetscCall(VecGetArray(matis->y, &array));
2859               for (j = 0; j < n_interior_dofs; j++) {
2860                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2861                   valid = PETSC_FALSE;
2862                   break;
2863                 }
2864               }
2865               PetscCall(VecRestoreArray(matis->y, &array));
2866             }
2867             if (valid && nneu) {
2868               const PetscInt *idxs;
2869               PetscInt        nzb;
2870 
2871               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2872               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2873               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2874               if (nzb) valid = PETSC_FALSE;
2875             }
2876             if (valid && pressures) {
2877               IS       t_pressure_subs, tmp;
2878               PetscInt i1, i2;
2879 
2880               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2881               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
2882               PetscCall(ISGetLocalSize(tmp, &i1));
2883               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
2884               if (i2 != i1) valid = PETSC_FALSE;
2885               PetscCall(ISDestroy(&t_pressure_subs));
2886               PetscCall(ISDestroy(&tmp));
2887             }
2888             if (valid) {
2889               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
2890               benign_n++;
2891             } else recompute_zerodiag = PETSC_TRUE;
2892           }
2893           PetscCall(ISDestroy(&t_zerodiag_subs));
2894           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
2895         }
2896       }
2897     } else { /* there's just one subdomain (or zero if they have not been detected */
2898       PetscBool valid = PETSC_TRUE;
2899 
2900       if (nneu) valid = PETSC_FALSE;
2901       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
2902       if (valid && checkb) {
2903         PetscCall(MatMult(matis->A, work[0], matis->x));
2904         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
2905         PetscCall(VecGetArray(matis->x, &array));
2906         for (j = 0; j < n_interior_dofs; j++) {
2907           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2908             valid = PETSC_FALSE;
2909             break;
2910           }
2911         }
2912         PetscCall(VecRestoreArray(matis->x, &array));
2913       }
2914       if (valid) {
2915         benign_n = 1;
2916         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
2917         PetscCall(PetscObjectReference((PetscObject)zerodiag));
2918         zerodiag_subs[0] = zerodiag;
2919       }
2920     }
2921     if (checkb) PetscCall(VecDestroyVecs(2, &work));
2922   }
2923   PetscCall(PetscFree(interior_dofs));
2924 
2925   if (!benign_n) {
2926     PetscInt n;
2927 
2928     PetscCall(ISDestroy(&zerodiag));
2929     recompute_zerodiag = PETSC_FALSE;
2930     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2931     if (n) have_null = PETSC_FALSE;
2932   }
2933 
2934   /* final check for null pressures */
2935   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
2936 
2937   if (recompute_zerodiag) {
2938     PetscCall(ISDestroy(&zerodiag));
2939     if (benign_n == 1) {
2940       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2941       zerodiag = zerodiag_subs[0];
2942     } else {
2943       PetscInt i, nzn, *new_idxs;
2944 
2945       nzn = 0;
2946       for (i = 0; i < benign_n; i++) {
2947         PetscInt ns;
2948         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2949         nzn += ns;
2950       }
2951       PetscCall(PetscMalloc1(nzn, &new_idxs));
2952       nzn = 0;
2953       for (i = 0; i < benign_n; i++) {
2954         PetscInt ns, *idxs;
2955         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2956         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2957         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
2958         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2959         nzn += ns;
2960       }
2961       PetscCall(PetscSortInt(nzn, new_idxs));
2962       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
2963     }
2964     have_null = PETSC_FALSE;
2965   }
2966 
2967   /* determines if the coarse solver will be singular or not */
2968   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2969 
2970   /* Prepare matrix to compute no-net-flux */
2971   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2972     Mat                    A, loc_divudotp;
2973     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2974     IS                     row, col, isused = NULL;
2975     PetscInt               M, N, n, st, n_isused;
2976 
2977     if (pressures) {
2978       isused = pressures;
2979     } else {
2980       isused = zerodiag_save;
2981     }
2982     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
2983     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2984     PetscCall(MatGetLocalSize(A, &n, NULL));
2985     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
2986     n_isused = 0;
2987     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
2988     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
2989     st = st - n_isused;
2990     if (n) {
2991       const PetscInt *gidxs;
2992 
2993       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
2994       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
2995       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2996       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2997       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
2998       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
2999     } else {
3000       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3001       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3002       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3003     }
3004     PetscCall(MatGetSize(pc->pmat, NULL, &N));
3005     PetscCall(ISGetSize(row, &M));
3006     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3007     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3008     PetscCall(ISDestroy(&row));
3009     PetscCall(ISDestroy(&col));
3010     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3011     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3012     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3013     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3014     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3015     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3016     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3017     PetscCall(MatDestroy(&loc_divudotp));
3018     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3019     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3020   }
3021   PetscCall(ISDestroy(&zerodiag_save));
3022   PetscCall(ISDestroy(&pressures));
3023   if (bzerodiag) {
3024     PetscInt i;
3025 
3026     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3027     PetscCall(PetscFree(bzerodiag));
3028   }
3029   pcbddc->benign_n             = benign_n;
3030   pcbddc->benign_zerodiag_subs = zerodiag_subs;
3031 
3032   /* determines if the problem has subdomains with 0 pressure block */
3033   have_null = (PetscBool)(!!pcbddc->benign_n);
3034   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
3035 
3036 project_b0:
3037   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3038   /* change of basis and p0 dofs */
3039   if (pcbddc->benign_n) {
3040     PetscInt i, s, *nnz;
3041 
3042     /* local change of basis for pressures */
3043     PetscCall(MatDestroy(&pcbddc->benign_change));
3044     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3045     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3046     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3047     PetscCall(PetscMalloc1(n, &nnz));
3048     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3049     for (i = 0; i < pcbddc->benign_n; i++) {
3050       const PetscInt *idxs;
3051       PetscInt        nzs, j;
3052 
3053       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3054       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3055       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3056       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
3057       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3058     }
3059     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3060     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3061     PetscCall(PetscFree(nnz));
3062     /* set identity by default */
3063     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3064     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3065     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3066     /* set change on pressures */
3067     for (s = 0; s < pcbddc->benign_n; s++) {
3068       PetscScalar    *array;
3069       const PetscInt *idxs;
3070       PetscInt        nzs;
3071 
3072       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3073       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3074       for (i = 0; i < nzs - 1; i++) {
3075         PetscScalar vals[2];
3076         PetscInt    cols[2];
3077 
3078         cols[0] = idxs[i];
3079         cols[1] = idxs[nzs - 1];
3080         vals[0] = 1.;
3081         vals[1] = 1.;
3082         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3083       }
3084       PetscCall(PetscMalloc1(nzs, &array));
3085       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3086       array[nzs - 1] = 1.;
3087       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3088       /* store local idxs for p0 */
3089       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3090       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3091       PetscCall(PetscFree(array));
3092     }
3093     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3094     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3095 
3096     /* project if needed */
3097     if (pcbddc->benign_change_explicit) {
3098       Mat M;
3099 
3100       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3101       PetscCall(MatDestroy(&pcbddc->local_mat));
3102       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3103       PetscCall(MatDestroy(&M));
3104     }
3105     /* store global idxs for p0 */
3106     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3107   }
3108   *zerodiaglocal = zerodiag;
3109   PetscFunctionReturn(PETSC_SUCCESS);
3110 }
3111 
3112 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3113 {
3114   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
3115   PetscScalar *array;
3116 
3117   PetscFunctionBegin;
3118   if (!pcbddc->benign_sf) {
3119     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3120     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3121   }
3122   if (get) {
3123     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3124     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3125     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3126     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3127   } else {
3128     PetscCall(VecGetArray(v, &array));
3129     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3130     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3131     PetscCall(VecRestoreArray(v, &array));
3132   }
3133   PetscFunctionReturn(PETSC_SUCCESS);
3134 }
3135 
3136 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3137 {
3138   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3139 
3140   PetscFunctionBegin;
3141   /* TODO: add error checking
3142     - avoid nested pop (or push) calls.
3143     - cannot push before pop.
3144     - cannot call this if pcbddc->local_mat is NULL
3145   */
3146   if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3147   if (pop) {
3148     if (pcbddc->benign_change_explicit) {
3149       IS       is_p0;
3150       MatReuse reuse;
3151 
3152       /* extract B_0 */
3153       reuse = MAT_INITIAL_MATRIX;
3154       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3155       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3156       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3157       /* remove rows and cols from local problem */
3158       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3159       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3160       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3161       PetscCall(ISDestroy(&is_p0));
3162     } else {
3163       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
3164       PetscScalar *vals;
3165       PetscInt     i, n, *idxs_ins;
3166 
3167       PetscCall(VecGetLocalSize(matis->y, &n));
3168       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3169       if (!pcbddc->benign_B0) {
3170         PetscInt *nnz;
3171         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3172         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3173         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3174         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3175         for (i = 0; i < pcbddc->benign_n; i++) {
3176           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3177           nnz[i] = n - nnz[i];
3178         }
3179         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3180         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3181         PetscCall(PetscFree(nnz));
3182       }
3183 
3184       for (i = 0; i < pcbddc->benign_n; i++) {
3185         PetscScalar *array;
3186         PetscInt    *idxs, j, nz, cum;
3187 
3188         PetscCall(VecSet(matis->x, 0.));
3189         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3190         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3191         for (j = 0; j < nz; j++) vals[j] = 1.;
3192         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3193         PetscCall(VecAssemblyBegin(matis->x));
3194         PetscCall(VecAssemblyEnd(matis->x));
3195         PetscCall(VecSet(matis->y, 0.));
3196         PetscCall(MatMult(matis->A, matis->x, matis->y));
3197         PetscCall(VecGetArray(matis->y, &array));
3198         cum = 0;
3199         for (j = 0; j < n; j++) {
3200           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3201             vals[cum]     = array[j];
3202             idxs_ins[cum] = j;
3203             cum++;
3204           }
3205         }
3206         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3207         PetscCall(VecRestoreArray(matis->y, &array));
3208         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3209       }
3210       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3211       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3212       PetscCall(PetscFree2(idxs_ins, vals));
3213     }
3214   } else { /* push */
3215 
3216     PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3217     for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3218       PetscScalar *B0_vals;
3219       PetscInt    *B0_cols, B0_ncol;
3220 
3221       PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3222       PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3223       PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3224       PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3225       PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3226     }
3227     PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3228     PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3229   }
3230   PetscFunctionReturn(PETSC_SUCCESS);
3231 }
3232 
3233 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3234 {
3235   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3236   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3237   PetscBLASInt    B_dummyint, B_neigs, B_ierr, B_lwork;
3238   PetscBLASInt   *B_iwork, *B_ifail;
3239   PetscScalar    *work, lwork;
3240   PetscScalar    *St, *S, *eigv;
3241   PetscScalar    *Sarray, *Starray;
3242   PetscReal      *eigs, thresh, lthresh, uthresh;
3243   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3244   PetscBool       allocated_S_St, upart;
3245 #if defined(PETSC_USE_COMPLEX)
3246   PetscReal *rwork;
3247 #endif
3248 
3249   PetscFunctionBegin;
3250   if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3251   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3252   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3253   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3254              sub_schurs->is_posdef);
3255   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3256 
3257   if (pcbddc->dbg_flag) {
3258     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3259     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3260     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3261     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3262     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3263   }
3264 
3265   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3266 
3267   /* max size of subsets */
3268   mss = 0;
3269   for (i = 0; i < sub_schurs->n_subs; i++) {
3270     PetscInt subset_size;
3271 
3272     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3273     mss = PetscMax(mss, subset_size);
3274   }
3275 
3276   /* min/max and threshold */
3277   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3278   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3279   nmax           = PetscMax(nmin, nmax);
3280   allocated_S_St = PETSC_FALSE;
3281   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3282     allocated_S_St = PETSC_TRUE;
3283   }
3284 
3285   /* allocate lapack workspace */
3286   cum = cum2 = 0;
3287   maxneigs   = 0;
3288   for (i = 0; i < sub_schurs->n_subs; i++) {
3289     PetscInt n, subset_size;
3290 
3291     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3292     n = PetscMin(subset_size, nmax);
3293     cum += subset_size;
3294     cum2 += subset_size * n;
3295     maxneigs = PetscMax(maxneigs, n);
3296   }
3297   lwork = 0;
3298   if (mss) {
3299     PetscScalar  sdummy  = 0.;
3300     PetscBLASInt B_itype = 1;
3301     PetscBLASInt B_N, idummy = 0;
3302     PetscReal    rdummy = 0., zero = 0.0;
3303     PetscReal    eps = 0.0; /* dlamch? */
3304 
3305     PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3306     PetscCall(PetscBLASIntCast(mss, &B_N));
3307     B_lwork = -1;
3308     /* some implementations may complain about NULL pointers, even if we are querying */
3309     S       = &sdummy;
3310     St      = &sdummy;
3311     eigs    = &rdummy;
3312     eigv    = &sdummy;
3313     B_iwork = &idummy;
3314     B_ifail = &idummy;
3315 #if defined(PETSC_USE_COMPLEX)
3316     rwork = &rdummy;
3317 #endif
3318     thresh = 1.0;
3319     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3320 #if defined(PETSC_USE_COMPLEX)
3321     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3322 #else
3323     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3324 #endif
3325     PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr);
3326     PetscCall(PetscFPTrapPop());
3327   }
3328 
3329   nv = 0;
3330   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3331     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3332   }
3333   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3334   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3335   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3336 #if defined(PETSC_USE_COMPLEX)
3337   PetscCall(PetscMalloc1(7 * mss, &rwork));
3338 #endif
3339   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3340                          &pcbddc->adaptive_constraints_data));
3341   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3342 
3343   maxneigs = 0;
3344   cum = cumarray                           = 0;
3345   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3346   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3347   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3348     const PetscInt *idxs;
3349 
3350     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3351     for (cum = 0; cum < nv; cum++) {
3352       pcbddc->adaptive_constraints_n[cum]            = 1;
3353       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3354       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3355       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3356       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3357     }
3358     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3359   }
3360 
3361   if (mss) { /* multilevel */
3362     if (sub_schurs->gdsw) {
3363       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3364       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3365     } else {
3366       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3367       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3368     }
3369   }
3370 
3371   lthresh = pcbddc->adaptive_threshold[0];
3372   uthresh = pcbddc->adaptive_threshold[1];
3373   upart   = pcbddc->use_deluxe_scaling;
3374   for (i = 0; i < sub_schurs->n_subs; i++) {
3375     const PetscInt *idxs;
3376     PetscReal       upper, lower;
3377     PetscInt        j, subset_size, eigs_start = 0;
3378     PetscBLASInt    B_N;
3379     PetscBool       same_data = PETSC_FALSE;
3380     PetscBool       scal      = PETSC_FALSE;
3381 
3382     if (upart) {
3383       upper = PETSC_MAX_REAL;
3384       lower = uthresh;
3385     } else {
3386       if (sub_schurs->gdsw) {
3387         upper = uthresh;
3388         lower = PETSC_MIN_REAL;
3389       } else {
3390         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3391         upper = 1. / uthresh;
3392         lower = 0.;
3393       }
3394     }
3395     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3396     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3397     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3398     /* this is experimental: we assume the dofs have been properly grouped to have
3399        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3400     if (!sub_schurs->is_posdef) {
3401       Mat T;
3402 
3403       for (j = 0; j < subset_size; j++) {
3404         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3405           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3406           PetscCall(MatScale(T, -1.0));
3407           PetscCall(MatDestroy(&T));
3408           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3409           PetscCall(MatScale(T, -1.0));
3410           PetscCall(MatDestroy(&T));
3411           if (sub_schurs->change_primal_sub) {
3412             PetscInt        nz, k;
3413             const PetscInt *idxs;
3414 
3415             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3416             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3417             for (k = 0; k < nz; k++) {
3418               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3419               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3420             }
3421             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3422           }
3423           scal = PETSC_TRUE;
3424           break;
3425         }
3426       }
3427     }
3428 
3429     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3430       if (sub_schurs->is_symmetric) {
3431         PetscInt j, k;
3432         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3433           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3434           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3435         }
3436         for (j = 0; j < subset_size; j++) {
3437           for (k = j; k < subset_size; k++) {
3438             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3439             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3440           }
3441         }
3442       } else {
3443         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3444         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3445       }
3446     } else {
3447       S  = Sarray + cumarray;
3448       St = Starray + cumarray;
3449     }
3450     /* see if we can save some work */
3451     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3452 
3453     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3454       B_neigs = 0;
3455     } else {
3456       PetscBLASInt B_itype = 1;
3457       PetscBLASInt B_IL, B_IU;
3458       PetscReal    eps = -1.0; /* dlamch? */
3459       PetscInt     nmin_s;
3460       PetscBool    compute_range;
3461 
3462       PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3463       B_neigs       = 0;
3464       compute_range = (PetscBool)!same_data;
3465       if (nmin >= subset_size) compute_range = PETSC_FALSE;
3466 
3467       if (pcbddc->dbg_flag) {
3468         PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;
3469 
3470         if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3471         PetscCall(
3472           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3473       }
3474 
3475       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3476       if (compute_range) {
3477         /* ask for eigenvalues larger than thresh */
3478         if (sub_schurs->is_posdef) {
3479 #if defined(PETSC_USE_COMPLEX)
3480           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3481 #else
3482           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3483 #endif
3484           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3485         } else { /* no theory so far, but it works nicely */
3486           PetscInt  recipe = 0, recipe_m = 1;
3487           PetscReal bb[2];
3488 
3489           PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3490           switch (recipe) {
3491           case 0:
3492             if (scal) {
3493               bb[0] = PETSC_MIN_REAL;
3494               bb[1] = lthresh;
3495             } else {
3496               bb[0] = uthresh;
3497               bb[1] = PETSC_MAX_REAL;
3498             }
3499 #if defined(PETSC_USE_COMPLEX)
3500             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3501 #else
3502             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3503 #endif
3504             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3505             break;
3506           case 1:
3507             bb[0] = PETSC_MIN_REAL;
3508             bb[1] = lthresh * lthresh;
3509 #if defined(PETSC_USE_COMPLEX)
3510             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3511 #else
3512             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3513 #endif
3514             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3515             if (!scal) {
3516               PetscBLASInt B_neigs2 = 0;
3517 
3518               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3519               bb[1] = PETSC_MAX_REAL;
3520               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3521               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3522 #if defined(PETSC_USE_COMPLEX)
3523               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3524 #else
3525               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3526 #endif
3527               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3528               B_neigs += B_neigs2;
3529             }
3530             break;
3531           case 2:
3532             if (scal) {
3533               bb[0] = PETSC_MIN_REAL;
3534               bb[1] = 0;
3535 #if defined(PETSC_USE_COMPLEX)
3536               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3537 #else
3538               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3539 #endif
3540               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3541             } else {
3542               PetscBLASInt B_neigs2 = 0;
3543               PetscBool    do_copy  = PETSC_FALSE;
3544 
3545               lthresh = PetscMax(lthresh, 0.0);
3546               if (lthresh > 0.0) {
3547                 bb[0] = PETSC_MIN_REAL;
3548                 bb[1] = lthresh * lthresh;
3549 
3550                 do_copy = PETSC_TRUE;
3551 #if defined(PETSC_USE_COMPLEX)
3552                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3553 #else
3554                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3555 #endif
3556                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3557               }
3558               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3559               bb[1] = PETSC_MAX_REAL;
3560               if (do_copy) {
3561                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3562                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3563               }
3564 #if defined(PETSC_USE_COMPLEX)
3565               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3566 #else
3567               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3568 #endif
3569               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3570               B_neigs += B_neigs2;
3571             }
3572             break;
3573           case 3:
3574             if (scal) {
3575               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3576             } else {
3577               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3578             }
3579             if (!scal) {
3580               bb[0] = uthresh;
3581               bb[1] = PETSC_MAX_REAL;
3582 #if defined(PETSC_USE_COMPLEX)
3583               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3584 #else
3585               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3586 #endif
3587               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3588             }
3589             if (recipe_m > 0 && B_N - B_neigs > 0) {
3590               PetscBLASInt B_neigs2 = 0;
3591 
3592               B_IL = 1;
3593               PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3594               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3595               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3596 #if defined(PETSC_USE_COMPLEX)
3597               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3598 #else
3599               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3600 #endif
3601               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3602               B_neigs += B_neigs2;
3603             }
3604             break;
3605           case 4:
3606             bb[0] = PETSC_MIN_REAL;
3607             bb[1] = lthresh;
3608 #if defined(PETSC_USE_COMPLEX)
3609             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3610 #else
3611             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3612 #endif
3613             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3614             {
3615               PetscBLASInt B_neigs2 = 0;
3616 
3617               bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3618               bb[1] = PETSC_MAX_REAL;
3619               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3620               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3621 #if defined(PETSC_USE_COMPLEX)
3622               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3623 #else
3624               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3625 #endif
3626               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3627               B_neigs += B_neigs2;
3628             }
3629             break;
3630           case 5: /* same as before: first compute all eigenvalues, then filter */
3631 #if defined(PETSC_USE_COMPLEX)
3632             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3633 #else
3634             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3635 #endif
3636             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3637             {
3638               PetscInt e, k, ne;
3639               for (e = 0, ne = 0; e < B_neigs; e++) {
3640                 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3641                   for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3642                   eigs[ne] = eigs[e];
3643                   ne++;
3644                 }
3645               }
3646               PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3647               PetscCall(PetscBLASIntCast(ne, &B_neigs));
3648             }
3649             break;
3650           default:
3651             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3652           }
3653         }
3654       } else if (!same_data) { /* this is just to see all the eigenvalues */
3655         PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU));
3656         B_IL = 1;
3657 #if defined(PETSC_USE_COMPLEX)
3658         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3659 #else
3660         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3661 #endif
3662         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3663       } else { /* same_data is true, so just get the adaptive functional requested by the user */
3664         PetscInt k;
3665         PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3666         PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3667         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3668         nmin = nmax;
3669         PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3670         for (k = 0; k < nmax; k++) {
3671           eigs[k]                     = 1. / PETSC_SMALL;
3672           eigv[k * (subset_size + 1)] = 1.0;
3673         }
3674       }
3675       PetscCall(PetscFPTrapPop());
3676       if (B_ierr) {
3677         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3678         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3679         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3680       }
3681 
3682       if (B_neigs > nmax) {
3683         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3684         if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3685         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3686       }
3687 
3688       nmin_s = PetscMin(nmin, B_N);
3689       if (B_neigs < nmin_s) {
3690         PetscBLASInt B_neigs2 = 0;
3691 
3692         if (upart) {
3693           if (scal) {
3694             PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3695             B_IL = B_neigs + 1;
3696           } else {
3697             PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL));
3698             B_IU = B_N - B_neigs;
3699           }
3700         } else {
3701           B_IL = B_neigs + 1;
3702           PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3703         }
3704         if (pcbddc->dbg_flag) {
3705           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3706         }
3707         if (sub_schurs->is_symmetric) {
3708           PetscInt j, k;
3709           for (j = 0; j < subset_size; j++) {
3710             for (k = j; k < subset_size; k++) {
3711               S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3712               St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3713             }
3714           }
3715         } else {
3716           PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3717           PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3718         }
3719         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3720 #if defined(PETSC_USE_COMPLEX)
3721         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3722 #else
3723         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3724 #endif
3725         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3726         PetscCall(PetscFPTrapPop());
3727         B_neigs += B_neigs2;
3728       }
3729       if (B_ierr) {
3730         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3731         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3732         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3733       }
3734       if (pcbddc->dbg_flag) {
3735         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3736         for (j = 0; j < B_neigs; j++) {
3737           if (!sub_schurs->gdsw) {
3738             if (eigs[j] == 0.0) {
3739               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3740             } else {
3741               if (upart) {
3742                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3743               } else {
3744                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1 / eigs[j + eigs_start])));
3745               }
3746             }
3747           } else {
3748             double pg = (double)eigs[j + eigs_start];
3749             if (pg < 2 * PETSC_SMALL) pg = 0.0;
3750             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3751           }
3752         }
3753       }
3754     }
3755     /* change the basis back to the original one */
3756     if (sub_schurs->change) {
3757       Mat change, phi, phit;
3758 
3759       if (pcbddc->dbg_flag > 2) {
3760         PetscInt ii;
3761         for (ii = 0; ii < B_neigs; ii++) {
3762           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3763           for (j = 0; j < B_N; j++) {
3764 #if defined(PETSC_USE_COMPLEX)
3765             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3766             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3767             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3768 #else
3769             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3770 #endif
3771           }
3772         }
3773       }
3774       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3775       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3776       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi));
3777       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3778       PetscCall(MatDestroy(&phit));
3779       PetscCall(MatDestroy(&phi));
3780     }
3781     maxneigs                               = PetscMax(B_neigs, maxneigs);
3782     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3783     if (B_neigs) {
3784       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3785 
3786       if (pcbddc->dbg_flag > 1) {
3787         PetscInt ii;
3788         for (ii = 0; ii < B_neigs; ii++) {
3789           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3790           for (j = 0; j < B_N; j++) {
3791 #if defined(PETSC_USE_COMPLEX)
3792             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3793             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3794             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3795 #else
3796             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3797 #endif
3798           }
3799         }
3800       }
3801       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3802       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3803       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3804       cum++;
3805     }
3806     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3807     /* shift for next computation */
3808     cumarray += subset_size * subset_size;
3809   }
3810   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3811 
3812   if (mss) {
3813     if (sub_schurs->gdsw) {
3814       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3815       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3816     } else {
3817       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3818       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3819       /* destroy matrices (junk) */
3820       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3821       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3822     }
3823   }
3824   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3825   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3826 #if defined(PETSC_USE_COMPLEX)
3827   PetscCall(PetscFree(rwork));
3828 #endif
3829   if (pcbddc->dbg_flag) {
3830     PetscInt maxneigs_r;
3831     PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3832     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3833   }
3834   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3835   PetscFunctionReturn(PETSC_SUCCESS);
3836 }
3837 
3838 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3839 {
3840   Mat coarse_submat;
3841 
3842   PetscFunctionBegin;
3843   /* Setup local scatters R_to_B and (optionally) R_to_D */
3844   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3845   PetscCall(PCBDDCSetUpLocalScatters(pc));
3846 
3847   /* Setup local neumann solver ksp_R */
3848   /* PCBDDCSetUpLocalScatters should be called first! */
3849   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3850 
3851   /*
3852      Setup local correction and local part of coarse basis.
3853      Gives back the dense local part of the coarse matrix in column major ordering
3854   */
3855   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));
3856 
3857   /* Compute total number of coarse nodes and setup coarse solver */
3858   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3859   PetscCall(MatDestroy(&coarse_submat));
3860   PetscFunctionReturn(PETSC_SUCCESS);
3861 }
3862 
3863 PetscErrorCode PCBDDCResetCustomization(PC pc)
3864 {
3865   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3866 
3867   PetscFunctionBegin;
3868   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3869   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3870   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3871   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3872   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3873   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3874   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3875   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3876   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3877   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3878   PetscFunctionReturn(PETSC_SUCCESS);
3879 }
3880 
3881 PetscErrorCode PCBDDCResetTopography(PC pc)
3882 {
3883   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3884   PetscInt i;
3885 
3886   PetscFunctionBegin;
3887   PetscCall(MatDestroy(&pcbddc->nedcG));
3888   PetscCall(ISDestroy(&pcbddc->nedclocal));
3889   PetscCall(MatDestroy(&pcbddc->discretegradient));
3890   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3891   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3892   PetscCall(MatDestroy(&pcbddc->switch_static_change));
3893   PetscCall(VecDestroy(&pcbddc->work_change));
3894   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
3895   PetscCall(MatDestroy(&pcbddc->divudotp));
3896   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
3897   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3898   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
3899   pcbddc->n_local_subs = 0;
3900   PetscCall(PetscFree(pcbddc->local_subs));
3901   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3902   pcbddc->graphanalyzed        = PETSC_FALSE;
3903   pcbddc->recompute_topography = PETSC_TRUE;
3904   pcbddc->corner_selected      = PETSC_FALSE;
3905   PetscFunctionReturn(PETSC_SUCCESS);
3906 }
3907 
3908 PetscErrorCode PCBDDCResetSolvers(PC pc)
3909 {
3910   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3911 
3912   PetscFunctionBegin;
3913   PetscCall(VecDestroy(&pcbddc->coarse_vec));
3914   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
3915   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
3916   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
3917   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
3918   PetscCall(VecDestroy(&pcbddc->vec1_P));
3919   PetscCall(VecDestroy(&pcbddc->vec1_C));
3920   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3921   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3922   PetscCall(VecDestroy(&pcbddc->vec1_R));
3923   PetscCall(VecDestroy(&pcbddc->vec2_R));
3924   PetscCall(ISDestroy(&pcbddc->is_R_local));
3925   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
3926   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
3927   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3928   PetscCall(KSPReset(pcbddc->ksp_D));
3929   PetscCall(KSPReset(pcbddc->ksp_R));
3930   PetscCall(KSPReset(pcbddc->coarse_ksp));
3931   PetscCall(MatDestroy(&pcbddc->local_mat));
3932   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
3933   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
3934   PetscCall(PetscFree(pcbddc->global_primal_indices));
3935   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
3936   PetscCall(MatDestroy(&pcbddc->benign_change));
3937   PetscCall(VecDestroy(&pcbddc->benign_vec));
3938   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
3939   PetscCall(MatDestroy(&pcbddc->benign_B0));
3940   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
3941   if (pcbddc->benign_zerodiag_subs) {
3942     PetscInt i;
3943     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3944     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
3945   }
3946   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3947   PetscFunctionReturn(PETSC_SUCCESS);
3948 }
3949 
3950 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3951 {
3952   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3953   PC_IS   *pcis   = (PC_IS *)pc->data;
3954   VecType  impVecType;
3955   PetscInt n_constraints, n_R, old_size;
3956 
3957   PetscFunctionBegin;
3958   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3959   n_R           = pcis->n - pcbddc->n_vertices;
3960   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
3961   /* local work vectors (try to avoid unneeded work)*/
3962   /* R nodes */
3963   old_size = -1;
3964   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
3965   if (n_R != old_size) {
3966     PetscCall(VecDestroy(&pcbddc->vec1_R));
3967     PetscCall(VecDestroy(&pcbddc->vec2_R));
3968     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
3969     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
3970     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
3971     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
3972   }
3973   /* local primal dofs */
3974   old_size = -1;
3975   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
3976   if (pcbddc->local_primal_size != old_size) {
3977     PetscCall(VecDestroy(&pcbddc->vec1_P));
3978     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
3979     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
3980     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
3981   }
3982   /* local explicit constraints */
3983   old_size = -1;
3984   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
3985   if (n_constraints && n_constraints != old_size) {
3986     PetscCall(VecDestroy(&pcbddc->vec1_C));
3987     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
3988     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
3989     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
3990   }
3991   PetscFunctionReturn(PETSC_SUCCESS);
3992 }
3993 
3994 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
3995 {
3996   PetscBool          flg;
3997   const PetscScalar *a;
3998 
3999   PetscFunctionBegin;
4000   PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
4001   if (flg) {
4002     PetscCall(MatDenseGetArrayRead(S, &a));
4003     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4004     PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4005     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4006     PetscCall(MatDenseRestoreArrayRead(S, &a));
4007   } else {
4008     const PetscInt *ii, *jj;
4009     PetscInt        n;
4010     PetscInt        buf[8192], *bufc = NULL;
4011     PetscBool       freeb = PETSC_FALSE;
4012     Mat             Sm    = S;
4013 
4014     PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4015     if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4016     else PetscCall(PetscObjectReference((PetscObject)S));
4017     PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4018     PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4019     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4020     if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4021       bufc = buf;
4022     } else {
4023       PetscCall(PetscMalloc1(nc, &bufc));
4024       freeb = PETSC_TRUE;
4025     }
4026 
4027     for (PetscInt i = 0; i < n; i++) {
4028       const PetscInt nci = ii[i + 1] - ii[i];
4029 
4030       for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4031       PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4032     }
4033     PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4034     PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4035     PetscCall(MatDestroy(&Sm));
4036     if (freeb) PetscCall(PetscFree(bufc));
4037   }
4038   PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4039   PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4040   PetscFunctionReturn(PETSC_SUCCESS);
4041 }
4042 
4043 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4044 {
4045   Mat_SeqAIJ        *aij;
4046   PetscInt          *ii, *jj;
4047   PetscScalar       *aa;
4048   PetscInt           nnz = 0, m, nc;
4049   const PetscScalar *a;
4050   const PetscScalar  zero = 0.0;
4051 
4052   PetscFunctionBegin;
4053   PetscCall(MatGetLocalSize(D, &m, &nc));
4054   PetscCall(MatDenseGetArrayRead(D, &a));
4055   PetscCall(PetscMalloc1(m + 1, &ii));
4056   PetscCall(PetscMalloc1(m * nc, &jj));
4057   PetscCall(PetscMalloc1(m * nc, &aa));
4058   ii[0] = 0;
4059   for (PetscInt k = 0; k < m; k++) {
4060     for (PetscInt s = 0; s < nc; s++) {
4061       const PetscInt    c = s + k * nc;
4062       const PetscScalar v = a[k + s * m];
4063 
4064       if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4065       jj[nnz] = j[c];
4066       aa[nnz] = a[k + s * m];
4067       nnz++;
4068     }
4069     ii[k + 1] = nnz;
4070   }
4071 
4072   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4073   PetscCall(MatDenseRestoreArrayRead(D, &a));
4074 
4075   aij          = (Mat_SeqAIJ *)(*mat)->data;
4076   aij->free_a  = PETSC_TRUE;
4077   aij->free_ij = PETSC_TRUE;
4078   PetscFunctionReturn(PETSC_SUCCESS);
4079 }
4080 
4081 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
4082 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4083 {
4084   PetscInt        n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4085   const PetscBool allowzeropivot    = PETSC_FALSE;
4086   PetscBool       zeropivotdetected = PETSC_FALSE;
4087   const PetscReal shift             = 0.0;
4088   PetscInt        ipvt[5], *ii, *jj, *indi, *indj;
4089   PetscScalar     work[25], *v_work = NULL, *aa, *diag;
4090   PetscLogDouble  flops = 0.0;
4091 
4092   PetscFunctionBegin;
4093   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4094   for (PetscInt i = 0; i < nblocks; i++) {
4095     ncnt += bsizes[i];
4096     ncnt2 += PetscSqr(bsizes[i]);
4097   }
4098   PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4099   for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4100   if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));
4101 
4102   PetscCall(PetscMalloc1(n + 1, &ii));
4103   PetscCall(PetscMalloc1(ncnt2, &jj));
4104   PetscCall(PetscCalloc1(ncnt2, &aa));
4105 
4106   ncnt  = 0;
4107   ii[0] = 0;
4108   indi  = ii;
4109   indj  = jj;
4110   diag  = aa;
4111   for (PetscInt i = 0; i < nblocks; i++) {
4112     const PetscInt bs = bsizes[i];
4113 
4114     for (PetscInt k = 0; k < bs; k++) {
4115       indi[k + 1] = indi[k] + bs;
4116       for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4117     }
4118     PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4119     switch (bs) {
4120     case 1:
4121       *diag = 1.0 / (*diag);
4122       break;
4123     case 2:
4124       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4125       break;
4126     case 3:
4127       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4128       break;
4129     case 4:
4130       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4131       break;
4132     case 5:
4133       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4134       break;
4135     case 6:
4136       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4137       break;
4138     case 7:
4139       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4140       break;
4141     default:
4142       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4143     }
4144     ncnt += bs;
4145     flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4146     diag += bs * bs;
4147     indj += bs * bs;
4148     indi += bs;
4149   }
4150   PetscCall(PetscLogFlops(flops));
4151   PetscCall(PetscFree2(v_work, v_pivots));
4152   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4153   {
4154     Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4155     aij->free_a     = PETSC_TRUE;
4156     aij->free_ij    = PETSC_TRUE;
4157   }
4158   PetscFunctionReturn(PETSC_SUCCESS);
4159 }
4160 
4161 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B)
4162 {
4163   const PetscScalar *rarr;
4164   PetscScalar       *larr;
4165   PetscSF            vsf;
4166   PetscInt           n, rld, lld;
4167 
4168   PetscFunctionBegin;
4169   PetscCall(MatGetSize(A, NULL, &n));
4170   PetscCall(MatDenseGetLDA(A, &rld));
4171   PetscCall(MatDenseGetLDA(B, &lld));
4172   PetscCall(MatDenseGetArrayRead(A, &rarr));
4173   PetscCall(MatDenseGetArrayWrite(B, &larr));
4174   PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf));
4175   PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4176   PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4177   PetscCall(MatDenseRestoreArrayRead(A, &rarr));
4178   PetscCall(MatDenseRestoreArrayWrite(B, &larr));
4179   PetscCall(PetscSFDestroy(&vsf));
4180   PetscFunctionReturn(PETSC_SUCCESS);
4181 }
4182 
4183 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4184 {
4185   PC_IS          *pcis       = (PC_IS *)pc->data;
4186   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
4187   PCBDDCGraph     graph      = pcbddc->mat_graph;
4188   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4189   /* submatrices of local problem */
4190   Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4191   /* submatrices of local coarse problem */
4192   Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4193   /* working matrices */
4194   Mat C_CR;
4195 
4196   /* additional working stuff */
4197   PC              pc_R;
4198   IS              is_R, is_V, is_C;
4199   const PetscInt *idx_V, *idx_C;
4200   Mat             F, Brhs = NULL;
4201   Vec             dummy_vec;
4202   PetscBool       isLU, isCHOL, need_benign_correction, sparserhs;
4203   PetscInt       *idx_V_B;
4204   PetscInt        lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4205   PetscInt        n_eff_vertices, n_eff_constraints;
4206   PetscInt        i, n_R, n_D, n_B;
4207   PetscScalar     one = 1.0, m_one = -1.0;
4208 
4209   /* Multi-element support */
4210   PetscBool multi_element = graph->multi_element;
4211   PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4212   PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4213   IS        is_C_perm = NULL;
4214   PetscInt  n_C_bss = 0, *C_bss = NULL;
4215   Mat       coarse_phi_multi;
4216 
4217   PetscFunctionBegin;
4218   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4219   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4220 
4221   /* Set Non-overlapping dimensions */
4222   n_vertices    = pcbddc->n_vertices;
4223   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4224   n_B           = pcis->n_B;
4225   n_D           = pcis->n - n_B;
4226   n_R           = pcis->n - n_vertices;
4227 
4228   /* vertices in boundary numbering */
4229   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4230   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4231   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
4232 
4233   /* these two cases still need to be optimized */
4234   if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;
4235 
4236   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
4237   if (multi_element) {
4238     PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
4239 
4240     PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4241     PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4242     PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4243     PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4244     PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4245 
4246     /* group vertices and constraints by subdomain id */
4247     const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4248     const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4249     PetscInt       *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4250     PetscInt        n_el = PetscMax(graph->n_local_subs, 1);
4251 
4252     PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4253     PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4254     PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4255     for (PetscInt i = 0; i < n_vertices; i++) {
4256       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4257 
4258       V_to_eff_V[i] = count_eff[s];
4259       count_eff[s] += 1;
4260     }
4261     for (PetscInt i = 0; i < n_constraints; i++) {
4262       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;
4263 
4264       C_to_eff_C[i] = count_eff[s];
4265       count_eff[s] += 1;
4266     }
4267 
4268     /* preallocation */
4269     PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4270     for (PetscInt i = 0; i < n_vertices; i++) {
4271       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4272 
4273       nnz[i] = count_eff[s] + count_eff[s + 1];
4274     }
4275     for (PetscInt i = 0; i < n_constraints; i++) {
4276       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;
4277 
4278       nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4279     }
4280     PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4281     PetscCall(PetscFree(nnz));
4282 
4283     n_eff_vertices    = 0;
4284     n_eff_constraints = 0;
4285     for (PetscInt i = 0; i < n_el; i++) {
4286       n_eff_vertices       = PetscMax(n_eff_vertices, count_eff[2 * i]);
4287       n_eff_constraints    = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4288       count_eff[2 * i]     = 0;
4289       count_eff[2 * i + 1] = 0;
4290     }
4291 
4292     const PetscInt *idx;
4293     PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));
4294 
4295     for (PetscInt i = 0; i < n_vertices; i++) {
4296       const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4297       const PetscInt s = 2 * e;
4298 
4299       V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4300       count_eff[s] += 1;
4301     }
4302     for (PetscInt i = 0; i < n_constraints; i++) {
4303       const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4304       const PetscInt s = 2 * e + 1;
4305 
4306       C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4307       count_eff[s] += 1;
4308     }
4309 
4310     PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4311     PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4312     PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4313     PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4314     for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4315     for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4316     for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4317     for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;
4318 
4319     PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4320     for (PetscInt i = 0; i < n_R; i++) {
4321       const PetscInt e = graph->nodes[idx[i]].local_sub;
4322       const PetscInt s = 2 * e;
4323       PetscInt       j;
4324 
4325       for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4326       for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4327     }
4328     PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4329     PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4330     for (PetscInt i = 0; i < n_B; i++) {
4331       const PetscInt e = graph->nodes[idx[i]].local_sub;
4332       const PetscInt s = 2 * e;
4333       PetscInt       j;
4334 
4335       for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4336       for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4337     }
4338     PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));
4339 
4340     /* permutation and blocksizes for block invert of S_CC */
4341     PetscInt *idxp;
4342 
4343     PetscCall(PetscMalloc1(n_constraints, &idxp));
4344     PetscCall(PetscMalloc1(n_el, &C_bss));
4345     n_C_bss = 0;
4346     for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4347       const PetscInt nc = count_eff[2 * e + 1];
4348 
4349       if (nc) C_bss[n_C_bss++] = nc;
4350       for (PetscInt c = 0; c < nc; c++) { idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; }
4351       cnt += nc;
4352     }
4353 
4354     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));
4355 
4356     PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4357     PetscCall(PetscFree(count_eff));
4358   } else {
4359     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4360     n_eff_constraints = n_constraints;
4361     n_eff_vertices    = n_vertices;
4362   }
4363 
4364   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4365   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4366   PetscCall(PCSetUp(pc_R));
4367   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4368   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4369   lda_rhs                = n_R;
4370   need_benign_correction = PETSC_FALSE;
4371   if (isLU || isCHOL) {
4372     PetscCall(PCFactorGetMatrix(pc_R, &F));
4373   } else if (sub_schurs && sub_schurs->reuse_solver) {
4374     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4375     MatFactorType      type;
4376 
4377     F = reuse_solver->F;
4378     PetscCall(MatGetFactorType(F, &type));
4379     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4380     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4381     PetscCall(MatGetSize(F, &lda_rhs, NULL));
4382     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4383   } else F = NULL;
4384 
4385   /* determine if we can use a sparse right-hand side */
4386   sparserhs = PETSC_FALSE;
4387   if (F && !multi_element) {
4388     MatSolverType solver;
4389 
4390     PetscCall(MatFactorGetSolverType(F, &solver));
4391     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4392   }
4393 
4394   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4395   dummy_vec = NULL;
4396   if (need_benign_correction && lda_rhs != n_R && F) {
4397     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4398     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4399     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4400   }
4401 
4402   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4403   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4404 
4405   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4406   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4407   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4408   PetscCall(ISGetIndices(is_V, &idx_V));
4409   PetscCall(ISGetIndices(is_C, &idx_C));
4410 
4411   /* Precompute stuffs needed for preprocessing and application of BDDC*/
4412   if (n_constraints) {
4413     Mat C_B;
4414 
4415     /* Extract constraints on R nodes: C_{CR}  */
4416     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4417     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4418 
4419     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4420     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4421     if (!sparserhs) {
4422       PetscScalar *marr;
4423 
4424       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4425       PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4426       for (i = 0; i < n_constraints; i++) {
4427         const PetscScalar *row_cmat_values;
4428         const PetscInt    *row_cmat_indices;
4429         PetscInt           size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;
4430 
4431         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4432         for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4433         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4434       }
4435       PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4436     } else {
4437       Mat tC_CR;
4438 
4439       PetscCall(MatScale(C_CR, -1.0));
4440       if (lda_rhs != n_R) {
4441         PetscScalar *aa;
4442         PetscInt     r, *ii, *jj;
4443         PetscBool    done;
4444 
4445         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4446         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4447         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4448         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4449         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4450         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4451       } else {
4452         PetscCall(PetscObjectReference((PetscObject)C_CR));
4453         tC_CR = C_CR;
4454       }
4455       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4456       PetscCall(MatDestroy(&tC_CR));
4457     }
4458     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4459     if (F) {
4460       if (need_benign_correction) {
4461         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4462 
4463         /* rhs is already zero on interior dofs, no need to change the rhs */
4464         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4465       }
4466       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4467       if (need_benign_correction) {
4468         PetscScalar       *marr;
4469         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4470 
4471         /* XXX multi_element? */
4472         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4473         if (lda_rhs != n_R) {
4474           for (i = 0; i < n_eff_constraints; i++) {
4475             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4476             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4477             PetscCall(VecResetArray(dummy_vec));
4478           }
4479         } else {
4480           for (i = 0; i < n_eff_constraints; i++) {
4481             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4482             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4483             PetscCall(VecResetArray(pcbddc->vec1_R));
4484           }
4485         }
4486         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4487       }
4488     } else {
4489       const PetscScalar *barr;
4490       PetscScalar       *marr;
4491 
4492       PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4493       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4494       for (i = 0; i < n_eff_constraints; i++) {
4495         PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4496         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4497         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4498         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4499         PetscCall(VecResetArray(pcbddc->vec1_R));
4500         PetscCall(VecResetArray(pcbddc->vec2_R));
4501       }
4502       PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4503       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4504     }
4505     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4506     PetscCall(MatDestroy(&Brhs));
4507     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4508     if (!pcbddc->switch_static) {
4509       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4510       for (i = 0; i < n_eff_constraints; i++) {
4511         Vec r, b;
4512         PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
4513         PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
4514         PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4515         PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4516         PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
4517         PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
4518       }
4519       if (multi_element) {
4520         Mat T;
4521 
4522         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4523         PetscCall(MatDestroy(&local_auxmat2_R));
4524         local_auxmat2_R = T;
4525         PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4526         PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4527         pcbddc->local_auxmat2 = T;
4528       }
4529       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4530     } else {
4531       if (multi_element) {
4532         Mat T;
4533 
4534         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4535         PetscCall(MatDestroy(&local_auxmat2_R));
4536         local_auxmat2_R = T;
4537       }
4538       if (lda_rhs != n_R) {
4539         PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4540       } else {
4541         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4542         pcbddc->local_auxmat2 = local_auxmat2_R;
4543       }
4544       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4545     }
4546     PetscCall(MatScale(S_CC, m_one));
4547     if (multi_element) {
4548       Mat T, T2;
4549       IS  isp, ispi;
4550 
4551       isp = is_C_perm;
4552 
4553       PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4554       PetscCall(MatPermute(S_CC, isp, isp, &T));
4555       PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4556       PetscCall(MatDestroy(&T));
4557       PetscCall(MatDestroy(&S_CC));
4558       PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4559       PetscCall(MatDestroy(&T2));
4560       PetscCall(ISDestroy(&ispi));
4561     } else {
4562       if (isCHOL) {
4563         PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4564       } else {
4565         PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4566       }
4567       PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4568     }
4569     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4570     PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1));
4571     PetscCall(MatDestroy(&C_B));
4572     PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4573   }
4574 
4575   /* Get submatrices from subdomain matrix */
4576   if (n_vertices) {
4577 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4578     PetscBool oldpin;
4579 #endif
4580     IS is_aux;
4581 
4582     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4583       IS tis;
4584 
4585       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4586       PetscCall(ISSort(tis));
4587       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4588       PetscCall(ISDestroy(&tis));
4589     } else {
4590       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4591     }
4592 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4593     oldpin = pcbddc->local_mat->boundtocpu;
4594 #endif
4595     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4596     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4597     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4598     /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4599     PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4600     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4601 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4602     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4603 #endif
4604     PetscCall(ISDestroy(&is_aux));
4605   }
4606   PetscCall(ISDestroy(&is_C_perm));
4607   PetscCall(PetscFree(C_bss));
4608 
4609   p0_lidx_I = NULL;
4610   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4611     const PetscInt *idxs;
4612 
4613     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4614     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4615     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4616     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4617   }
4618 
4619   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4620 
4621   /* Matrices of coarse basis functions (local) */
4622   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4623   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4624   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4625   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4626   if (!multi_element) {
4627     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4628     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4629     coarse_phi_multi = NULL;
4630   } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4631     IS is_rows[2] = {pcbddc->is_R_local, NULL};
4632     IS is_cols[2] = {is_V, is_C};
4633 
4634     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4635     PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4636     PetscCall(ISDestroy(&is_rows[1]));
4637   }
4638 
4639   /* vertices */
4640   if (n_vertices) {
4641     PetscBool restoreavr = PETSC_FALSE;
4642     Mat       A_RRmA_RV  = NULL;
4643 
4644     PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4645     PetscCall(MatDestroy(&A_VV));
4646 
4647     if (n_R) {
4648       Mat A_RV_bcorr = NULL, S_VV;
4649 
4650       PetscCall(MatScale(A_RV, m_one));
4651       if (need_benign_correction) {
4652         ISLocalToGlobalMapping RtoN;
4653         IS                     is_p0;
4654         PetscInt              *idxs_p0, n;
4655 
4656         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4657         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4658         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4659         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4660         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4661         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4662         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4663         PetscCall(ISDestroy(&is_p0));
4664       }
4665 
4666       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4667       if (!sparserhs || need_benign_correction) {
4668         if (lda_rhs == n_R && !multi_element) {
4669           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4670         } else {
4671           Mat             T;
4672           PetscScalar    *av, *array;
4673           const PetscInt *xadj, *adjncy;
4674           PetscInt        n;
4675           PetscBool       flg_row;
4676 
4677           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4678           PetscCall(MatDenseGetArrayWrite(T, &array));
4679           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4680           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4681           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4682           for (i = 0; i < n; i++) {
4683             PetscInt j;
4684             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4685           }
4686           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4687           PetscCall(MatDenseRestoreArrayWrite(T, &array));
4688           PetscCall(MatDestroy(&A_RV));
4689           A_RV = T;
4690         }
4691         if (need_benign_correction) {
4692           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4693           PetscScalar       *marr;
4694 
4695           /* XXX multi_element */
4696           PetscCall(MatDenseGetArray(A_RV, &marr));
4697           /* need \Phi^T A_RV = (I+L)A_RV, L given by
4698 
4699                  | 0 0  0 | (V)
4700              L = | 0 0 -1 | (P-p0)
4701                  | 0 0 -1 | (p0)
4702 
4703           */
4704           for (i = 0; i < reuse_solver->benign_n; i++) {
4705             const PetscScalar *vals;
4706             const PetscInt    *idxs, *idxs_zero;
4707             PetscInt           n, j, nz;
4708 
4709             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4710             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4711             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4712             for (j = 0; j < n; j++) {
4713               PetscScalar val = vals[j];
4714               PetscInt    k, col = idxs[j];
4715               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4716             }
4717             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4718             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4719           }
4720           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4721         }
4722         PetscCall(PetscObjectReference((PetscObject)A_RV));
4723         Brhs = A_RV;
4724       } else {
4725         Mat tA_RVT, A_RVT;
4726 
4727         if (!pcbddc->symmetric_primal) {
4728           /* A_RV already scaled by -1 */
4729           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4730         } else {
4731           restoreavr = PETSC_TRUE;
4732           PetscCall(MatScale(A_VR, -1.0));
4733           PetscCall(PetscObjectReference((PetscObject)A_VR));
4734           A_RVT = A_VR;
4735         }
4736         if (lda_rhs != n_R) {
4737           PetscScalar *aa;
4738           PetscInt     r, *ii, *jj;
4739           PetscBool    done;
4740 
4741           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4742           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4743           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4744           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4745           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4746           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4747         } else {
4748           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4749           tA_RVT = A_RVT;
4750         }
4751         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4752         PetscCall(MatDestroy(&tA_RVT));
4753         PetscCall(MatDestroy(&A_RVT));
4754       }
4755       if (F) {
4756         /* need to correct the rhs */
4757         if (need_benign_correction) {
4758           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4759           PetscScalar       *marr;
4760 
4761           PetscCall(MatDenseGetArray(Brhs, &marr));
4762           if (lda_rhs != n_R) {
4763             for (i = 0; i < n_eff_vertices; i++) {
4764               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4765               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4766               PetscCall(VecResetArray(dummy_vec));
4767             }
4768           } else {
4769             for (i = 0; i < n_eff_vertices; i++) {
4770               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4771               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4772               PetscCall(VecResetArray(pcbddc->vec1_R));
4773             }
4774           }
4775           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4776         }
4777         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4778         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4779         /* need to correct the solution */
4780         if (need_benign_correction) {
4781           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4782           PetscScalar       *marr;
4783 
4784           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4785           if (lda_rhs != n_R) {
4786             for (i = 0; i < n_eff_vertices; i++) {
4787               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4788               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4789               PetscCall(VecResetArray(dummy_vec));
4790             }
4791           } else {
4792             for (i = 0; i < n_eff_vertices; i++) {
4793               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4794               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4795               PetscCall(VecResetArray(pcbddc->vec1_R));
4796             }
4797           }
4798           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4799         }
4800       } else {
4801         const PetscScalar *barr;
4802         PetscScalar       *marr;
4803 
4804         PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4805         PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4806         for (i = 0; i < n_eff_vertices; i++) {
4807           PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4808           PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4809           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4810           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4811           PetscCall(VecResetArray(pcbddc->vec1_R));
4812           PetscCall(VecResetArray(pcbddc->vec2_R));
4813         }
4814         PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4815         PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4816       }
4817       PetscCall(MatDestroy(&A_RV));
4818       PetscCall(MatDestroy(&Brhs));
4819       /* S_VV and S_CV */
4820       if (n_constraints) {
4821         Mat B;
4822 
4823         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4824         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B));
4825 
4826         /* S_CV = pcbddc->local_auxmat1 * B */
4827         if (multi_element) {
4828           Mat T;
4829 
4830           PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4831           PetscCall(MatDestroy(&B));
4832           B = T;
4833         }
4834         PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4835         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4836         PetscCall(MatProductSetFromOptions(S_CV));
4837         PetscCall(MatProductSymbolic(S_CV));
4838         PetscCall(MatProductNumeric(S_CV));
4839         PetscCall(MatProductClear(S_CV));
4840         PetscCall(MatDestroy(&B));
4841 
4842         /* B = local_auxmat2_R * S_CV */
4843         PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4844         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4845         PetscCall(MatProductSetFromOptions(B));
4846         PetscCall(MatProductSymbolic(B));
4847         PetscCall(MatProductNumeric(B));
4848 
4849         PetscCall(MatScale(S_CV, m_one));
4850         PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));
4851 
4852         if (multi_element) {
4853           Mat T;
4854 
4855           PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4856           PetscCall(MatDestroy(&A_RRmA_RV));
4857           A_RRmA_RV = T;
4858         }
4859         PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4860         PetscCall(MatDestroy(&B));
4861       } else if (multi_element) {
4862         Mat T;
4863 
4864         PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4865         PetscCall(MatDestroy(&A_RRmA_RV));
4866         A_RRmA_RV = T;
4867       }
4868 
4869       if (lda_rhs != n_R) {
4870         Mat T;
4871 
4872         PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4873         PetscCall(MatDestroy(&A_RRmA_RV));
4874         A_RRmA_RV = T;
4875       }
4876 
4877       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4878       if (need_benign_correction) { /* XXX SPARSE */
4879         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4880         PetscScalar       *sums;
4881         const PetscScalar *marr;
4882 
4883         PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
4884         PetscCall(PetscMalloc1(n_vertices, &sums));
4885         for (i = 0; i < reuse_solver->benign_n; i++) {
4886           const PetscScalar *vals;
4887           const PetscInt    *idxs, *idxs_zero;
4888           PetscInt           n, j, nz;
4889 
4890           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4891           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4892           for (j = 0; j < n_vertices; j++) {
4893             sums[j] = 0.;
4894             for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
4895           }
4896           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4897           for (j = 0; j < n; j++) {
4898             PetscScalar val = vals[j];
4899             for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
4900           }
4901           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4902           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4903         }
4904         PetscCall(PetscFree(sums));
4905         PetscCall(MatDestroy(&A_RV_bcorr));
4906         PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
4907       }
4908 
4909       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV));
4910       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4911       PetscCall(MatDestroy(&S_VV));
4912     }
4913 
4914     /* coarse basis functions */
4915     if (coarse_phi_multi) {
4916       Mat Vid;
4917 
4918       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
4919       PetscCall(MatShift_Basic(Vid, 1.0));
4920       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
4921       PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
4922       PetscCall(MatDestroy(&Vid));
4923     } else {
4924       if (A_RRmA_RV) {
4925         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B));
4926         if (pcbddc->switch_static || pcbddc->dbg_flag) {
4927           PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D));
4928           if (pcbddc->benign_n) {
4929             for (i = 0; i < n_vertices; i++) { PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); }
4930           }
4931         }
4932       }
4933       for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
4934       PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4935       PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4936     }
4937     PetscCall(MatDestroy(&A_RRmA_RV));
4938   }
4939   PetscCall(MatDestroy(&A_RV));
4940   PetscCall(VecDestroy(&dummy_vec));
4941 
4942   if (n_constraints) {
4943     Mat B, B2;
4944 
4945     PetscCall(MatScale(S_CC, m_one));
4946     PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
4947     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4948     PetscCall(MatProductSetFromOptions(B));
4949     PetscCall(MatProductSymbolic(B));
4950     PetscCall(MatProductNumeric(B));
4951 
4952     if (n_vertices) {
4953       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4954         PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
4955       } else {
4956         if (lda_rhs != n_R) {
4957           Mat tB;
4958 
4959           PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
4960           PetscCall(MatDestroy(&B));
4961           B = tB;
4962         }
4963         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC));
4964       }
4965       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
4966     }
4967 
4968     /* coarse basis functions */
4969     if (coarse_phi_multi) {
4970       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
4971     } else {
4972       PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4973       PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2));
4974       PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
4975       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4976         PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4977         PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2));
4978         if (pcbddc->benign_n) {
4979           for (i = 0; i < n_constraints; i++) { PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); }
4980         }
4981         PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
4982       }
4983     }
4984     PetscCall(MatDestroy(&B));
4985   }
4986 
4987   /* assemble sparse coarse basis functions */
4988   if (coarse_phi_multi) {
4989     Mat T;
4990 
4991     PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
4992     PetscCall(MatDestroy(&coarse_phi_multi));
4993     PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
4994     if (pcbddc->switch_static || pcbddc->dbg_flag) { PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); }
4995     PetscCall(MatDestroy(&T));
4996   }
4997   PetscCall(MatDestroy(&local_auxmat2_R));
4998   PetscCall(PetscFree(p0_lidx_I));
4999 
5000   /* coarse matrix entries relative to B_0 */
5001   if (pcbddc->benign_n) {
5002     Mat                B0_B, B0_BPHI;
5003     IS                 is_dummy;
5004     const PetscScalar *data;
5005     PetscInt           j;
5006 
5007     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5008     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5009     PetscCall(ISDestroy(&is_dummy));
5010     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5011     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5012     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5013     for (j = 0; j < pcbddc->benign_n; j++) {
5014       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5015       for (i = 0; i < pcbddc->local_primal_size; i++) {
5016         PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5017         PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5018       }
5019     }
5020     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5021     PetscCall(MatDestroy(&B0_B));
5022     PetscCall(MatDestroy(&B0_BPHI));
5023   }
5024 
5025   /* compute other basis functions for non-symmetric problems */
5026   if (!pcbddc->symmetric_primal) {
5027     Mat          B_V = NULL, B_C = NULL;
5028     PetscScalar *marray, *work;
5029 
5030     /* TODO multi_element MatDenseScatter */
5031     if (n_constraints) {
5032       Mat S_CCT, C_CRT;
5033 
5034       PetscCall(MatScale(S_CC, m_one));
5035       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5036       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5037       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C));
5038       PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5039       PetscCall(MatDestroy(&S_CCT));
5040       if (n_vertices) {
5041         Mat S_VCT;
5042 
5043         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5044         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V));
5045         PetscCall(MatDestroy(&S_VCT));
5046         PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5047       }
5048       PetscCall(MatDestroy(&C_CRT));
5049     } else {
5050       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5051     }
5052     if (n_vertices && n_R) {
5053       PetscScalar    *av, *marray;
5054       const PetscInt *xadj, *adjncy;
5055       PetscInt        n;
5056       PetscBool       flg_row;
5057 
5058       /* B_V = B_V - A_VR^T */
5059       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5060       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5061       PetscCall(MatSeqAIJGetArray(A_VR, &av));
5062       PetscCall(MatDenseGetArray(B_V, &marray));
5063       for (i = 0; i < n; i++) {
5064         PetscInt j;
5065         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5066       }
5067       PetscCall(MatDenseRestoreArray(B_V, &marray));
5068       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5069       PetscCall(MatDestroy(&A_VR));
5070     }
5071 
5072     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5073     PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5074     if (n_vertices) {
5075       PetscCall(MatDenseGetArray(B_V, &marray));
5076       for (i = 0; i < n_vertices; i++) {
5077         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5078         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5079         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5080         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5081         PetscCall(VecResetArray(pcbddc->vec1_R));
5082         PetscCall(VecResetArray(pcbddc->vec2_R));
5083       }
5084       PetscCall(MatDenseRestoreArray(B_V, &marray));
5085     }
5086     if (B_C) {
5087       PetscCall(MatDenseGetArray(B_C, &marray));
5088       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5089         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5090         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5091         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5092         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5093         PetscCall(VecResetArray(pcbddc->vec1_R));
5094         PetscCall(VecResetArray(pcbddc->vec2_R));
5095       }
5096       PetscCall(MatDenseRestoreArray(B_C, &marray));
5097     }
5098     /* coarse basis functions */
5099     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5100     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5101     for (i = 0; i < pcbddc->local_primal_size; i++) {
5102       Vec v;
5103 
5104       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5105       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5106       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5107       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5108       if (i < n_vertices) {
5109         PetscScalar one = 1.0;
5110         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5111         PetscCall(VecAssemblyBegin(v));
5112         PetscCall(VecAssemblyEnd(v));
5113       }
5114       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
5115 
5116       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5117         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5118         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5119         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5120         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5121       }
5122       PetscCall(VecResetArray(pcbddc->vec1_R));
5123     }
5124     PetscCall(MatDestroy(&B_V));
5125     PetscCall(MatDestroy(&B_C));
5126     PetscCall(PetscFree(work));
5127   } else {
5128     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5129     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5130     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5131     pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5132   }
5133   PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5134   PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));
5135 
5136   /* free memory */
5137   PetscCall(PetscFree(V_to_eff_V));
5138   PetscCall(PetscFree(C_to_eff_C));
5139   PetscCall(PetscFree(R_eff_V_J));
5140   PetscCall(PetscFree(R_eff_C_J));
5141   PetscCall(PetscFree(B_eff_V_J));
5142   PetscCall(PetscFree(B_eff_C_J));
5143   PetscCall(ISDestroy(&is_R));
5144   PetscCall(ISRestoreIndices(is_V, &idx_V));
5145   PetscCall(ISRestoreIndices(is_C, &idx_C));
5146   PetscCall(ISDestroy(&is_V));
5147   PetscCall(ISDestroy(&is_C));
5148   PetscCall(PetscFree(idx_V_B));
5149   PetscCall(MatDestroy(&S_CV));
5150   PetscCall(MatDestroy(&S_VC));
5151   PetscCall(MatDestroy(&S_CC));
5152   if (n_vertices) PetscCall(MatDestroy(&A_VR));
5153   if (n_constraints) PetscCall(MatDestroy(&C_CR));
5154   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
5155 
5156   /* Checking coarse_sub_mat and coarse basis functions */
5157   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5158   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5159   if (pcbddc->dbg_flag) {
5160     Mat       AUXMAT, TM1, TM2, TM3, TM4;
5161     Mat       coarse_phi_D, coarse_phi_B;
5162     Mat       coarse_psi_D, coarse_psi_B;
5163     Mat       A_II, A_BB, A_IB, A_BI;
5164     Mat       C_B, CPHI;
5165     IS        is_dummy;
5166     Vec       mones;
5167     MatType   checkmattype = MATSEQAIJ;
5168     PetscReal real_value;
5169 
5170     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5171       Mat A;
5172       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5173       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5174       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5175       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5176       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5177       PetscCall(MatDestroy(&A));
5178     } else {
5179       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5180       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5181       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5182       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5183     }
5184     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5185     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5186     if (!pcbddc->symmetric_primal) {
5187       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5188       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5189     }
5190     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5191     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5192     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5193     if (!pcbddc->symmetric_primal) {
5194       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5195       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5196       PetscCall(MatDestroy(&AUXMAT));
5197       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5198       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5199       PetscCall(MatDestroy(&AUXMAT));
5200       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5201       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5202       PetscCall(MatDestroy(&AUXMAT));
5203       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5204       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5205       PetscCall(MatDestroy(&AUXMAT));
5206     } else {
5207       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5208       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5209       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5210       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5211       PetscCall(MatDestroy(&AUXMAT));
5212       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5213       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5214       PetscCall(MatDestroy(&AUXMAT));
5215     }
5216     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5217     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5218     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5219     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5220     if (pcbddc->benign_n) {
5221       Mat                B0_B, B0_BPHI;
5222       const PetscScalar *data2;
5223       PetscScalar       *data;
5224       PetscInt           j;
5225 
5226       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5227       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5228       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5229       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5230       PetscCall(MatDenseGetArray(TM1, &data));
5231       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5232       for (j = 0; j < pcbddc->benign_n; j++) {
5233         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5234         for (i = 0; i < pcbddc->local_primal_size; i++) {
5235           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5236           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5237         }
5238       }
5239       PetscCall(MatDenseRestoreArray(TM1, &data));
5240       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5241       PetscCall(MatDestroy(&B0_B));
5242       PetscCall(ISDestroy(&is_dummy));
5243       PetscCall(MatDestroy(&B0_BPHI));
5244     }
5245     PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5246     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5247     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5248     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
5249 
5250     /* check constraints */
5251     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5252     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5253     if (!pcbddc->benign_n) { /* TODO: add benign case */
5254       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5255     } else {
5256       PetscScalar *data;
5257       Mat          tmat;
5258       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5259       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5260       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5261       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5262       PetscCall(MatDestroy(&tmat));
5263     }
5264     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5265     PetscCall(VecSet(mones, -1.0));
5266     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5267     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5268     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5269     if (!pcbddc->symmetric_primal) {
5270       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5271       PetscCall(VecSet(mones, -1.0));
5272       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5273       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5274       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5275     }
5276     PetscCall(MatDestroy(&C_B));
5277     PetscCall(MatDestroy(&CPHI));
5278     PetscCall(ISDestroy(&is_dummy));
5279     PetscCall(VecDestroy(&mones));
5280     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5281     PetscCall(MatDestroy(&A_II));
5282     PetscCall(MatDestroy(&A_BB));
5283     PetscCall(MatDestroy(&A_IB));
5284     PetscCall(MatDestroy(&A_BI));
5285     PetscCall(MatDestroy(&TM1));
5286     PetscCall(MatDestroy(&TM2));
5287     PetscCall(MatDestroy(&TM3));
5288     PetscCall(MatDestroy(&TM4));
5289     PetscCall(MatDestroy(&coarse_phi_D));
5290     PetscCall(MatDestroy(&coarse_phi_B));
5291     if (!pcbddc->symmetric_primal) {
5292       PetscCall(MatDestroy(&coarse_psi_D));
5293       PetscCall(MatDestroy(&coarse_psi_B));
5294     }
5295   }
5296 
5297 #if 0
5298   {
5299     PetscViewer viewer;
5300     char filename[256];
5301 
5302     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5303     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5304     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5305     PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5306     PetscCall(MatView(*coarse_submat,viewer));
5307     if (pcbddc->coarse_phi_B) {
5308       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5309       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5310     }
5311     if (pcbddc->coarse_phi_D) {
5312       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5313       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5314     }
5315     if (pcbddc->coarse_psi_B) {
5316       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5317       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5318     }
5319     if (pcbddc->coarse_psi_D) {
5320       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5321       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5322     }
5323     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5324     PetscCall(MatView(pcbddc->local_mat,viewer));
5325     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5326     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5327     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5328     PetscCall(ISView(pcis->is_I_local,viewer));
5329     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5330     PetscCall(ISView(pcis->is_B_local,viewer));
5331     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5332     PetscCall(ISView(pcbddc->is_R_local,viewer));
5333     PetscCall(PetscViewerDestroy(&viewer));
5334   }
5335 #endif
5336 
5337   /* device support */
5338   {
5339     PetscBool iscuda, iship, iskokkos;
5340     MatType   mtype = NULL;
5341 
5342     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5343     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5344     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5345     if (iskokkos) {
5346       if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5347       else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5348     }
5349     if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5350     else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5351     else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5352     if (mtype) {
5353       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5354       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5355       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5356       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5357       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5358       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5359     }
5360   }
5361   PetscFunctionReturn(PETSC_SUCCESS);
5362 }
5363 
5364 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5365 {
5366   Mat      *work_mat;
5367   IS        isrow_s, iscol_s;
5368   PetscBool rsorted, csorted;
5369   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
5370 
5371   PetscFunctionBegin;
5372   PetscCall(ISSorted(isrow, &rsorted));
5373   PetscCall(ISSorted(iscol, &csorted));
5374   PetscCall(ISGetLocalSize(isrow, &rsize));
5375   PetscCall(ISGetLocalSize(iscol, &csize));
5376 
5377   if (!rsorted) {
5378     const PetscInt *idxs;
5379     PetscInt       *idxs_sorted, i;
5380 
5381     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5382     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5383     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5384     PetscCall(ISGetIndices(isrow, &idxs));
5385     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5386     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5387     PetscCall(ISRestoreIndices(isrow, &idxs));
5388     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5389   } else {
5390     PetscCall(PetscObjectReference((PetscObject)isrow));
5391     isrow_s = isrow;
5392   }
5393 
5394   if (!csorted) {
5395     if (isrow == iscol) {
5396       PetscCall(PetscObjectReference((PetscObject)isrow_s));
5397       iscol_s = isrow_s;
5398     } else {
5399       const PetscInt *idxs;
5400       PetscInt       *idxs_sorted, i;
5401 
5402       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5403       PetscCall(PetscMalloc1(csize, &idxs_sorted));
5404       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5405       PetscCall(ISGetIndices(iscol, &idxs));
5406       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5407       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5408       PetscCall(ISRestoreIndices(iscol, &idxs));
5409       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5410     }
5411   } else {
5412     PetscCall(PetscObjectReference((PetscObject)iscol));
5413     iscol_s = iscol;
5414   }
5415 
5416   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
5417 
5418   if (!rsorted || !csorted) {
5419     Mat new_mat;
5420     IS  is_perm_r, is_perm_c;
5421 
5422     if (!rsorted) {
5423       PetscInt *idxs_r, i;
5424       PetscCall(PetscMalloc1(rsize, &idxs_r));
5425       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5426       PetscCall(PetscFree(idxs_perm_r));
5427       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5428     } else {
5429       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5430     }
5431     PetscCall(ISSetPermutation(is_perm_r));
5432 
5433     if (!csorted) {
5434       if (isrow_s == iscol_s) {
5435         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5436         is_perm_c = is_perm_r;
5437       } else {
5438         PetscInt *idxs_c, i;
5439         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5440         PetscCall(PetscMalloc1(csize, &idxs_c));
5441         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5442         PetscCall(PetscFree(idxs_perm_c));
5443         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5444       }
5445     } else {
5446       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5447     }
5448     PetscCall(ISSetPermutation(is_perm_c));
5449 
5450     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5451     PetscCall(MatDestroy(&work_mat[0]));
5452     work_mat[0] = new_mat;
5453     PetscCall(ISDestroy(&is_perm_r));
5454     PetscCall(ISDestroy(&is_perm_c));
5455   }
5456 
5457   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5458   *B = work_mat[0];
5459   PetscCall(MatDestroyMatrices(1, &work_mat));
5460   PetscCall(ISDestroy(&isrow_s));
5461   PetscCall(ISDestroy(&iscol_s));
5462   PetscFunctionReturn(PETSC_SUCCESS);
5463 }
5464 
5465 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5466 {
5467   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
5468   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
5469   Mat       new_mat, lA;
5470   IS        is_local, is_global;
5471   PetscInt  local_size;
5472   PetscBool isseqaij, issym, isset;
5473 
5474   PetscFunctionBegin;
5475   PetscCall(MatDestroy(&pcbddc->local_mat));
5476   PetscCall(MatGetSize(matis->A, &local_size, NULL));
5477   if (pcbddc->mat_graph->multi_element) {
5478     Mat     *mats, *bdiags;
5479     IS      *gsubs;
5480     PetscInt nsubs = pcbddc->n_local_subs;
5481 
5482     PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5483     PetscCall(PetscMalloc1(nsubs, &gsubs));
5484     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5485     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5486     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5487     PetscCall(PetscFree(gsubs));
5488 
5489     for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5490     PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5491     PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5492     PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5493     PetscCall(PetscFree(mats));
5494   } else {
5495     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5496     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5497     PetscCall(ISDestroy(&is_local));
5498     PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5499     PetscCall(ISDestroy(&is_global));
5500   }
5501   if (pcbddc->dbg_flag) {
5502     Vec       x, x_change;
5503     PetscReal error;
5504 
5505     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5506     PetscCall(VecSetRandom(x, NULL));
5507     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5508     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5509     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5510     PetscCall(MatMult(new_mat, matis->x, matis->y));
5511     if (!pcbddc->change_interior) {
5512       const PetscScalar *x, *y, *v;
5513       PetscReal          lerror = 0.;
5514       PetscInt           i;
5515 
5516       PetscCall(VecGetArrayRead(matis->x, &x));
5517       PetscCall(VecGetArrayRead(matis->y, &y));
5518       PetscCall(VecGetArrayRead(matis->counter, &v));
5519       for (i = 0; i < local_size; i++)
5520         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5521       PetscCall(VecRestoreArrayRead(matis->x, &x));
5522       PetscCall(VecRestoreArrayRead(matis->y, &y));
5523       PetscCall(VecRestoreArrayRead(matis->counter, &v));
5524       PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5525       if (error > PETSC_SMALL) {
5526         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5527           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5528         } else {
5529           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5530         }
5531       }
5532     }
5533     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5534     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5535     PetscCall(VecAXPY(x, -1.0, x_change));
5536     PetscCall(VecNorm(x, NORM_INFINITY, &error));
5537     if (error > PETSC_SMALL) {
5538       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5539         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5540       } else {
5541         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5542       }
5543     }
5544     PetscCall(VecDestroy(&x));
5545     PetscCall(VecDestroy(&x_change));
5546   }
5547 
5548   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5549   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
5550 
5551   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5552   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5553   if (isseqaij) {
5554     PetscCall(MatDestroy(&pcbddc->local_mat));
5555     PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5556     if (lA) {
5557       Mat work;
5558       PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5559       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5560       PetscCall(MatDestroy(&work));
5561     }
5562   } else {
5563     Mat work_mat;
5564 
5565     PetscCall(MatDestroy(&pcbddc->local_mat));
5566     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5567     PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5568     PetscCall(MatDestroy(&work_mat));
5569     if (lA) {
5570       Mat work;
5571       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5572       PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5573       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5574       PetscCall(MatDestroy(&work));
5575     }
5576   }
5577   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5578   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5579   PetscCall(MatDestroy(&new_mat));
5580   PetscFunctionReturn(PETSC_SUCCESS);
5581 }
5582 
5583 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5584 {
5585   PC_IS          *pcis        = (PC_IS *)pc->data;
5586   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
5587   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
5588   PetscInt       *idx_R_local = NULL;
5589   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
5590   PetscInt        vbs, bs;
5591   PetscBT         bitmask = NULL;
5592 
5593   PetscFunctionBegin;
5594   /*
5595     No need to setup local scatters if
5596       - primal space is unchanged
5597         AND
5598       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5599         AND
5600       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5601   */
5602   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5603   /* destroy old objects */
5604   PetscCall(ISDestroy(&pcbddc->is_R_local));
5605   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5606   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5607   /* Set Non-overlapping dimensions */
5608   n_B        = pcis->n_B;
5609   n_D        = pcis->n - n_B;
5610   n_vertices = pcbddc->n_vertices;
5611 
5612   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */
5613 
5614   /* create auxiliary bitmask and allocate workspace */
5615   if (!sub_schurs || !sub_schurs->reuse_solver) {
5616     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5617     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5618     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5619 
5620     for (i = 0, n_R = 0; i < pcis->n; i++) {
5621       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5622     }
5623   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5624     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5625 
5626     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5627     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5628   }
5629 
5630   /* Block code */
5631   vbs = 1;
5632   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5633   if (bs > 1 && !(n_vertices % bs)) {
5634     PetscBool is_blocked = PETSC_TRUE;
5635     PetscInt *vary;
5636     if (!sub_schurs || !sub_schurs->reuse_solver) {
5637       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5638       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5639       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5640       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5641       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5642       for (i = 0; i < pcis->n / bs; i++) {
5643         if (vary[i] != 0 && vary[i] != bs) {
5644           is_blocked = PETSC_FALSE;
5645           break;
5646         }
5647       }
5648       PetscCall(PetscFree(vary));
5649     } else {
5650       /* Verify directly the R set */
5651       for (i = 0; i < n_R / bs; i++) {
5652         PetscInt j, node = idx_R_local[bs * i];
5653         for (j = 1; j < bs; j++) {
5654           if (node != idx_R_local[bs * i + j] - j) {
5655             is_blocked = PETSC_FALSE;
5656             break;
5657           }
5658         }
5659       }
5660     }
5661     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5662       vbs = bs;
5663       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5664     }
5665   }
5666   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5667   if (sub_schurs && sub_schurs->reuse_solver) {
5668     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5669 
5670     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5671     PetscCall(ISDestroy(&reuse_solver->is_R));
5672     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5673     reuse_solver->is_R = pcbddc->is_R_local;
5674   } else {
5675     PetscCall(PetscFree(idx_R_local));
5676   }
5677 
5678   /* print some info if requested */
5679   if (pcbddc->dbg_flag) {
5680     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5681     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5682     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5683     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5684     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5685     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5686                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5687     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5688   }
5689 
5690   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5691   if (!sub_schurs || !sub_schurs->reuse_solver) {
5692     IS        is_aux1, is_aux2;
5693     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5694 
5695     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5696     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5697     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5698     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5699     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5700     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5701     for (i = 0, j = 0; i < n_R; i++) {
5702       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5703     }
5704     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5705     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5706     for (i = 0, j = 0; i < n_B; i++) {
5707       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5708     }
5709     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5710     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5711     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5712     PetscCall(ISDestroy(&is_aux1));
5713     PetscCall(ISDestroy(&is_aux2));
5714 
5715     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5716       PetscCall(PetscMalloc1(n_D, &aux_array1));
5717       for (i = 0, j = 0; i < n_R; i++) {
5718         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5719       }
5720       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5721       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5722       PetscCall(ISDestroy(&is_aux1));
5723     }
5724     PetscCall(PetscBTDestroy(&bitmask));
5725     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5726   } else {
5727     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5728     IS                 tis;
5729     PetscInt           schur_size;
5730 
5731     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5732     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5733     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5734     PetscCall(ISDestroy(&tis));
5735     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5736       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5737       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5738       PetscCall(ISDestroy(&tis));
5739     }
5740   }
5741   PetscFunctionReturn(PETSC_SUCCESS);
5742 }
5743 
5744 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5745 {
5746   MatNullSpace NullSpace;
5747   Mat          dmat;
5748   const Vec   *nullvecs;
5749   Vec          v, v2, *nullvecs2;
5750   VecScatter   sct = NULL;
5751   PetscScalar *ddata;
5752   PetscInt     k, nnsp_size, bsiz, bsiz2, n, N, bs;
5753   PetscBool    nnsp_has_cnst;
5754 
5755   PetscFunctionBegin;
5756   if (!is && !B) { /* MATIS */
5757     Mat_IS *matis = (Mat_IS *)A->data;
5758 
5759     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5760     sct = matis->cctx;
5761     PetscCall(PetscObjectReference((PetscObject)sct));
5762   } else {
5763     PetscCall(MatGetNullSpace(B, &NullSpace));
5764     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5765     if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5766   }
5767   PetscCall(MatGetNullSpace(A, &NullSpace));
5768   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5769   if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5770 
5771   PetscCall(MatCreateVecs(A, &v, NULL));
5772   PetscCall(MatCreateVecs(B, &v2, NULL));
5773   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5774   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs));
5775   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5776   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5777   PetscCall(VecGetBlockSize(v2, &bs));
5778   PetscCall(VecGetSize(v2, &N));
5779   PetscCall(VecGetLocalSize(v2, &n));
5780   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5781   for (k = 0; k < nnsp_size; k++) {
5782     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5783     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5784     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5785   }
5786   if (nnsp_has_cnst) {
5787     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5788     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5789   }
5790   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5791   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5792 
5793   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5794   PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault));
5795   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5796   PetscCall(MatDestroy(&dmat));
5797 
5798   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5799   PetscCall(PetscFree(nullvecs2));
5800   PetscCall(MatSetNearNullSpace(B, NullSpace));
5801   PetscCall(MatNullSpaceDestroy(&NullSpace));
5802   PetscCall(VecDestroy(&v));
5803   PetscCall(VecDestroy(&v2));
5804   PetscCall(VecScatterDestroy(&sct));
5805   PetscFunctionReturn(PETSC_SUCCESS);
5806 }
5807 
5808 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5809 {
5810   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5811   PC_IS       *pcis   = (PC_IS *)pc->data;
5812   PC           pc_temp;
5813   Mat          A_RR;
5814   MatNullSpace nnsp;
5815   MatReuse     reuse;
5816   PetscScalar  m_one = -1.0;
5817   PetscReal    value;
5818   PetscInt     n_D, n_R;
5819   PetscBool    issbaij, opts, isset, issym;
5820   PetscBool    f = PETSC_FALSE;
5821   char         dir_prefix[256], neu_prefix[256], str_level[16];
5822   size_t       len;
5823 
5824   PetscFunctionBegin;
5825   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5826   /* approximate solver, propagate NearNullSpace if needed */
5827   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5828     MatNullSpace gnnsp1, gnnsp2;
5829     PetscBool    lhas, ghas;
5830 
5831     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5832     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5833     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5834     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5835     PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5836     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5837   }
5838 
5839   /* compute prefixes */
5840   PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5841   PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5842   if (!pcbddc->current_level) {
5843     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5844     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5845     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5846     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5847   } else {
5848     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
5849     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5850     len -= 15;                                /* remove "pc_bddc_coarse_" */
5851     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5852     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5853     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5854     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5855     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5856     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5857     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5858     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5859     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5860   }
5861 
5862   /* DIRICHLET PROBLEM */
5863   if (dirichlet) {
5864     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5865     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5866       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5867       if (pcbddc->dbg_flag) {
5868         Mat A_IIn;
5869 
5870         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5871         PetscCall(MatDestroy(&pcis->A_II));
5872         pcis->A_II = A_IIn;
5873       }
5874     }
5875     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5876     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
5877 
5878     /* Matrix for Dirichlet problem is pcis->A_II */
5879     n_D  = pcis->n - pcis->n_B;
5880     opts = PETSC_FALSE;
5881     if (!pcbddc->ksp_D) { /* create object if not yet build */
5882       opts = PETSC_TRUE;
5883       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
5884       PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
5885       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
5886       /* default */
5887       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
5888       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
5889       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
5890       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5891       if (issbaij) {
5892         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5893       } else {
5894         PetscCall(PCSetType(pc_temp, PCLU));
5895       }
5896       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
5897     }
5898     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
5899     PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
5900     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
5901     /* Allow user's customization */
5902     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
5903     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5904     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5905       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
5906     }
5907     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5908     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5909     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5910     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5911       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5912       const PetscInt *idxs;
5913       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
5914 
5915       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
5916       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
5917       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5918       for (i = 0; i < nl; i++) {
5919         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5920       }
5921       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
5922       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5923       PetscCall(PetscFree(scoords));
5924     }
5925     if (sub_schurs && sub_schurs->reuse_solver) {
5926       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5927 
5928       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
5929     }
5930 
5931     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5932     if (!n_D) {
5933       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5934       PetscCall(PCSetType(pc_temp, PCNONE));
5935     }
5936     PetscCall(KSPSetUp(pcbddc->ksp_D));
5937     /* set ksp_D into pcis data */
5938     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5939     PetscCall(KSPDestroy(&pcis->ksp_D));
5940     pcis->ksp_D = pcbddc->ksp_D;
5941   }
5942 
5943   /* NEUMANN PROBLEM */
5944   A_RR = NULL;
5945   if (neumann) {
5946     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5947     PetscInt        ibs, mbs;
5948     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
5949     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
5950 
5951     reuse_neumann_solver = PETSC_FALSE;
5952     if (sub_schurs && sub_schurs->reuse_solver) {
5953       IS iP;
5954 
5955       reuse_neumann_solver = PETSC_TRUE;
5956       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
5957       if (iP) reuse_neumann_solver = PETSC_FALSE;
5958     }
5959     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5960     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
5961     if (pcbddc->ksp_R) { /* already created ksp */
5962       PetscInt nn_R;
5963       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
5964       PetscCall(PetscObjectReference((PetscObject)A_RR));
5965       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
5966       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5967         PetscCall(KSPReset(pcbddc->ksp_R));
5968         PetscCall(MatDestroy(&A_RR));
5969         reuse = MAT_INITIAL_MATRIX;
5970       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5971         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5972           PetscCall(MatDestroy(&A_RR));
5973           reuse = MAT_INITIAL_MATRIX;
5974         } else { /* safe to reuse the matrix */
5975           reuse = MAT_REUSE_MATRIX;
5976         }
5977       }
5978       /* last check */
5979       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5980         PetscCall(MatDestroy(&A_RR));
5981         reuse = MAT_INITIAL_MATRIX;
5982       }
5983     } else { /* first time, so we need to create the matrix */
5984       reuse = MAT_INITIAL_MATRIX;
5985     }
5986     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5987        TODO: Get Rid of these conversions */
5988     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
5989     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
5990     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
5991     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5992       if (matis->A == pcbddc->local_mat) {
5993         PetscCall(MatDestroy(&pcbddc->local_mat));
5994         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5995       } else {
5996         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
5997       }
5998     } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
5999       if (matis->A == pcbddc->local_mat) {
6000         PetscCall(MatDestroy(&pcbddc->local_mat));
6001         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6002       } else {
6003         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6004       }
6005     }
6006     /* extract A_RR */
6007     if (reuse_neumann_solver) {
6008       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6009 
6010       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6011         PetscCall(MatDestroy(&A_RR));
6012         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6013           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6014         } else {
6015           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6016         }
6017       } else {
6018         PetscCall(MatDestroy(&A_RR));
6019         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6020         PetscCall(PetscObjectReference((PetscObject)A_RR));
6021       }
6022     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6023       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6024     }
6025     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6026     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6027     opts = PETSC_FALSE;
6028     if (!pcbddc->ksp_R) { /* create object if not present */
6029       opts = PETSC_TRUE;
6030       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6031       PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6032       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6033       /* default */
6034       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6035       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6036       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6037       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6038       if (issbaij) {
6039         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6040       } else {
6041         PetscCall(PCSetType(pc_temp, PCLU));
6042       }
6043       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6044     }
6045     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6046     PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6047     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6048     if (opts) { /* Allow user's customization once */
6049       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6050     }
6051     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6052     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6053       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6054     }
6055     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6056     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6057     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6058     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6059       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6060       const PetscInt *idxs;
6061       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
6062 
6063       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6064       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6065       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6066       for (i = 0; i < nl; i++) {
6067         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6068       }
6069       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6070       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6071       PetscCall(PetscFree(scoords));
6072     }
6073 
6074     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6075     if (!n_R) {
6076       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6077       PetscCall(PCSetType(pc_temp, PCNONE));
6078     }
6079     /* Reuse solver if it is present */
6080     if (reuse_neumann_solver) {
6081       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6082 
6083       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6084     }
6085     PetscCall(KSPSetUp(pcbddc->ksp_R));
6086   }
6087 
6088   if (pcbddc->dbg_flag) {
6089     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6090     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6091     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6092   }
6093   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
6094 
6095   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6096   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6097   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6098   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6099   /* check Dirichlet and Neumann solvers */
6100   if (pcbddc->dbg_flag) {
6101     if (dirichlet) { /* Dirichlet */
6102       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6103       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6104       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6105       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6106       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6107       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6108       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6109       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6110     }
6111     if (neumann) { /* Neumann */
6112       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6113       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6114       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6115       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6116       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6117       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6118       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6119       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6120     }
6121   }
6122   /* free Neumann problem's matrix */
6123   PetscCall(MatDestroy(&A_RR));
6124   PetscFunctionReturn(PETSC_SUCCESS);
6125 }
6126 
6127 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6128 {
6129   PC_BDDC        *pcbddc       = (PC_BDDC *)pc->data;
6130   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
6131   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
6132 
6133   PetscFunctionBegin;
6134   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6135   if (!pcbddc->switch_static) {
6136     if (applytranspose && pcbddc->local_auxmat1) {
6137       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6138       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6139     }
6140     if (!reuse_solver) {
6141       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6142       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6143     } else {
6144       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6145 
6146       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6147       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6148     }
6149   } else {
6150     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6151     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6152     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6153     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6154     if (applytranspose && pcbddc->local_auxmat1) {
6155       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6156       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6157       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6158       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6159     }
6160   }
6161   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6162   if (!reuse_solver || pcbddc->switch_static) {
6163     if (applytranspose) {
6164       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6165     } else {
6166       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6167     }
6168     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6169   } else {
6170     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6171 
6172     if (applytranspose) {
6173       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6174     } else {
6175       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6176     }
6177   }
6178   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6179   PetscCall(VecSet(inout_B, 0.));
6180   if (!pcbddc->switch_static) {
6181     if (!reuse_solver) {
6182       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6183       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6184     } else {
6185       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6186 
6187       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6188       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6189     }
6190     if (!applytranspose && pcbddc->local_auxmat1) {
6191       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6192       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6193     }
6194   } else {
6195     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6196     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6197     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6198     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6199     if (!applytranspose && pcbddc->local_auxmat1) {
6200       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6201       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6202     }
6203     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6204     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6205     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6206     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6207   }
6208   PetscFunctionReturn(PETSC_SUCCESS);
6209 }
6210 
6211 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
6212 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6213 {
6214   PC_BDDC          *pcbddc = (PC_BDDC *)pc->data;
6215   PC_IS            *pcis   = (PC_IS *)pc->data;
6216   const PetscScalar zero   = 0.0;
6217 
6218   PetscFunctionBegin;
6219   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6220   if (!pcbddc->benign_apply_coarse_only) {
6221     if (applytranspose) {
6222       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6223       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6224     } else {
6225       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6226       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6227     }
6228   } else {
6229     PetscCall(VecSet(pcbddc->vec1_P, zero));
6230   }
6231 
6232   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6233   if (pcbddc->benign_n) {
6234     PetscScalar *array;
6235     PetscInt     j;
6236 
6237     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6238     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6239     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6240   }
6241 
6242   /* start communications from local primal nodes to rhs of coarse solver */
6243   PetscCall(VecSet(pcbddc->coarse_vec, zero));
6244   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6245   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
6246 
6247   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6248   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6249   if (pcbddc->coarse_ksp) {
6250     Mat          coarse_mat;
6251     Vec          rhs, sol;
6252     MatNullSpace nullsp;
6253     PetscBool    isbddc = PETSC_FALSE;
6254 
6255     if (pcbddc->benign_have_null) {
6256       PC coarse_pc;
6257 
6258       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6259       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6260       /* we need to propagate to coarser levels the need for a possible benign correction */
6261       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6262         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)coarse_pc->data;
6263         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
6264         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6265       }
6266     }
6267     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6268     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6269     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6270     if (applytranspose) {
6271       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6272       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6273       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6274       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6275       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6276     } else {
6277       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6278       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6279         PC coarse_pc;
6280 
6281         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6282         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6283         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6284         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6285         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6286       } else {
6287         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6288         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6289         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6290       }
6291     }
6292     /* we don't need the benign correction at coarser levels anymore */
6293     if (pcbddc->benign_have_null && isbddc) {
6294       PC       coarse_pc;
6295       PC_BDDC *coarsepcbddc;
6296 
6297       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6298       coarsepcbddc                           = (PC_BDDC *)coarse_pc->data;
6299       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
6300       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6301     }
6302   }
6303   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6304 
6305   /* Local solution on R nodes */
6306   if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6307   /* communications from coarse sol to local primal nodes */
6308   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6309   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
6310 
6311   /* Sum contributions from the two levels */
6312   if (!pcbddc->benign_apply_coarse_only) {
6313     if (applytranspose) {
6314       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6315       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6316     } else {
6317       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6318       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6319     }
6320     /* store p0 */
6321     if (pcbddc->benign_n) {
6322       PetscScalar *array;
6323       PetscInt     j;
6324 
6325       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6326       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6327       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6328     }
6329   } else { /* expand the coarse solution */
6330     if (applytranspose) {
6331       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6332     } else {
6333       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6334     }
6335   }
6336   PetscFunctionReturn(PETSC_SUCCESS);
6337 }
6338 
6339 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6340 {
6341   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6342   Vec                from, to;
6343   const PetscScalar *array;
6344 
6345   PetscFunctionBegin;
6346   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6347     from = pcbddc->coarse_vec;
6348     to   = pcbddc->vec1_P;
6349     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6350       Vec tvec;
6351 
6352       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6353       PetscCall(VecResetArray(tvec));
6354       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6355       PetscCall(VecGetArrayRead(tvec, &array));
6356       PetscCall(VecPlaceArray(from, array));
6357       PetscCall(VecRestoreArrayRead(tvec, &array));
6358     }
6359   } else { /* from local to global -> put data in coarse right-hand side */
6360     from = pcbddc->vec1_P;
6361     to   = pcbddc->coarse_vec;
6362   }
6363   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6364   PetscFunctionReturn(PETSC_SUCCESS);
6365 }
6366 
6367 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6368 {
6369   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6370   Vec                from, to;
6371   const PetscScalar *array;
6372 
6373   PetscFunctionBegin;
6374   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6375     from = pcbddc->coarse_vec;
6376     to   = pcbddc->vec1_P;
6377   } else { /* from local to global -> put data in coarse right-hand side */
6378     from = pcbddc->vec1_P;
6379     to   = pcbddc->coarse_vec;
6380   }
6381   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6382   if (smode == SCATTER_FORWARD) {
6383     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6384       Vec tvec;
6385 
6386       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6387       PetscCall(VecGetArrayRead(to, &array));
6388       PetscCall(VecPlaceArray(tvec, array));
6389       PetscCall(VecRestoreArrayRead(to, &array));
6390     }
6391   } else {
6392     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6393       PetscCall(VecResetArray(from));
6394     }
6395   }
6396   PetscFunctionReturn(PETSC_SUCCESS);
6397 }
6398 
6399 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6400 {
6401   PC_IS   *pcis   = (PC_IS *)pc->data;
6402   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6403   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
6404   /* one and zero */
6405   PetscScalar one = 1.0, zero = 0.0;
6406   /* space to store constraints and their local indices */
6407   PetscScalar *constraints_data;
6408   PetscInt    *constraints_idxs, *constraints_idxs_B;
6409   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
6410   PetscInt    *constraints_n;
6411   /* iterators */
6412   PetscInt i, j, k, total_counts, total_counts_cc, cum;
6413   /* BLAS integers */
6414   PetscBLASInt lwork, lierr;
6415   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6416   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6417   /* reuse */
6418   PetscInt  olocal_primal_size, olocal_primal_size_cc;
6419   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6420   /* change of basis */
6421   PetscBool qr_needed;
6422   PetscBT   change_basis, qr_needed_idx;
6423   /* auxiliary stuff */
6424   PetscInt *nnz, *is_indices;
6425   PetscInt  ncc;
6426   /* some quantities */
6427   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
6428   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6429   PetscReal tol; /* tolerance for retaining eigenmodes */
6430 
6431   PetscFunctionBegin;
6432   tol = PetscSqrtReal(PETSC_SMALL);
6433   /* Destroy Mat objects computed previously */
6434   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6435   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6436   PetscCall(MatDestroy(&pcbddc->switch_static_change));
6437   /* save info on constraints from previous setup (if any) */
6438   olocal_primal_size    = pcbddc->local_primal_size;
6439   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6440   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6441   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6442   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6443   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6444   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
6445 
6446   if (!pcbddc->adaptive_selection) {
6447     IS           ISForVertices, *ISForFaces, *ISForEdges;
6448     MatNullSpace nearnullsp;
6449     const Vec   *nearnullvecs;
6450     Vec         *localnearnullsp;
6451     PetscScalar *array;
6452     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6453     PetscBool    nnsp_has_cnst;
6454     /* LAPACK working arrays for SVD or POD */
6455     PetscBool    skip_lapack, boolforchange;
6456     PetscScalar *work;
6457     PetscReal   *singular_vals;
6458 #if defined(PETSC_USE_COMPLEX)
6459     PetscReal *rwork;
6460 #endif
6461     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6462     PetscBLASInt dummy_int    = 1;
6463     PetscScalar  dummy_scalar = 1.;
6464     PetscBool    use_pod      = PETSC_FALSE;
6465 
6466     /* MKL SVD with same input gives different results on different processes! */
6467 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6468     use_pod = PETSC_TRUE;
6469 #endif
6470     /* Get index sets for faces, edges and vertices from graph */
6471     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6472     o_nf       = n_ISForFaces;
6473     o_ne       = n_ISForEdges;
6474     n_vertices = 0;
6475     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6476     /* print some info */
6477     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6478       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6479       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6480       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6481       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6482       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6483       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6484       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6485       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6486       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6487     }
6488 
6489     if (!pcbddc->use_vertices) n_vertices = 0;
6490     if (!pcbddc->use_edges) n_ISForEdges = 0;
6491     if (!pcbddc->use_faces) n_ISForFaces = 0;
6492 
6493     /* check if near null space is attached to global mat */
6494     if (pcbddc->use_nnsp) {
6495       PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6496     } else nearnullsp = NULL;
6497 
6498     if (nearnullsp) {
6499       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6500       /* remove any stored info */
6501       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6502       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6503       /* store information for BDDC solver reuse */
6504       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6505       pcbddc->onearnullspace = nearnullsp;
6506       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6507       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6508     } else { /* if near null space is not provided BDDC uses constants by default */
6509       nnsp_size     = 0;
6510       nnsp_has_cnst = PETSC_TRUE;
6511     }
6512     /* get max number of constraints on a single cc */
6513     max_constraints = nnsp_size;
6514     if (nnsp_has_cnst) max_constraints++;
6515 
6516     /*
6517          Evaluate maximum storage size needed by the procedure
6518          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6519          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6520          There can be multiple constraints per connected component
6521                                                                                                                                                            */
6522     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6523     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
6524 
6525     total_counts = n_ISForFaces + n_ISForEdges;
6526     total_counts *= max_constraints;
6527     total_counts += n_vertices;
6528     PetscCall(PetscBTCreate(total_counts, &change_basis));
6529 
6530     total_counts           = 0;
6531     max_size_of_constraint = 0;
6532     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6533       IS used_is;
6534       if (i < n_ISForEdges) {
6535         used_is = ISForEdges[i];
6536       } else {
6537         used_is = ISForFaces[i - n_ISForEdges];
6538       }
6539       PetscCall(ISGetSize(used_is, &j));
6540       total_counts += j;
6541       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6542     }
6543     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
6544 
6545     /* get local part of global near null space vectors */
6546     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6547     for (k = 0; k < nnsp_size; k++) {
6548       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6549       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6550       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6551     }
6552 
6553     /* whether or not to skip lapack calls */
6554     skip_lapack = PETSC_TRUE;
6555     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6556 
6557     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6558     if (!skip_lapack) {
6559       PetscScalar temp_work;
6560 
6561       if (use_pod) {
6562         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6563         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6564         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6565         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6566 #if defined(PETSC_USE_COMPLEX)
6567         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6568 #endif
6569         /* now we evaluate the optimal workspace using query with lwork=-1 */
6570         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6571         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6572         lwork = -1;
6573         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6574 #if !defined(PETSC_USE_COMPLEX)
6575         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6576 #else
6577         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6578 #endif
6579         PetscCall(PetscFPTrapPop());
6580         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6581       } else {
6582 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6583         /* SVD */
6584         PetscInt max_n, min_n;
6585         max_n = max_size_of_constraint;
6586         min_n = max_constraints;
6587         if (max_size_of_constraint < max_constraints) {
6588           min_n = max_size_of_constraint;
6589           max_n = max_constraints;
6590         }
6591         PetscCall(PetscMalloc1(min_n, &singular_vals));
6592   #if defined(PETSC_USE_COMPLEX)
6593         PetscCall(PetscMalloc1(5 * min_n, &rwork));
6594   #endif
6595         /* now we evaluate the optimal workspace using query with lwork=-1 */
6596         lwork = -1;
6597         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6598         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6599         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6600         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6601   #if !defined(PETSC_USE_COMPLEX)
6602         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6603   #else
6604         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6605   #endif
6606         PetscCall(PetscFPTrapPop());
6607         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6608 #else
6609         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6610 #endif /* on missing GESVD */
6611       }
6612       /* Allocate optimal workspace */
6613       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6614       PetscCall(PetscMalloc1(lwork, &work));
6615     }
6616     /* Now we can loop on constraining sets */
6617     total_counts            = 0;
6618     constraints_idxs_ptr[0] = 0;
6619     constraints_data_ptr[0] = 0;
6620     /* vertices */
6621     if (n_vertices) {
6622       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6623       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6624       for (i = 0; i < n_vertices; i++) {
6625         constraints_n[total_counts]            = 1;
6626         constraints_data[total_counts]         = 1.0;
6627         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6628         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6629         total_counts++;
6630       }
6631       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6632     }
6633 
6634     /* edges and faces */
6635     total_counts_cc = total_counts;
6636     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6637       IS        used_is;
6638       PetscBool idxs_copied = PETSC_FALSE;
6639 
6640       if (ncc < n_ISForEdges) {
6641         used_is       = ISForEdges[ncc];
6642         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6643       } else {
6644         used_is       = ISForFaces[ncc - n_ISForEdges];
6645         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6646       }
6647       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6648 
6649       PetscCall(ISGetSize(used_is, &size_of_constraint));
6650       if (!size_of_constraint) continue;
6651       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6652       if (nnsp_has_cnst) {
6653         PetscScalar quad_value;
6654 
6655         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6656         idxs_copied = PETSC_TRUE;
6657 
6658         if (!pcbddc->use_nnsp_true) {
6659           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6660         } else {
6661           quad_value = 1.0;
6662         }
6663         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6664         temp_constraints++;
6665         total_counts++;
6666       }
6667       for (k = 0; k < nnsp_size; k++) {
6668         PetscReal    real_value;
6669         PetscScalar *ptr_to_data;
6670 
6671         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6672         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6673         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6674         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6675         /* check if array is null on the connected component */
6676         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6677         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6678         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6679           temp_constraints++;
6680           total_counts++;
6681           if (!idxs_copied) {
6682             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6683             idxs_copied = PETSC_TRUE;
6684           }
6685         }
6686       }
6687       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6688       valid_constraints = temp_constraints;
6689       if (!pcbddc->use_nnsp_true && temp_constraints) {
6690         if (temp_constraints == 1) { /* just normalize the constraint */
6691           PetscScalar norm, *ptr_to_data;
6692 
6693           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6694           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6695           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6696           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6697           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6698         } else { /* perform SVD */
6699           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6700 
6701           if (use_pod) {
6702             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6703                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6704                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6705                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6706                   from that computed using LAPACKgesvd
6707                -> This is due to a different computation of eigenvectors in LAPACKheev
6708                -> The quality of the POD-computed basis will be the same */
6709             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6710             /* Store upper triangular part of correlation matrix */
6711             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6712             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6713             for (j = 0; j < temp_constraints; j++) {
6714               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6715             }
6716             /* compute eigenvalues and eigenvectors of correlation matrix */
6717             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6718             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6719 #if !defined(PETSC_USE_COMPLEX)
6720             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6721 #else
6722             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6723 #endif
6724             PetscCall(PetscFPTrapPop());
6725             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6726             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6727             j = 0;
6728             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6729             total_counts      = total_counts - j;
6730             valid_constraints = temp_constraints - j;
6731             /* scale and copy POD basis into used quadrature memory */
6732             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6733             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6734             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6735             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6736             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6737             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6738             if (j < temp_constraints) {
6739               PetscInt ii;
6740               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6741               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6742               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6743               PetscCall(PetscFPTrapPop());
6744               for (k = 0; k < temp_constraints - j; k++) {
6745                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6746               }
6747             }
6748           } else {
6749 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6750             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6751             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6752             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6753             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6754   #if !defined(PETSC_USE_COMPLEX)
6755             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6756   #else
6757             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6758   #endif
6759             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6760             PetscCall(PetscFPTrapPop());
6761             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6762             k = temp_constraints;
6763             if (k > size_of_constraint) k = size_of_constraint;
6764             j = 0;
6765             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6766             valid_constraints = k - j;
6767             total_counts      = total_counts - temp_constraints + valid_constraints;
6768 #else
6769             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6770 #endif /* on missing GESVD */
6771           }
6772         }
6773       }
6774       /* update pointers information */
6775       if (valid_constraints) {
6776         constraints_n[total_counts_cc]            = valid_constraints;
6777         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6778         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6779         /* set change_of_basis flag */
6780         if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6781         total_counts_cc++;
6782       }
6783     }
6784     /* free workspace */
6785     if (!skip_lapack) {
6786       PetscCall(PetscFree(work));
6787 #if defined(PETSC_USE_COMPLEX)
6788       PetscCall(PetscFree(rwork));
6789 #endif
6790       PetscCall(PetscFree(singular_vals));
6791       PetscCall(PetscFree(correlation_mat));
6792       PetscCall(PetscFree(temp_basis));
6793     }
6794     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6795     PetscCall(PetscFree(localnearnullsp));
6796     /* free index sets of faces, edges and vertices */
6797     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6798   } else {
6799     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6800 
6801     total_counts = 0;
6802     n_vertices   = 0;
6803     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6804     max_constraints = 0;
6805     total_counts_cc = 0;
6806     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6807       total_counts += pcbddc->adaptive_constraints_n[i];
6808       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6809       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6810     }
6811     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6812     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6813     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6814     constraints_data     = pcbddc->adaptive_constraints_data;
6815     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6816     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6817     total_counts_cc = 0;
6818     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6819       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6820     }
6821 
6822     max_size_of_constraint = 0;
6823     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6824     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6825     /* Change of basis */
6826     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6827     if (pcbddc->use_change_of_basis) {
6828       for (i = 0; i < sub_schurs->n_subs; i++) {
6829         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6830       }
6831     }
6832   }
6833   pcbddc->local_primal_size = total_counts;
6834   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6835 
6836   /* map constraints_idxs in boundary numbering */
6837   if (pcbddc->use_change_of_basis) {
6838     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6839     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6840   }
6841 
6842   /* Create constraint matrix */
6843   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6844   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6845   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6846 
6847   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6848   /* determine if a QR strategy is needed for change of basis */
6849   qr_needed = pcbddc->use_qr_single;
6850   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6851   total_primal_vertices        = 0;
6852   pcbddc->local_primal_size_cc = 0;
6853   for (i = 0; i < total_counts_cc; i++) {
6854     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6855     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6856       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6857       pcbddc->local_primal_size_cc += 1;
6858     } else if (PetscBTLookup(change_basis, i)) {
6859       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6860       pcbddc->local_primal_size_cc += constraints_n[i];
6861       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6862         PetscCall(PetscBTSet(qr_needed_idx, i));
6863         qr_needed = PETSC_TRUE;
6864       }
6865     } else {
6866       pcbddc->local_primal_size_cc += 1;
6867     }
6868   }
6869   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6870   pcbddc->n_vertices = total_primal_vertices;
6871   /* permute indices in order to have a sorted set of vertices */
6872   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6873   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6874   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6875   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6876 
6877   /* nonzero structure of constraint matrix */
6878   /* and get reference dof for local constraints */
6879   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
6880   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
6881 
6882   j            = total_primal_vertices;
6883   total_counts = total_primal_vertices;
6884   cum          = total_primal_vertices;
6885   for (i = n_vertices; i < total_counts_cc; i++) {
6886     if (!PetscBTLookup(change_basis, i)) {
6887       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6888       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6889       cum++;
6890       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6891       for (k = 0; k < constraints_n[i]; k++) {
6892         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6893         nnz[j + k]                                        = size_of_constraint;
6894       }
6895       j += constraints_n[i];
6896     }
6897   }
6898   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
6899   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6900   PetscCall(PetscFree(nnz));
6901 
6902   /* set values in constraint matrix */
6903   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
6904   total_counts = total_primal_vertices;
6905   for (i = n_vertices; i < total_counts_cc; i++) {
6906     if (!PetscBTLookup(change_basis, i)) {
6907       PetscInt *cols;
6908 
6909       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6910       cols               = constraints_idxs + constraints_idxs_ptr[i];
6911       for (k = 0; k < constraints_n[i]; k++) {
6912         PetscInt     row = total_counts + k;
6913         PetscScalar *vals;
6914 
6915         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6916         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
6917       }
6918       total_counts += constraints_n[i];
6919     }
6920   }
6921   /* assembling */
6922   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6923   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6924   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
6925 
6926   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6927   if (pcbddc->use_change_of_basis) {
6928     /* dual and primal dofs on a single cc */
6929     PetscInt dual_dofs, primal_dofs;
6930     /* working stuff for GEQRF */
6931     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6932     PetscBLASInt lqr_work;
6933     /* working stuff for UNGQR */
6934     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6935     PetscBLASInt lgqr_work;
6936     /* working stuff for TRTRS */
6937     PetscScalar *trs_rhs = NULL;
6938     PetscBLASInt Blas_NRHS;
6939     /* pointers for values insertion into change of basis matrix */
6940     PetscInt    *start_rows, *start_cols;
6941     PetscScalar *start_vals;
6942     /* working stuff for values insertion */
6943     PetscBT   is_primal;
6944     PetscInt *aux_primal_numbering_B;
6945     /* matrix sizes */
6946     PetscInt global_size, local_size;
6947     /* temporary change of basis */
6948     Mat localChangeOfBasisMatrix;
6949     /* extra space for debugging */
6950     PetscScalar *dbg_work = NULL;
6951 
6952     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
6953     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
6954     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
6955     /* nonzeros for local mat */
6956     PetscCall(PetscMalloc1(pcis->n, &nnz));
6957     if (!pcbddc->benign_change || pcbddc->fake_change) {
6958       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6959     } else {
6960       const PetscInt *ii;
6961       PetscInt        n;
6962       PetscBool       flg_row;
6963       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6964       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6965       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6966     }
6967     for (i = n_vertices; i < total_counts_cc; i++) {
6968       if (PetscBTLookup(change_basis, i)) {
6969         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6970         if (PetscBTLookup(qr_needed_idx, i)) {
6971           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6972         } else {
6973           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6974           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6975         }
6976       }
6977     }
6978     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
6979     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6980     PetscCall(PetscFree(nnz));
6981     /* Set interior change in the matrix */
6982     if (!pcbddc->benign_change || pcbddc->fake_change) {
6983       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
6984     } else {
6985       const PetscInt *ii, *jj;
6986       PetscScalar    *aa;
6987       PetscInt        n;
6988       PetscBool       flg_row;
6989       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6990       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
6991       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
6992       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
6993       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6994     }
6995 
6996     if (pcbddc->dbg_flag) {
6997       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6998       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
6999     }
7000 
7001     /* Now we loop on the constraints which need a change of basis */
7002     /*
7003        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7004        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
7005 
7006        Basic blocks of change of basis matrix T computed:
7007 
7008           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
7009 
7010             | 1        0   ...        0         s_1/S |
7011             | 0        1   ...        0         s_2/S |
7012             |              ...                        |
7013             | 0        ...            1     s_{n-1}/S |
7014             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
7015 
7016             with S = \sum_{i=1}^n s_i^2
7017             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7018                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
7019 
7020           - QR decomposition of constraints otherwise
7021     */
7022     if (qr_needed && max_size_of_constraint) {
7023       /* space to store Q */
7024       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7025       /* array to store scaling factors for reflectors */
7026       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7027       /* first we issue queries for optimal work */
7028       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7029       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7030       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7031       lqr_work = -1;
7032       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7033       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7034       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7035       PetscCall(PetscMalloc1(lqr_work, &qr_work));
7036       lgqr_work = -1;
7037       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7038       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7039       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7040       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7041       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7042       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7043       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7044       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7045       PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7046       /* array to store rhs and solution of triangular solver */
7047       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7048       /* allocating workspace for check */
7049       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7050     }
7051     /* array to store whether a node is primal or not */
7052     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7053     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7054     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7055     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7056     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7057     PetscCall(PetscFree(aux_primal_numbering_B));
7058 
7059     /* loop on constraints and see whether or not they need a change of basis and compute it */
7060     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7061       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7062       if (PetscBTLookup(change_basis, total_counts)) {
7063         /* get constraint info */
7064         primal_dofs = constraints_n[total_counts];
7065         dual_dofs   = size_of_constraint - primal_dofs;
7066 
7067         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
7068 
7069         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
7070 
7071           /* copy quadrature constraints for change of basis check */
7072           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7073           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7074           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7075 
7076           /* compute QR decomposition of constraints */
7077           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7078           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7079           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7080           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7081           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7082           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7083           PetscCall(PetscFPTrapPop());
7084 
7085           /* explicitly compute R^-T */
7086           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7087           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7088           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7089           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7090           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7091           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7092           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7093           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7094           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr);
7095           PetscCall(PetscFPTrapPop());
7096 
7097           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7098           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7099           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7100           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7101           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7102           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7103           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7104           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7105           PetscCall(PetscFPTrapPop());
7106 
7107           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7108              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7109              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7110           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7111           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7112           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7113           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7114           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7115           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7116           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7117           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7118           PetscCall(PetscFPTrapPop());
7119           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7120 
7121           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7122           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7123           /* insert cols for primal dofs */
7124           for (j = 0; j < primal_dofs; j++) {
7125             start_vals = &qr_basis[j * size_of_constraint];
7126             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7127             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7128           }
7129           /* insert cols for dual dofs */
7130           for (j = 0, k = 0; j < dual_dofs; k++) {
7131             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7132               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7133               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7134               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7135               j++;
7136             }
7137           }
7138 
7139           /* check change of basis */
7140           if (pcbddc->dbg_flag) {
7141             PetscInt  ii, jj;
7142             PetscBool valid_qr = PETSC_TRUE;
7143             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7144             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7145             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7146             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7147             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7148             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7149             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7150             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7151             PetscCall(PetscFPTrapPop());
7152             for (jj = 0; jj < size_of_constraint; jj++) {
7153               for (ii = 0; ii < primal_dofs; ii++) {
7154                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7155                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7156               }
7157             }
7158             if (!valid_qr) {
7159               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7160               for (jj = 0; jj < size_of_constraint; jj++) {
7161                 for (ii = 0; ii < primal_dofs; ii++) {
7162                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7163                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7164                   }
7165                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7166                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7167                   }
7168                 }
7169               }
7170             } else {
7171               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7172             }
7173           }
7174         } else { /* simple transformation block */
7175           PetscInt    row, col;
7176           PetscScalar val, norm;
7177 
7178           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7179           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7180           for (j = 0; j < size_of_constraint; j++) {
7181             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7182             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7183             if (!PetscBTLookup(is_primal, row_B)) {
7184               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7185               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7186               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7187             } else {
7188               for (k = 0; k < size_of_constraint; k++) {
7189                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7190                 if (row != col) {
7191                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7192                 } else {
7193                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7194                 }
7195                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7196               }
7197             }
7198           }
7199           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7200         }
7201       } else {
7202         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7203       }
7204     }
7205 
7206     /* free workspace */
7207     if (qr_needed) {
7208       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7209       PetscCall(PetscFree(trs_rhs));
7210       PetscCall(PetscFree(qr_tau));
7211       PetscCall(PetscFree(qr_work));
7212       PetscCall(PetscFree(gqr_work));
7213       PetscCall(PetscFree(qr_basis));
7214     }
7215     PetscCall(PetscBTDestroy(&is_primal));
7216     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7217     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7218 
7219     /* assembling of global change of variable */
7220     if (!pcbddc->fake_change) {
7221       Mat      tmat;
7222       PetscInt bs;
7223 
7224       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7225       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7226       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7227       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7228       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7229       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7230       PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix));
7231       PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ));
7232       PetscCall(MatGetBlockSize(pc->pmat, &bs));
7233       PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs));
7234       PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size));
7235       PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE));
7236       PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7237       PetscCall(MatDestroy(&tmat));
7238       PetscCall(VecSet(pcis->vec1_global, 0.0));
7239       PetscCall(VecSet(pcis->vec1_N, 1.0));
7240       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7241       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7242       PetscCall(VecReciprocal(pcis->vec1_global));
7243       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
7244 
7245       /* check */
7246       if (pcbddc->dbg_flag) {
7247         PetscReal error;
7248         Vec       x, x_change;
7249 
7250         PetscCall(VecDuplicate(pcis->vec1_global, &x));
7251         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7252         PetscCall(VecSetRandom(x, NULL));
7253         PetscCall(VecCopy(x, pcis->vec1_global));
7254         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7255         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7256         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7257         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7258         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7259         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7260         PetscCall(VecAXPY(x, -1.0, x_change));
7261         PetscCall(VecNorm(x, NORM_INFINITY, &error));
7262         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7263         PetscCall(VecDestroy(&x));
7264         PetscCall(VecDestroy(&x_change));
7265       }
7266       /* adapt sub_schurs computed (if any) */
7267       if (pcbddc->use_deluxe_scaling) {
7268         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
7269 
7270         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7271         if (sub_schurs && sub_schurs->S_Ej_all) {
7272           Mat S_new, tmat;
7273           IS  is_all_N, is_V_Sall = NULL;
7274 
7275           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7276           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7277           if (pcbddc->deluxe_zerorows) {
7278             ISLocalToGlobalMapping NtoSall;
7279             IS                     is_V;
7280             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7281             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7282             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7283             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7284             PetscCall(ISDestroy(&is_V));
7285           }
7286           PetscCall(ISDestroy(&is_all_N));
7287           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7288           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7289           PetscCall(PetscObjectReference((PetscObject)S_new));
7290           if (pcbddc->deluxe_zerorows) {
7291             const PetscScalar *array;
7292             const PetscInt    *idxs_V, *idxs_all;
7293             PetscInt           i, n_V;
7294 
7295             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7296             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7297             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7298             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7299             PetscCall(VecGetArrayRead(pcis->D, &array));
7300             for (i = 0; i < n_V; i++) {
7301               PetscScalar val;
7302               PetscInt    idx;
7303 
7304               idx = idxs_V[i];
7305               val = array[idxs_all[idxs_V[i]]];
7306               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7307             }
7308             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7309             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7310             PetscCall(VecRestoreArrayRead(pcis->D, &array));
7311             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7312             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7313           }
7314           sub_schurs->S_Ej_all = S_new;
7315           PetscCall(MatDestroy(&S_new));
7316           if (sub_schurs->sum_S_Ej_all) {
7317             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7318             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7319             PetscCall(PetscObjectReference((PetscObject)S_new));
7320             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7321             sub_schurs->sum_S_Ej_all = S_new;
7322             PetscCall(MatDestroy(&S_new));
7323           }
7324           PetscCall(ISDestroy(&is_V_Sall));
7325           PetscCall(MatDestroy(&tmat));
7326         }
7327         /* destroy any change of basis context in sub_schurs */
7328         if (sub_schurs && sub_schurs->change) {
7329           PetscInt i;
7330 
7331           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7332           PetscCall(PetscFree(sub_schurs->change));
7333         }
7334       }
7335       if (pcbddc->switch_static) { /* need to save the local change */
7336         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7337       } else {
7338         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7339       }
7340       /* determine if any process has changed the pressures locally */
7341       pcbddc->change_interior = pcbddc->benign_have_null;
7342     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7343       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7344       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7345       pcbddc->use_qr_single    = qr_needed;
7346     }
7347   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7348     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7349       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7350       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7351     } else {
7352       Mat benign_global = NULL;
7353       if (pcbddc->benign_have_null) {
7354         Mat M;
7355 
7356         pcbddc->change_interior = PETSC_TRUE;
7357         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7358         PetscCall(VecReciprocal(pcis->vec1_N));
7359         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7360         if (pcbddc->benign_change) {
7361           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7362           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7363         } else {
7364           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7365           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7366         }
7367         PetscCall(MatISSetLocalMat(benign_global, M));
7368         PetscCall(MatDestroy(&M));
7369         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7370         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7371       }
7372       if (pcbddc->user_ChangeOfBasisMatrix) {
7373         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix));
7374         PetscCall(MatDestroy(&benign_global));
7375       } else if (pcbddc->benign_have_null) {
7376         pcbddc->ChangeOfBasisMatrix = benign_global;
7377       }
7378     }
7379     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7380       IS              is_global;
7381       const PetscInt *gidxs;
7382 
7383       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7384       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7385       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7386       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7387       PetscCall(ISDestroy(&is_global));
7388     }
7389   }
7390   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
7391 
7392   if (!pcbddc->fake_change) {
7393     /* add pressure dofs to set of primal nodes for numbering purposes */
7394     for (i = 0; i < pcbddc->benign_n; i++) {
7395       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
7396       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7397       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
7398       pcbddc->local_primal_size_cc++;
7399       pcbddc->local_primal_size++;
7400     }
7401 
7402     /* check if a new primal space has been introduced (also take into account benign trick) */
7403     pcbddc->new_primal_space_local = PETSC_TRUE;
7404     if (olocal_primal_size == pcbddc->local_primal_size) {
7405       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7406       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7407       if (!pcbddc->new_primal_space_local) {
7408         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7409         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7410       }
7411     }
7412     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7413     PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7414   }
7415   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
7416 
7417   /* flush dbg viewer */
7418   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7419 
7420   /* free workspace */
7421   PetscCall(PetscBTDestroy(&qr_needed_idx));
7422   PetscCall(PetscBTDestroy(&change_basis));
7423   if (!pcbddc->adaptive_selection) {
7424     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7425     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7426   } else {
7427     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7428     PetscCall(PetscFree(constraints_n));
7429     PetscCall(PetscFree(constraints_idxs_B));
7430   }
7431   PetscFunctionReturn(PETSC_SUCCESS);
7432 }
7433 
7434 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7435 {
7436   ISLocalToGlobalMapping map;
7437   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7438   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
7439   PetscInt               i, N;
7440   PetscBool              rcsr = PETSC_FALSE;
7441 
7442   PetscFunctionBegin;
7443   if (pcbddc->recompute_topography) {
7444     pcbddc->graphanalyzed = PETSC_FALSE;
7445     /* Reset previously computed graph */
7446     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7447     /* Init local Graph struct */
7448     PetscCall(MatGetSize(pc->pmat, &N, NULL));
7449     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7450     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
7451 
7452     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7453     /* Check validity of the csr graph passed in by the user */
7454     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7455                pcbddc->mat_graph->nvtxs);
7456 
7457     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7458     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7459       PetscInt *xadj, *adjncy;
7460       PetscInt  nvtxs;
7461       PetscBool flg_row;
7462       Mat       A;
7463 
7464       PetscCall(PetscObjectReference((PetscObject)matis->A));
7465       A = matis->A;
7466       for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7467         Mat AtA;
7468 
7469         PetscCall(MatProductCreate(A, A, NULL, &AtA));
7470         PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7471         PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7472         PetscCall(MatProductSetFromOptions(AtA));
7473         PetscCall(MatProductSymbolic(AtA));
7474         PetscCall(MatProductClear(AtA));
7475         /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7476         AtA->assembled = PETSC_TRUE;
7477         PetscCall(MatDestroy(&A));
7478         A = AtA;
7479       }
7480       PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7481       if (flg_row) {
7482         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7483         pcbddc->computed_rowadj = PETSC_TRUE;
7484         PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7485         rcsr = PETSC_TRUE;
7486       }
7487       PetscCall(MatDestroy(&A));
7488     }
7489     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7490 
7491     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7492       PetscReal   *lcoords;
7493       PetscInt     n;
7494       MPI_Datatype dimrealtype;
7495       PetscMPIInt  cdimi;
7496 
7497       /* TODO: support for blocked */
7498       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7499       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7500       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7501       PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi));
7502       PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype));
7503       PetscCallMPI(MPI_Type_commit(&dimrealtype));
7504       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7505       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7506       PetscCallMPI(MPI_Type_free(&dimrealtype));
7507       PetscCall(PetscFree(pcbddc->mat_graph->coords));
7508 
7509       pcbddc->mat_graph->coords = lcoords;
7510       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7511       pcbddc->mat_graph->cnloc  = n;
7512     }
7513     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7514                pcbddc->mat_graph->nvtxs);
7515     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7516 
7517     /* attach info on disconnected subdomains if present */
7518     if (pcbddc->n_local_subs) {
7519       PetscInt *local_subs, n, totn;
7520 
7521       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7522       PetscCall(PetscMalloc1(n, &local_subs));
7523       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7524       for (i = 0; i < pcbddc->n_local_subs; i++) {
7525         const PetscInt *idxs;
7526         PetscInt        nl, j;
7527 
7528         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7529         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7530         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7531         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7532       }
7533       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7534       pcbddc->mat_graph->n_local_subs = totn + 1;
7535       pcbddc->mat_graph->local_subs   = local_subs;
7536     }
7537 
7538     /* Setup of Graph */
7539     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7540   }
7541 
7542   if (!pcbddc->graphanalyzed) {
7543     /* Graph's connected components analysis */
7544     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7545     pcbddc->graphanalyzed   = PETSC_TRUE;
7546     pcbddc->corner_selected = pcbddc->corner_selection;
7547   }
7548   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7549   PetscFunctionReturn(PETSC_SUCCESS);
7550 }
7551 
7552 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7553 {
7554   PetscInt     i, j, n;
7555   PetscScalar *alphas;
7556   PetscReal    norm, *onorms;
7557 
7558   PetscFunctionBegin;
7559   n = *nio;
7560   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7561   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7562   PetscCall(VecNormalize(vecs[0], &norm));
7563   if (norm < PETSC_SMALL) {
7564     onorms[0] = 0.0;
7565     PetscCall(VecSet(vecs[0], 0.0));
7566   } else {
7567     onorms[0] = norm;
7568   }
7569 
7570   for (i = 1; i < n; i++) {
7571     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7572     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7573     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7574     PetscCall(VecNormalize(vecs[i], &norm));
7575     if (norm < PETSC_SMALL) {
7576       onorms[i] = 0.0;
7577       PetscCall(VecSet(vecs[i], 0.0));
7578     } else {
7579       onorms[i] = norm;
7580     }
7581   }
7582   /* push nonzero vectors at the beginning */
7583   for (i = 0; i < n; i++) {
7584     if (onorms[i] == 0.0) {
7585       for (j = i + 1; j < n; j++) {
7586         if (onorms[j] != 0.0) {
7587           PetscCall(VecCopy(vecs[j], vecs[i]));
7588           onorms[j] = 0.0;
7589         }
7590       }
7591     }
7592   }
7593   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7594   PetscCall(PetscFree2(alphas, onorms));
7595   PetscFunctionReturn(PETSC_SUCCESS);
7596 }
7597 
7598 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7599 {
7600   ISLocalToGlobalMapping mapping;
7601   Mat                    A;
7602   PetscInt               n_neighs, *neighs, *n_shared, **shared;
7603   PetscMPIInt            size, rank, color;
7604   PetscInt              *xadj, *adjncy;
7605   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7606   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
7607   PetscInt               void_procs, *procs_candidates = NULL;
7608   PetscInt               xadj_count, *count;
7609   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
7610   PetscSubcomm           psubcomm;
7611   MPI_Comm               subcomm;
7612 
7613   PetscFunctionBegin;
7614   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7615   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7616   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7617   PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2);
7618   PetscValidLogicalCollectiveInt(mat, redprocs, 3);
7619   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7620 
7621   if (have_void) *have_void = PETSC_FALSE;
7622   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7623   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7624   PetscCall(MatISGetLocalMat(mat, &A));
7625   PetscCall(MatGetLocalSize(A, &n, NULL));
7626   im_active = !!n;
7627   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7628   void_procs = size - active_procs;
7629   /* get ranks of non-active processes in mat communicator */
7630   if (void_procs) {
7631     PetscInt ncand;
7632 
7633     if (have_void) *have_void = PETSC_TRUE;
7634     PetscCall(PetscMalloc1(size, &procs_candidates));
7635     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7636     for (i = 0, ncand = 0; i < size; i++) {
7637       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7638     }
7639     /* force n_subdomains to be not greater that the number of non-active processes */
7640     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7641   }
7642 
7643   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7644      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7645   PetscCall(MatGetSize(mat, &N, NULL));
7646   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7647     PetscInt issize, isidx, dest;
7648     if (*n_subdomains == 1) dest = 0;
7649     else dest = rank;
7650     if (im_active) {
7651       issize = 1;
7652       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7653         isidx = procs_candidates[dest];
7654       } else {
7655         isidx = dest;
7656       }
7657     } else {
7658       issize = 0;
7659       isidx  = -1;
7660     }
7661     if (*n_subdomains != 1) *n_subdomains = active_procs;
7662     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7663     PetscCall(PetscFree(procs_candidates));
7664     PetscFunctionReturn(PETSC_SUCCESS);
7665   }
7666   PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7667   PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7668   threshold = PetscMax(threshold, 2);
7669 
7670   /* Get info on mapping */
7671   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7672   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7673 
7674   /* build local CSR graph of subdomains' connectivity */
7675   PetscCall(PetscMalloc1(2, &xadj));
7676   xadj[0] = 0;
7677   xadj[1] = PetscMax(n_neighs - 1, 0);
7678   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7679   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7680   PetscCall(PetscCalloc1(n, &count));
7681   for (i = 1; i < n_neighs; i++)
7682     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7683 
7684   xadj_count = 0;
7685   for (i = 1; i < n_neighs; i++) {
7686     for (j = 0; j < n_shared[i]; j++) {
7687       if (count[shared[i][j]] < threshold) {
7688         adjncy[xadj_count]     = neighs[i];
7689         adjncy_wgt[xadj_count] = n_shared[i];
7690         xadj_count++;
7691         break;
7692       }
7693     }
7694   }
7695   xadj[1] = xadj_count;
7696   PetscCall(PetscFree(count));
7697   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7698   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7699 
7700   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7701 
7702   /* Restrict work on active processes only */
7703   PetscCall(PetscMPIIntCast(im_active, &color));
7704   if (void_procs) {
7705     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7706     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7707     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7708     subcomm = PetscSubcommChild(psubcomm);
7709   } else {
7710     psubcomm = NULL;
7711     subcomm  = PetscObjectComm((PetscObject)mat);
7712   }
7713 
7714   v_wgt = NULL;
7715   if (!color) {
7716     PetscCall(PetscFree(xadj));
7717     PetscCall(PetscFree(adjncy));
7718     PetscCall(PetscFree(adjncy_wgt));
7719   } else {
7720     Mat             subdomain_adj;
7721     IS              new_ranks, new_ranks_contig;
7722     MatPartitioning partitioner;
7723     PetscInt        rstart, rend;
7724     PetscMPIInt     irstart = 0, irend = 0;
7725     PetscInt       *is_indices, *oldranks;
7726     PetscMPIInt     size;
7727     PetscBool       aggregate;
7728 
7729     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7730     if (void_procs) {
7731       PetscInt prank = rank;
7732       PetscCall(PetscMalloc1(size, &oldranks));
7733       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7734       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7735       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7736     } else {
7737       oldranks = NULL;
7738     }
7739     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7740     if (aggregate) { /* TODO: all this part could be made more efficient */
7741       PetscInt     lrows, row, ncols, *cols;
7742       PetscMPIInt  nrank;
7743       PetscScalar *vals;
7744 
7745       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7746       lrows = 0;
7747       if (nrank < redprocs) {
7748         lrows = size / redprocs;
7749         if (nrank < size % redprocs) lrows++;
7750       }
7751       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7752       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7753       PetscCall(PetscMPIIntCast(rstart, &irstart));
7754       PetscCall(PetscMPIIntCast(rend, &irend));
7755       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7756       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7757       row   = nrank;
7758       ncols = xadj[1] - xadj[0];
7759       cols  = adjncy;
7760       PetscCall(PetscMalloc1(ncols, &vals));
7761       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7762       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7763       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7764       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7765       PetscCall(PetscFree(xadj));
7766       PetscCall(PetscFree(adjncy));
7767       PetscCall(PetscFree(adjncy_wgt));
7768       PetscCall(PetscFree(vals));
7769       if (use_vwgt) {
7770         Vec                v;
7771         const PetscScalar *array;
7772         PetscInt           nl;
7773 
7774         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7775         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7776         PetscCall(VecAssemblyBegin(v));
7777         PetscCall(VecAssemblyEnd(v));
7778         PetscCall(VecGetLocalSize(v, &nl));
7779         PetscCall(VecGetArrayRead(v, &array));
7780         PetscCall(PetscMalloc1(nl, &v_wgt));
7781         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7782         PetscCall(VecRestoreArrayRead(v, &array));
7783         PetscCall(VecDestroy(&v));
7784       }
7785     } else {
7786       PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7787       if (use_vwgt) {
7788         PetscCall(PetscMalloc1(1, &v_wgt));
7789         v_wgt[0] = n;
7790       }
7791     }
7792     /* PetscCall(MatView(subdomain_adj,0)); */
7793 
7794     /* Partition */
7795     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7796 #if defined(PETSC_HAVE_PTSCOTCH)
7797     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7798 #elif defined(PETSC_HAVE_PARMETIS)
7799     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7800 #else
7801     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7802 #endif
7803     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7804     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7805     *n_subdomains = PetscMin(size, *n_subdomains);
7806     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7807     PetscCall(MatPartitioningSetFromOptions(partitioner));
7808     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7809     /* PetscCall(MatPartitioningView(partitioner,0)); */
7810 
7811     /* renumber new_ranks to avoid "holes" in new set of processors */
7812     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7813     PetscCall(ISDestroy(&new_ranks));
7814     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7815     if (!aggregate) {
7816       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7817         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7818         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7819       } else if (oldranks) {
7820         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7821       } else {
7822         ranks_send_to_idx[0] = is_indices[0];
7823       }
7824     } else {
7825       PetscInt     idx = 0;
7826       PetscMPIInt  tag;
7827       MPI_Request *reqs;
7828 
7829       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7830       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7831       for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7832       PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7833       PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE));
7834       PetscCall(PetscFree(reqs));
7835       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7836         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7837         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7838       } else if (oldranks) {
7839         ranks_send_to_idx[0] = oldranks[idx];
7840       } else {
7841         ranks_send_to_idx[0] = idx;
7842       }
7843     }
7844     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7845     /* clean up */
7846     PetscCall(PetscFree(oldranks));
7847     PetscCall(ISDestroy(&new_ranks_contig));
7848     PetscCall(MatDestroy(&subdomain_adj));
7849     PetscCall(MatPartitioningDestroy(&partitioner));
7850   }
7851   PetscCall(PetscSubcommDestroy(&psubcomm));
7852   PetscCall(PetscFree(procs_candidates));
7853 
7854   /* assemble parallel IS for sends */
7855   i = 1;
7856   if (!color) i = 0;
7857   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7858   PetscFunctionReturn(PETSC_SUCCESS);
7859 }
7860 
7861 typedef enum {
7862   MATDENSE_PRIVATE = 0,
7863   MATAIJ_PRIVATE,
7864   MATBAIJ_PRIVATE,
7865   MATSBAIJ_PRIVATE
7866 } MatTypePrivate;
7867 
7868 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7869 {
7870   Mat                    local_mat;
7871   IS                     is_sends_internal;
7872   PetscInt               rows, cols, new_local_rows;
7873   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7874   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7875   ISLocalToGlobalMapping l2gmap;
7876   PetscInt              *l2gmap_indices;
7877   const PetscInt        *is_indices;
7878   MatType                new_local_type;
7879   /* buffers */
7880   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7881   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7882   PetscInt          *recv_buffer_idxs_local;
7883   PetscScalar       *ptr_vals, *recv_buffer_vals;
7884   const PetscScalar *send_buffer_vals;
7885   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7886   /* MPI */
7887   MPI_Comm     comm, comm_n;
7888   PetscSubcomm subcomm;
7889   PetscMPIInt  n_sends, n_recvs, size;
7890   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7891   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7892   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7893   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7894   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
7895 
7896   PetscFunctionBegin;
7897   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7898   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7899   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7900   PetscValidLogicalCollectiveInt(mat, n_subdomains, 3);
7901   PetscValidLogicalCollectiveBool(mat, restrict_comm, 4);
7902   PetscValidLogicalCollectiveBool(mat, restrict_full, 5);
7903   PetscValidLogicalCollectiveBool(mat, reuse, 6);
7904   PetscValidLogicalCollectiveInt(mat, nis, 8);
7905   PetscValidLogicalCollectiveInt(mat, nvecs, 10);
7906   if (nvecs) {
7907     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
7908     PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11);
7909   }
7910   /* further checks */
7911   PetscCall(MatISGetLocalMat(mat, &local_mat));
7912   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7913   /* XXX hack for multi_element */
7914   if (!isdense) PetscCall(MatConvert(local_mat, MATDENSE, MAT_INPLACE_MATRIX, &local_mat));
7915   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7916   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7917 
7918   PetscCall(MatGetSize(local_mat, &rows, &cols));
7919   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
7920   if (reuse && *mat_n) {
7921     PetscInt mrows, mcols, mnrows, mncols;
7922     PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7);
7923     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
7924     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
7925     PetscCall(MatGetSize(mat, &mrows, &mcols));
7926     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
7927     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
7928     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
7929   }
7930   PetscCall(MatGetBlockSize(local_mat, &bs));
7931   PetscValidLogicalCollectiveInt(mat, bs, 1);
7932 
7933   /* prepare IS for sending if not provided */
7934   if (!is_sends) {
7935     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
7936     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
7937   } else {
7938     PetscCall(PetscObjectReference((PetscObject)is_sends));
7939     is_sends_internal = is_sends;
7940   }
7941 
7942   /* get comm */
7943   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
7944 
7945   /* compute number of sends */
7946   PetscCall(ISGetLocalSize(is_sends_internal, &i));
7947   PetscCall(PetscMPIIntCast(i, &n_sends));
7948 
7949   /* compute number of receives */
7950   PetscCallMPI(MPI_Comm_size(comm, &size));
7951   PetscCall(PetscMalloc1(size, &iflags));
7952   PetscCall(PetscArrayzero(iflags, size));
7953   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
7954   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7955   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
7956   PetscCall(PetscFree(iflags));
7957 
7958   /* restrict comm if requested */
7959   subcomm     = NULL;
7960   destroy_mat = PETSC_FALSE;
7961   if (restrict_comm) {
7962     PetscMPIInt color, subcommsize;
7963 
7964     color = 0;
7965     if (restrict_full) {
7966       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7967     } else {
7968       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7969     }
7970     PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
7971     subcommsize = size - subcommsize;
7972     /* check if reuse has been requested */
7973     if (reuse) {
7974       if (*mat_n) {
7975         PetscMPIInt subcommsize2;
7976         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
7977         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
7978         comm_n = PetscObjectComm((PetscObject)*mat_n);
7979       } else {
7980         comm_n = PETSC_COMM_SELF;
7981       }
7982     } else { /* MAT_INITIAL_MATRIX */
7983       PetscMPIInt rank;
7984 
7985       PetscCallMPI(MPI_Comm_rank(comm, &rank));
7986       PetscCall(PetscSubcommCreate(comm, &subcomm));
7987       PetscCall(PetscSubcommSetNumber(subcomm, 2));
7988       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
7989       comm_n = PetscSubcommChild(subcomm);
7990     }
7991     /* flag to destroy *mat_n if not significative */
7992     if (color) destroy_mat = PETSC_TRUE;
7993   } else {
7994     comm_n = comm;
7995   }
7996 
7997   /* prepare send/receive buffers */
7998   PetscCall(PetscMalloc1(size, &ilengths_idxs));
7999   PetscCall(PetscArrayzero(ilengths_idxs, size));
8000   PetscCall(PetscMalloc1(size, &ilengths_vals));
8001   PetscCall(PetscArrayzero(ilengths_vals, size));
8002   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
8003 
8004   /* Get data from local matrices */
8005   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8006   /* TODO: See below some guidelines on how to prepare the local buffers */
8007   /*
8008        send_buffer_vals should contain the raw values of the local matrix
8009        send_buffer_idxs should contain:
8010        - MatType_PRIVATE type
8011        - PetscInt        size_of_l2gmap
8012        - PetscInt        global_row_indices[size_of_l2gmap]
8013        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8014     */
8015   {
8016     ISLocalToGlobalMapping mapping;
8017 
8018     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8019     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8020     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8021     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8022     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8023     send_buffer_idxs[1] = i;
8024     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8025     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8026     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8027     PetscCall(PetscMPIIntCast(i, &len));
8028     for (i = 0; i < n_sends; i++) {
8029       ilengths_vals[is_indices[i]] = len * len;
8030       ilengths_idxs[is_indices[i]] = len + 2;
8031     }
8032   }
8033   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8034   /* additional is (if any) */
8035   if (nis) {
8036     PetscMPIInt psum;
8037     PetscInt    j;
8038     for (j = 0, psum = 0; j < nis; j++) {
8039       PetscInt plen;
8040       PetscCall(ISGetLocalSize(isarray[j], &plen));
8041       PetscCall(PetscMPIIntCast(plen, &len));
8042       psum += len + 1; /* indices + length */
8043     }
8044     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8045     for (j = 0, psum = 0; j < nis; j++) {
8046       PetscInt        plen;
8047       const PetscInt *is_array_idxs;
8048       PetscCall(ISGetLocalSize(isarray[j], &plen));
8049       send_buffer_idxs_is[psum] = plen;
8050       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8051       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8052       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8053       psum += plen + 1; /* indices + length */
8054     }
8055     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8056     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8057   }
8058   PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8059 
8060   buf_size_idxs    = 0;
8061   buf_size_vals    = 0;
8062   buf_size_idxs_is = 0;
8063   buf_size_vecs    = 0;
8064   for (i = 0; i < n_recvs; i++) {
8065     buf_size_idxs += olengths_idxs[i];
8066     buf_size_vals += olengths_vals[i];
8067     if (nis) buf_size_idxs_is += olengths_idxs_is[i];
8068     if (nvecs) buf_size_vecs += olengths_idxs[i];
8069   }
8070   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8071   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8072   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8073   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
8074 
8075   /* get new tags for clean communications */
8076   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8077   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8078   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8079   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
8080 
8081   /* allocate for requests */
8082   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8083   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8084   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8085   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8086   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8087   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8088   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8089   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
8090 
8091   /* communications */
8092   ptr_idxs    = recv_buffer_idxs;
8093   ptr_vals    = recv_buffer_vals;
8094   ptr_idxs_is = recv_buffer_idxs_is;
8095   ptr_vecs    = recv_buffer_vecs;
8096   for (i = 0; i < n_recvs; i++) {
8097     PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i]));
8098     PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i]));
8099     ptr_idxs += olengths_idxs[i];
8100     ptr_vals += olengths_vals[i];
8101     if (nis) {
8102       PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i]));
8103       ptr_idxs_is += olengths_idxs_is[i];
8104     }
8105     if (nvecs) {
8106       PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i]));
8107       ptr_vecs += olengths_idxs[i] - 2;
8108     }
8109   }
8110   for (i = 0; i < n_sends; i++) {
8111     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8112     PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8113     PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8114     if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8115     if (nvecs) {
8116       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8117       PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8118     }
8119   }
8120   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8121   PetscCall(ISDestroy(&is_sends_internal));
8122 
8123   /* assemble new l2g map */
8124   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8125   ptr_idxs       = recv_buffer_idxs;
8126   new_local_rows = 0;
8127   for (i = 0; i < n_recvs; i++) {
8128     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8129     ptr_idxs += olengths_idxs[i];
8130   }
8131   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8132   ptr_idxs       = recv_buffer_idxs;
8133   new_local_rows = 0;
8134   for (i = 0; i < n_recvs; i++) {
8135     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8136     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8137     ptr_idxs += olengths_idxs[i];
8138   }
8139   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8140   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8141   PetscCall(PetscFree(l2gmap_indices));
8142 
8143   /* infer new local matrix type from received local matrices type */
8144   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8145   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
8146   if (n_recvs) {
8147     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8148     ptr_idxs                              = recv_buffer_idxs;
8149     for (i = 0; i < n_recvs; i++) {
8150       if ((PetscInt)new_local_type_private != *ptr_idxs) {
8151         new_local_type_private = MATAIJ_PRIVATE;
8152         break;
8153       }
8154       ptr_idxs += olengths_idxs[i];
8155     }
8156     switch (new_local_type_private) {
8157     case MATDENSE_PRIVATE:
8158       new_local_type = MATSEQAIJ;
8159       bs             = 1;
8160       break;
8161     case MATAIJ_PRIVATE:
8162       new_local_type = MATSEQAIJ;
8163       bs             = 1;
8164       break;
8165     case MATBAIJ_PRIVATE:
8166       new_local_type = MATSEQBAIJ;
8167       break;
8168     case MATSBAIJ_PRIVATE:
8169       new_local_type = MATSEQSBAIJ;
8170       break;
8171     default:
8172       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8173     }
8174   } else { /* by default, new_local_type is seqaij */
8175     new_local_type = MATSEQAIJ;
8176     bs             = 1;
8177   }
8178 
8179   /* create MATIS object if needed */
8180   if (!reuse) {
8181     PetscCall(MatGetSize(mat, &rows, &cols));
8182     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8183   } else {
8184     /* it also destroys the local matrices */
8185     if (*mat_n) {
8186       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8187     } else { /* this is a fake object */
8188       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8189     }
8190   }
8191   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8192   PetscCall(MatSetType(local_mat, new_local_type));
8193 
8194   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
8195 
8196   /* Global to local map of received indices */
8197   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8198   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8199   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
8200 
8201   /* restore attributes -> type of incoming data and its size */
8202   buf_size_idxs = 0;
8203   for (i = 0; i < n_recvs; i++) {
8204     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
8205     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8206     buf_size_idxs += olengths_idxs[i];
8207   }
8208   PetscCall(PetscFree(recv_buffer_idxs));
8209 
8210   /* set preallocation */
8211   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8212   if (!newisdense) {
8213     PetscInt *new_local_nnz = NULL;
8214 
8215     ptr_idxs = recv_buffer_idxs_local;
8216     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8217     for (i = 0; i < n_recvs; i++) {
8218       PetscInt j;
8219       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8220         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8221       } else {
8222         /* TODO */
8223       }
8224       ptr_idxs += olengths_idxs[i];
8225     }
8226     if (new_local_nnz) {
8227       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8228       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8229       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8230       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8231       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8232       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8233     } else {
8234       PetscCall(MatSetUp(local_mat));
8235     }
8236     PetscCall(PetscFree(new_local_nnz));
8237   } else {
8238     PetscCall(MatSetUp(local_mat));
8239   }
8240 
8241   /* set values */
8242   ptr_vals = recv_buffer_vals;
8243   ptr_idxs = recv_buffer_idxs_local;
8244   for (i = 0; i < n_recvs; i++) {
8245     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8246       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8247       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8248       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8249       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8250       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8251     } else {
8252       /* TODO */
8253     }
8254     ptr_idxs += olengths_idxs[i];
8255     ptr_vals += olengths_vals[i];
8256   }
8257   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8258   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8259   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8260   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8261   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8262   PetscCall(PetscFree(recv_buffer_vals));
8263 
8264 #if 0
8265   if (!restrict_comm) { /* check */
8266     Vec       lvec,rvec;
8267     PetscReal infty_error;
8268 
8269     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8270     PetscCall(VecSetRandom(rvec,NULL));
8271     PetscCall(MatMult(mat,rvec,lvec));
8272     PetscCall(VecScale(lvec,-1.0));
8273     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8274     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8275     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8276     PetscCall(VecDestroy(&rvec));
8277     PetscCall(VecDestroy(&lvec));
8278   }
8279 #endif
8280 
8281   /* assemble new additional is (if any) */
8282   if (nis) {
8283     PetscInt **temp_idxs, *count_is, j, psum;
8284 
8285     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8286     PetscCall(PetscCalloc1(nis, &count_is));
8287     ptr_idxs = recv_buffer_idxs_is;
8288     psum     = 0;
8289     for (i = 0; i < n_recvs; i++) {
8290       for (j = 0; j < nis; j++) {
8291         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8292         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
8293         psum += plen;
8294         ptr_idxs += plen + 1; /* shift pointer to received data */
8295       }
8296     }
8297     PetscCall(PetscMalloc1(nis, &temp_idxs));
8298     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8299     for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8300     PetscCall(PetscArrayzero(count_is, nis));
8301     ptr_idxs = recv_buffer_idxs_is;
8302     for (i = 0; i < n_recvs; i++) {
8303       for (j = 0; j < nis; j++) {
8304         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8305         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8306         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
8307         ptr_idxs += plen + 1; /* shift pointer to received data */
8308       }
8309     }
8310     for (i = 0; i < nis; i++) {
8311       PetscCall(ISDestroy(&isarray[i]));
8312       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8313       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8314     }
8315     PetscCall(PetscFree(count_is));
8316     PetscCall(PetscFree(temp_idxs[0]));
8317     PetscCall(PetscFree(temp_idxs));
8318   }
8319   /* free workspace */
8320   PetscCall(PetscFree(recv_buffer_idxs_is));
8321   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8322   PetscCall(PetscFree(send_buffer_idxs));
8323   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8324   if (isdense) {
8325     PetscCall(MatISGetLocalMat(mat, &local_mat));
8326     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8327     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8328   } else {
8329     /* PetscCall(PetscFree(send_buffer_vals)); */
8330   }
8331   if (nis) {
8332     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8333     PetscCall(PetscFree(send_buffer_idxs_is));
8334   }
8335 
8336   if (nvecs) {
8337     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8338     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8339     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8340     PetscCall(VecDestroy(&nnsp_vec[0]));
8341     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8342     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8343     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8344     /* set values */
8345     ptr_vals = recv_buffer_vecs;
8346     ptr_idxs = recv_buffer_idxs_local;
8347     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8348     for (i = 0; i < n_recvs; i++) {
8349       PetscInt j;
8350       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8351       ptr_idxs += olengths_idxs[i];
8352       ptr_vals += olengths_idxs[i] - 2;
8353     }
8354     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8355     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8356     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8357   }
8358 
8359   PetscCall(PetscFree(recv_buffer_vecs));
8360   PetscCall(PetscFree(recv_buffer_idxs_local));
8361   PetscCall(PetscFree(recv_req_idxs));
8362   PetscCall(PetscFree(recv_req_vals));
8363   PetscCall(PetscFree(recv_req_vecs));
8364   PetscCall(PetscFree(recv_req_idxs_is));
8365   PetscCall(PetscFree(send_req_idxs));
8366   PetscCall(PetscFree(send_req_vals));
8367   PetscCall(PetscFree(send_req_vecs));
8368   PetscCall(PetscFree(send_req_idxs_is));
8369   PetscCall(PetscFree(ilengths_vals));
8370   PetscCall(PetscFree(ilengths_idxs));
8371   PetscCall(PetscFree(olengths_vals));
8372   PetscCall(PetscFree(olengths_idxs));
8373   PetscCall(PetscFree(onodes));
8374   if (nis) {
8375     PetscCall(PetscFree(ilengths_idxs_is));
8376     PetscCall(PetscFree(olengths_idxs_is));
8377     PetscCall(PetscFree(onodes_is));
8378   }
8379   PetscCall(PetscSubcommDestroy(&subcomm));
8380   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8381     PetscCall(MatDestroy(mat_n));
8382     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8383     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8384       PetscCall(VecDestroy(&nnsp_vec[0]));
8385     }
8386     *mat_n = NULL;
8387   }
8388   PetscFunctionReturn(PETSC_SUCCESS);
8389 }
8390 
8391 /* temporary hack into ksp private data structure */
8392 #include <petsc/private/kspimpl.h>
8393 
8394 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8395 {
8396   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
8397   PC_IS                 *pcis   = (PC_IS *)pc->data;
8398   PCBDDCGraph            graph  = pcbddc->mat_graph;
8399   Mat                    coarse_mat, coarse_mat_is;
8400   Mat                    coarsedivudotp = NULL;
8401   Mat                    coarseG, t_coarse_mat_is;
8402   MatNullSpace           CoarseNullSpace = NULL;
8403   ISLocalToGlobalMapping coarse_islg;
8404   IS                     coarse_is, *isarray, corners;
8405   PetscInt               i, im_active = -1, active_procs = -1;
8406   PetscInt               nis, nisdofs, nisneu, nisvert;
8407   PetscInt               coarse_eqs_per_proc, coarsening_ratio;
8408   PC                     pc_temp;
8409   PCType                 coarse_pc_type;
8410   KSPType                coarse_ksp_type;
8411   PetscBool              multilevel_requested, multilevel_allowed;
8412   PetscBool              coarse_reuse, multi_element = graph->multi_element;
8413   PetscInt               ncoarse, nedcfield;
8414   PetscBool              compute_vecs = PETSC_FALSE;
8415   PetscScalar           *array;
8416   MatReuse               coarse_mat_reuse;
8417   PetscBool              restr, full_restr, have_void;
8418   PetscMPIInt            size;
8419 
8420   PetscFunctionBegin;
8421   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8422   /* Assign global numbering to coarse dofs */
8423   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8424     PetscInt ocoarse_size;
8425     compute_vecs = PETSC_TRUE;
8426 
8427     pcbddc->new_primal_space = PETSC_TRUE;
8428     ocoarse_size             = pcbddc->coarse_size;
8429     PetscCall(PetscFree(pcbddc->global_primal_indices));
8430     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8431     /* see if we can avoid some work */
8432     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8433       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8434       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8435         PetscCall(KSPReset(pcbddc->coarse_ksp));
8436         coarse_reuse = PETSC_FALSE;
8437       } else { /* we can safely reuse already computed coarse matrix */
8438         coarse_reuse = PETSC_TRUE;
8439       }
8440     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8441       coarse_reuse = PETSC_FALSE;
8442     }
8443     /* reset any subassembling information */
8444     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8445   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8446     coarse_reuse = PETSC_TRUE;
8447   }
8448   if (coarse_reuse && pcbddc->coarse_ksp) {
8449     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8450     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8451     coarse_mat_reuse = MAT_REUSE_MATRIX;
8452   } else {
8453     coarse_mat       = NULL;
8454     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8455   }
8456 
8457   /* creates temporary l2gmap and IS for coarse indexes */
8458   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8459   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
8460 
8461   /* creates temporary MATIS object for coarse matrix */
8462   PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8463   PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8464   PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8465   PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_TRUE));
8466   PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8467   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8468   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8469   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8470   PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));
8471 
8472   /* count "active" (i.e. with positive local size) and "void" processes */
8473   im_active = !!pcis->n;
8474   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8475 
8476   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8477   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8478   /* full_restr : just use the receivers from the subassembling pattern */
8479   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8480   coarse_mat_is        = NULL;
8481   multilevel_allowed   = PETSC_FALSE;
8482   multilevel_requested = PETSC_FALSE;
8483   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8484   if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8485   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8486   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8487   coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8488   if (multilevel_requested) {
8489     ncoarse    = active_procs / coarsening_ratio;
8490     restr      = PETSC_FALSE;
8491     full_restr = PETSC_FALSE;
8492   } else {
8493     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8494     restr      = PETSC_TRUE;
8495     full_restr = PETSC_TRUE;
8496   }
8497   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8498   ncoarse = PetscMax(1, ncoarse);
8499   if (!pcbddc->coarse_subassembling) {
8500     if (coarsening_ratio > 1) {
8501       if (multilevel_requested) {
8502         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8503       } else {
8504         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8505       }
8506     } else {
8507       PetscMPIInt rank;
8508 
8509       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8510       have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE;
8511       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8512     }
8513   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8514     PetscInt psum;
8515     if (pcbddc->coarse_ksp) psum = 1;
8516     else psum = 0;
8517     PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8518     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8519   }
8520   /* determine if we can go multilevel */
8521   if (multilevel_requested) {
8522     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8523     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
8524   }
8525   if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8526 
8527   /* dump subassembling pattern */
8528   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8529   /* compute dofs splitting and neumann boundaries for coarse dofs */
8530   nedcfield = -1;
8531   corners   = NULL;
8532   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8533     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
8534     const PetscInt        *idxs;
8535     ISLocalToGlobalMapping tmap;
8536 
8537     /* create map between primal indices (in local representative ordering) and local primal numbering */
8538     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8539     /* allocate space for temporary storage */
8540     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8541     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8542     /* allocate for IS array */
8543     nisdofs = pcbddc->n_ISForDofsLocal;
8544     if (pcbddc->nedclocal) {
8545       if (pcbddc->nedfield > -1) {
8546         nedcfield = pcbddc->nedfield;
8547       } else {
8548         nedcfield = 0;
8549         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8550         nisdofs = 1;
8551       }
8552     }
8553     nisneu  = !!pcbddc->NeumannBoundariesLocal;
8554     nisvert = 0; /* nisvert is not used */
8555     nis     = nisdofs + nisneu + nisvert;
8556     PetscCall(PetscMalloc1(nis, &isarray));
8557     /* dofs splitting */
8558     for (i = 0; i < nisdofs; i++) {
8559       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8560       if (nedcfield != i) {
8561         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8562         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8563         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8564         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8565       } else {
8566         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8567         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8568         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8569         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8570         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8571       }
8572       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8573       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8574       /* PetscCall(ISView(isarray[i],0)); */
8575     }
8576     /* neumann boundaries */
8577     if (pcbddc->NeumannBoundariesLocal) {
8578       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8579       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8580       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8581       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8582       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8583       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8584       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8585       /* PetscCall(ISView(isarray[nisdofs],0)); */
8586     }
8587     /* coordinates */
8588     if (pcbddc->corner_selected) {
8589       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8590       PetscCall(ISGetLocalSize(corners, &tsize));
8591       PetscCall(ISGetIndices(corners, &idxs));
8592       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8593       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8594       PetscCall(ISRestoreIndices(corners, &idxs));
8595       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8596       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8597       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8598     }
8599     PetscCall(PetscFree(tidxs));
8600     PetscCall(PetscFree(tidxs2));
8601     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8602   } else {
8603     nis     = 0;
8604     nisdofs = 0;
8605     nisneu  = 0;
8606     nisvert = 0;
8607     isarray = NULL;
8608   }
8609   /* destroy no longer needed map */
8610   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
8611 
8612   /* subassemble */
8613   if (multilevel_allowed) {
8614     Vec       vp[1];
8615     PetscInt  nvecs = 0;
8616     PetscBool reuse;
8617 
8618     vp[0] = NULL;
8619     /* XXX HDIV also */
8620     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8621       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8622       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8623       PetscCall(VecSetType(vp[0], VECSTANDARD));
8624       nvecs = 1;
8625 
8626       if (pcbddc->divudotp) {
8627         Mat      B, loc_divudotp;
8628         Vec      v, p;
8629         IS       dummy;
8630         PetscInt np;
8631 
8632         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8633         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8634         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8635         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8636         PetscCall(MatCreateVecs(B, &v, &p));
8637         PetscCall(VecSet(p, 1.));
8638         PetscCall(MatMultTranspose(B, p, v));
8639         PetscCall(VecDestroy(&p));
8640         PetscCall(MatDestroy(&B));
8641         PetscCall(VecGetArray(vp[0], &array));
8642         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8643         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8644         PetscCall(VecResetArray(pcbddc->vec1_P));
8645         PetscCall(VecRestoreArray(vp[0], &array));
8646         PetscCall(ISDestroy(&dummy));
8647         PetscCall(VecDestroy(&v));
8648       }
8649     }
8650     if (coarse_mat) reuse = PETSC_TRUE;
8651     else reuse = PETSC_FALSE;
8652     if (multi_element) {
8653       /* XXX divudotp */
8654       PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_FALSE));
8655       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8656       coarse_mat_is = t_coarse_mat_is;
8657     } else {
8658       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8659       if (reuse) {
8660         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8661       } else {
8662         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8663       }
8664       if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8665         PetscScalar       *arraym;
8666         const PetscScalar *arrayv;
8667         PetscInt           nl;
8668         PetscCall(VecGetLocalSize(vp[0], &nl));
8669         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8670         PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8671         PetscCall(VecGetArrayRead(vp[0], &arrayv));
8672         PetscCall(PetscArraycpy(arraym, arrayv, nl));
8673         PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8674         PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8675         PetscCall(VecDestroy(&vp[0]));
8676       } else {
8677         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8678       }
8679     }
8680   } else {
8681     if (ncoarse != size) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8682     else {
8683       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8684       coarse_mat_is = t_coarse_mat_is;
8685     }
8686   }
8687   if (coarse_mat_is || coarse_mat) {
8688     if (!multilevel_allowed) {
8689       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8690     } else {
8691       /* if this matrix is present, it means we are not reusing the coarse matrix */
8692       if (coarse_mat_is) {
8693         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8694         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8695         coarse_mat = coarse_mat_is;
8696       }
8697     }
8698   }
8699   PetscCall(MatDestroy(&t_coarse_mat_is));
8700   PetscCall(MatDestroy(&coarse_mat_is));
8701 
8702   /* create local to global scatters for coarse problem */
8703   if (compute_vecs) {
8704     PetscInt lrows;
8705     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8706     if (coarse_mat) {
8707       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8708     } else {
8709       lrows = 0;
8710     }
8711     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8712     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8713     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8714     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8715     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8716   }
8717   PetscCall(ISDestroy(&coarse_is));
8718 
8719   /* set defaults for coarse KSP and PC */
8720   if (multilevel_allowed) {
8721     coarse_ksp_type = KSPRICHARDSON;
8722     coarse_pc_type  = PCBDDC;
8723   } else {
8724     coarse_ksp_type = KSPPREONLY;
8725     coarse_pc_type  = PCREDUNDANT;
8726   }
8727 
8728   /* print some info if requested */
8729   if (pcbddc->dbg_flag) {
8730     if (!multilevel_allowed) {
8731       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8732       if (multilevel_requested) {
8733         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8734       } else if (pcbddc->max_levels) {
8735         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8736       }
8737       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8738     }
8739   }
8740 
8741   /* communicate coarse discrete gradient */
8742   coarseG = NULL;
8743   if (pcbddc->nedcG && multilevel_allowed) {
8744     MPI_Comm ccomm;
8745     if (coarse_mat) {
8746       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8747     } else {
8748       ccomm = MPI_COMM_NULL;
8749     }
8750     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8751   }
8752 
8753   /* create the coarse KSP object only once with defaults */
8754   if (coarse_mat) {
8755     PetscBool   isredundant, isbddc, force, valid;
8756     PetscViewer dbg_viewer = NULL;
8757     PetscBool   isset, issym, isher, isspd;
8758 
8759     if (pcbddc->dbg_flag) {
8760       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8761       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8762     }
8763     if (!pcbddc->coarse_ksp) {
8764       char   prefix[256], str_level[16];
8765       size_t len;
8766 
8767       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8768       PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8769       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8770       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8771       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1));
8772       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8773       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8774       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8775       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8776       /* TODO is this logic correct? should check for coarse_mat type */
8777       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8778       /* prefix */
8779       PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8780       PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8781       if (!pcbddc->current_level) {
8782         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8783         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8784       } else {
8785         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8786         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8787         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8788         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8789         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8790         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
8791         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8792       }
8793       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8794       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8795       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8796       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8797       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8798       /* allow user customization */
8799       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8800       /* get some info after set from options */
8801       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8802       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8803       force = PETSC_FALSE;
8804       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8805       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8806       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8807       if (multilevel_allowed && !force && !valid) {
8808         isbddc = PETSC_TRUE;
8809         PetscCall(PCSetType(pc_temp, PCBDDC));
8810         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8811         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8812         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8813         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8814           PetscObjectOptionsBegin((PetscObject)pc_temp);
8815           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8816           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8817           PetscOptionsEnd();
8818           pc_temp->setfromoptionscalled++;
8819         }
8820       }
8821     }
8822     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8823     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8824     if (nisdofs) {
8825       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8826       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8827     }
8828     if (nisneu) {
8829       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8830       PetscCall(ISDestroy(&isarray[nisdofs]));
8831     }
8832     if (nisvert) {
8833       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8834       PetscCall(ISDestroy(&isarray[nis - 1]));
8835     }
8836     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8837 
8838     /* get some info after set from options */
8839     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8840 
8841     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8842     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8843     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8844     force = PETSC_FALSE;
8845     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8846     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8847     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8848     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8849     if (isredundant) {
8850       KSP inner_ksp;
8851       PC  inner_pc;
8852 
8853       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8854       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8855     }
8856 
8857     /* parameters which miss an API */
8858     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8859     if (isbddc) {
8860       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8861 
8862       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8863       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8864       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8865       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8866       if (pcbddc_coarse->benign_saddle_point) {
8867         Mat                    coarsedivudotp_is;
8868         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8869         IS                     row, col;
8870         const PetscInt        *gidxs;
8871         PetscInt               n, st, M, N;
8872 
8873         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8874         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8875         st = st - n;
8876         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8877         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8878         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8879         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
8880         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
8881         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
8882         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
8883         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
8884         PetscCall(ISGetSize(row, &M));
8885         PetscCall(MatGetSize(coarse_mat, &N, NULL));
8886         PetscCall(ISDestroy(&row));
8887         PetscCall(ISDestroy(&col));
8888         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
8889         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
8890         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
8891         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
8892         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
8893         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
8894         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
8895         PetscCall(MatDestroy(&coarsedivudotp));
8896         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
8897         PetscCall(MatDestroy(&coarsedivudotp_is));
8898         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8899         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8900       }
8901     }
8902 
8903     /* propagate symmetry info of coarse matrix */
8904     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
8905     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
8906     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
8907     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
8908     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
8909     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
8910     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
8911 
8912     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
8913     /* set operators */
8914     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
8915     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
8916     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8917     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
8918   }
8919   PetscCall(MatDestroy(&coarseG));
8920   PetscCall(PetscFree(isarray));
8921 #if 0
8922   {
8923     PetscViewer viewer;
8924     char filename[256];
8925     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
8926     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8927     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8928     PetscCall(MatView(coarse_mat,viewer));
8929     PetscCall(PetscViewerPopFormat(viewer));
8930     PetscCall(PetscViewerDestroy(&viewer));
8931   }
8932 #endif
8933 
8934   if (corners) {
8935     Vec             gv;
8936     IS              is;
8937     const PetscInt *idxs;
8938     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8939     PetscScalar    *coords;
8940 
8941     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
8942     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
8943     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
8944     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
8945     PetscCall(VecSetBlockSize(gv, cdim));
8946     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
8947     PetscCall(VecSetType(gv, VECSTANDARD));
8948     PetscCall(VecSetFromOptions(gv));
8949     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
8950 
8951     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8952     PetscCall(ISGetLocalSize(is, &n));
8953     PetscCall(ISGetIndices(is, &idxs));
8954     PetscCall(PetscMalloc1(n * cdim, &coords));
8955     for (i = 0; i < n; i++) {
8956       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8957     }
8958     PetscCall(ISRestoreIndices(is, &idxs));
8959     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8960 
8961     PetscCall(ISGetLocalSize(corners, &n));
8962     PetscCall(ISGetIndices(corners, &idxs));
8963     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
8964     PetscCall(ISRestoreIndices(corners, &idxs));
8965     PetscCall(PetscFree(coords));
8966     PetscCall(VecAssemblyBegin(gv));
8967     PetscCall(VecAssemblyEnd(gv));
8968     PetscCall(VecGetArray(gv, &coords));
8969     if (pcbddc->coarse_ksp) {
8970       PC        coarse_pc;
8971       PetscBool isbddc;
8972 
8973       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
8974       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
8975       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8976         PetscReal *realcoords;
8977 
8978         PetscCall(VecGetLocalSize(gv, &n));
8979 #if defined(PETSC_USE_COMPLEX)
8980         PetscCall(PetscMalloc1(n, &realcoords));
8981         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
8982 #else
8983         realcoords = coords;
8984 #endif
8985         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
8986 #if defined(PETSC_USE_COMPLEX)
8987         PetscCall(PetscFree(realcoords));
8988 #endif
8989       }
8990     }
8991     PetscCall(VecRestoreArray(gv, &coords));
8992     PetscCall(VecDestroy(&gv));
8993   }
8994   PetscCall(ISDestroy(&corners));
8995 
8996   if (pcbddc->coarse_ksp) {
8997     Vec crhs, csol;
8998 
8999     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9000     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9001     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9002     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9003   }
9004   PetscCall(MatDestroy(&coarsedivudotp));
9005 
9006   /* compute null space for coarse solver if the benign trick has been requested */
9007   if (pcbddc->benign_null) {
9008     PetscCall(VecSet(pcbddc->vec1_P, 0.));
9009     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9010     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9011     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9012     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9013     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9014     if (coarse_mat) {
9015       Vec          nullv;
9016       PetscScalar *array, *array2;
9017       PetscInt     nl;
9018 
9019       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9020       PetscCall(VecGetLocalSize(nullv, &nl));
9021       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9022       PetscCall(VecGetArray(nullv, &array2));
9023       PetscCall(PetscArraycpy(array2, array, nl));
9024       PetscCall(VecRestoreArray(nullv, &array2));
9025       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9026       PetscCall(VecNormalize(nullv, NULL));
9027       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9028       PetscCall(VecDestroy(&nullv));
9029     }
9030   }
9031   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
9032 
9033   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9034   if (pcbddc->coarse_ksp) {
9035     PetscBool ispreonly;
9036 
9037     if (CoarseNullSpace) {
9038       PetscBool isnull;
9039 
9040       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9041       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9042       /* TODO: add local nullspaces (if any) */
9043     }
9044     /* setup coarse ksp */
9045     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9046     /* Check coarse problem if in debug mode or if solving with an iterative method */
9047     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9048     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9049       KSP         check_ksp;
9050       KSPType     check_ksp_type;
9051       PC          check_pc;
9052       Vec         check_vec, coarse_vec;
9053       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9054       PetscInt    its;
9055       PetscBool   compute_eigs;
9056       PetscReal  *eigs_r, *eigs_c;
9057       PetscInt    neigs;
9058       const char *prefix;
9059 
9060       /* Create ksp object suitable for estimation of extreme eigenvalues */
9061       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9062       PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9063       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9064       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9065       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9066       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size));
9067       /* prevent from setup unneeded object */
9068       PetscCall(KSPGetPC(check_ksp, &check_pc));
9069       PetscCall(PCSetType(check_pc, PCNONE));
9070       if (ispreonly) {
9071         check_ksp_type = KSPPREONLY;
9072         compute_eigs   = PETSC_FALSE;
9073       } else {
9074         check_ksp_type = KSPGMRES;
9075         compute_eigs   = PETSC_TRUE;
9076       }
9077       PetscCall(KSPSetType(check_ksp, check_ksp_type));
9078       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9079       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9080       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9081       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9082       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9083       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9084       PetscCall(KSPSetFromOptions(check_ksp));
9085       PetscCall(KSPSetUp(check_ksp));
9086       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9087       PetscCall(KSPSetPC(check_ksp, check_pc));
9088       /* create random vec */
9089       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9090       PetscCall(VecSetRandom(check_vec, NULL));
9091       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9092       /* solve coarse problem */
9093       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9094       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9095       /* set eigenvalue estimation if preonly has not been requested */
9096       if (compute_eigs) {
9097         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9098         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9099         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9100         if (neigs) {
9101           lambda_max = eigs_r[neigs - 1];
9102           lambda_min = eigs_r[0];
9103           if (pcbddc->use_coarse_estimates) {
9104             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9105               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9106               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9107             }
9108           }
9109         }
9110       }
9111 
9112       /* check coarse problem residual error */
9113       if (pcbddc->dbg_flag) {
9114         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9115         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9116         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9117         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9118         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9119         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9120         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9121         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9122         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer));
9123         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
9124         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9125         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9126         if (compute_eigs) {
9127           PetscReal          lambda_max_s, lambda_min_s;
9128           KSPConvergedReason reason;
9129           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9130           PetscCall(KSPGetIterationNumber(check_ksp, &its));
9131           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9132           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9133           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9134           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9135         }
9136         PetscCall(PetscViewerFlush(dbg_viewer));
9137         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9138       }
9139       PetscCall(VecDestroy(&check_vec));
9140       PetscCall(VecDestroy(&coarse_vec));
9141       PetscCall(KSPDestroy(&check_ksp));
9142       if (compute_eigs) {
9143         PetscCall(PetscFree(eigs_r));
9144         PetscCall(PetscFree(eigs_c));
9145       }
9146     }
9147   }
9148   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9149   /* print additional info */
9150   if (pcbddc->dbg_flag) {
9151     /* waits until all processes reaches this point */
9152     PetscCall(PetscBarrier((PetscObject)pc));
9153     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9154     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9155   }
9156 
9157   /* free memory */
9158   PetscCall(MatDestroy(&coarse_mat));
9159   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9160   PetscFunctionReturn(PETSC_SUCCESS);
9161 }
9162 
9163 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9164 {
9165   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
9166   PC_IS          *pcis   = (PC_IS *)pc->data;
9167   IS              subset, subset_mult, subset_n;
9168   PetscInt        local_size, coarse_size = 0;
9169   PetscInt       *local_primal_indices = NULL;
9170   const PetscInt *t_local_primal_indices;
9171 
9172   PetscFunctionBegin;
9173   /* Compute global number of coarse dofs */
9174   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9175   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9176   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9177   PetscCall(ISDestroy(&subset_n));
9178   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9179   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9180   PetscCall(ISDestroy(&subset));
9181   PetscCall(ISDestroy(&subset_mult));
9182   PetscCall(ISGetLocalSize(subset_n, &local_size));
9183   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9184   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9185   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9186   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9187   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9188   PetscCall(ISDestroy(&subset_n));
9189 
9190   if (pcbddc->dbg_flag) {
9191     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9192     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9193     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9194     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9195   }
9196 
9197   /* get back data */
9198   *coarse_size_n          = coarse_size;
9199   *local_primal_indices_n = local_primal_indices;
9200   PetscFunctionReturn(PETSC_SUCCESS);
9201 }
9202 
9203 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9204 {
9205   IS           localis_t;
9206   PetscInt     i, lsize, *idxs, n;
9207   PetscScalar *vals;
9208 
9209   PetscFunctionBegin;
9210   /* get indices in local ordering exploiting local to global map */
9211   PetscCall(ISGetLocalSize(globalis, &lsize));
9212   PetscCall(PetscMalloc1(lsize, &vals));
9213   for (i = 0; i < lsize; i++) vals[i] = 1.0;
9214   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9215   PetscCall(VecSet(gwork, 0.0));
9216   PetscCall(VecSet(lwork, 0.0));
9217   if (idxs) { /* multilevel guard */
9218     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9219     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9220   }
9221   PetscCall(VecAssemblyBegin(gwork));
9222   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9223   PetscCall(PetscFree(vals));
9224   PetscCall(VecAssemblyEnd(gwork));
9225   /* now compute set in local ordering */
9226   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9227   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9228   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9229   PetscCall(VecGetSize(lwork, &n));
9230   for (i = 0, lsize = 0; i < n; i++) {
9231     if (PetscRealPart(vals[i]) > 0.5) lsize++;
9232   }
9233   PetscCall(PetscMalloc1(lsize, &idxs));
9234   for (i = 0, lsize = 0; i < n; i++) {
9235     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9236   }
9237   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9238   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9239   *localis = localis_t;
9240   PetscFunctionReturn(PETSC_SUCCESS);
9241 }
9242 
9243 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9244 {
9245   PC_IS   *pcis   = (PC_IS *)pc->data;
9246   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9247   PC_IS   *pcisf;
9248   PC_BDDC *pcbddcf;
9249   PC       pcf;
9250 
9251   PetscFunctionBegin;
9252   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9253   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9254   PetscCall(PCSetType(pcf, PCBDDC));
9255 
9256   pcisf   = (PC_IS *)pcf->data;
9257   pcbddcf = (PC_BDDC *)pcf->data;
9258 
9259   pcisf->is_B_local = pcis->is_B_local;
9260   pcisf->vec1_N     = pcis->vec1_N;
9261   pcisf->BtoNmap    = pcis->BtoNmap;
9262   pcisf->n          = pcis->n;
9263   pcisf->n_B        = pcis->n_B;
9264 
9265   PetscCall(PetscFree(pcbddcf->mat_graph));
9266   PetscCall(PetscFree(pcbddcf->sub_schurs));
9267   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
9268   pcbddcf->sub_schurs            = schurs;
9269   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
9270   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9271   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9272   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
9273   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
9274   pcbddcf->use_faces             = PETSC_TRUE;
9275   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
9276   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
9277   pcbddcf->use_qr_single         = (PetscBool)!constraints;
9278   pcbddcf->fake_change           = PETSC_TRUE;
9279   pcbddcf->dbg_flag              = pcbddc->dbg_flag;
9280 
9281   PetscCall(PCBDDCAdaptiveSelection(pcf));
9282   PetscCall(PCBDDCConstraintsSetUp(pcf));
9283 
9284   *change = pcbddcf->ConstraintMatrix;
9285   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9286   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9287   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
9288 
9289   if (schurs) pcbddcf->sub_schurs = NULL;
9290   pcbddcf->ConstraintMatrix = NULL;
9291   pcbddcf->mat_graph        = NULL;
9292   pcisf->is_B_local         = NULL;
9293   pcisf->vec1_N             = NULL;
9294   pcisf->BtoNmap            = NULL;
9295   PetscCall(PCDestroy(&pcf));
9296   PetscFunctionReturn(PETSC_SUCCESS);
9297 }
9298 
9299 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9300 {
9301   PC_IS          *pcis       = (PC_IS *)pc->data;
9302   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
9303   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9304   Mat             S_j;
9305   PetscInt       *used_xadj, *used_adjncy;
9306   PetscBool       free_used_adj;
9307 
9308   PetscFunctionBegin;
9309   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9310   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9311   free_used_adj = PETSC_FALSE;
9312   if (pcbddc->sub_schurs_layers == -1) {
9313     used_xadj   = NULL;
9314     used_adjncy = NULL;
9315   } else {
9316     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9317       used_xadj   = pcbddc->mat_graph->xadj;
9318       used_adjncy = pcbddc->mat_graph->adjncy;
9319     } else if (pcbddc->computed_rowadj) {
9320       used_xadj   = pcbddc->mat_graph->xadj;
9321       used_adjncy = pcbddc->mat_graph->adjncy;
9322     } else {
9323       PetscBool       flg_row = PETSC_FALSE;
9324       const PetscInt *xadj, *adjncy;
9325       PetscInt        nvtxs;
9326 
9327       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9328       if (flg_row) {
9329         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9330         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9331         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9332         free_used_adj = PETSC_TRUE;
9333       } else {
9334         pcbddc->sub_schurs_layers = -1;
9335         used_xadj                 = NULL;
9336         used_adjncy               = NULL;
9337       }
9338       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9339     }
9340   }
9341 
9342   /* setup sub_schurs data */
9343   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9344   if (!sub_schurs->schur_explicit) {
9345     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9346     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9347     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9348   } else {
9349     Mat       change        = NULL;
9350     Vec       scaling       = NULL;
9351     IS        change_primal = NULL, iP;
9352     PetscInt  benign_n;
9353     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
9354     PetscBool need_change       = PETSC_FALSE;
9355     PetscBool discrete_harmonic = PETSC_FALSE;
9356 
9357     if (!pcbddc->use_vertices && reuse_solvers) {
9358       PetscInt n_vertices;
9359 
9360       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9361       reuse_solvers = (PetscBool)!n_vertices;
9362     }
9363     if (!pcbddc->benign_change_explicit) {
9364       benign_n = pcbddc->benign_n;
9365     } else {
9366       benign_n = 0;
9367     }
9368     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9369        We need a global reduction to avoid possible deadlocks.
9370        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9371     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9372       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9373       PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9374       need_change = (PetscBool)(!need_change);
9375     }
9376     /* If the user defines additional constraints, we import them here */
9377     if (need_change) {
9378       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9379       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9380     }
9381     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9382 
9383     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9384     if (iP) {
9385       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9386       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9387       PetscOptionsEnd();
9388     }
9389     if (discrete_harmonic) {
9390       Mat A;
9391       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9392       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9393       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9394       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9395                                      pcbddc->benign_zerodiag_subs, change, change_primal));
9396       PetscCall(MatDestroy(&A));
9397     } else {
9398       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9399                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9400     }
9401     PetscCall(MatDestroy(&change));
9402     PetscCall(ISDestroy(&change_primal));
9403   }
9404   PetscCall(MatDestroy(&S_j));
9405 
9406   /* free adjacency */
9407   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9408   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9409   PetscFunctionReturn(PETSC_SUCCESS);
9410 }
9411 
9412 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9413 {
9414   PC_IS      *pcis   = (PC_IS *)pc->data;
9415   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9416   PCBDDCGraph graph;
9417 
9418   PetscFunctionBegin;
9419   /* attach interface graph for determining subsets */
9420   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9421     IS       verticesIS, verticescomm;
9422     PetscInt vsize, *idxs;
9423 
9424     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9425     PetscCall(ISGetSize(verticesIS, &vsize));
9426     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9427     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9428     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9429     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9430     PetscCall(PCBDDCGraphCreate(&graph));
9431     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9432     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9433     PetscCall(ISDestroy(&verticescomm));
9434     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9435   } else {
9436     graph = pcbddc->mat_graph;
9437   }
9438   /* print some info */
9439   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9440     IS       vertices;
9441     PetscInt nv, nedges, nfaces;
9442     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9443     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9444     PetscCall(ISGetSize(vertices, &nv));
9445     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9446     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9447     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9448     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9449     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9450     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9451     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9452     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9453   }
9454 
9455   /* sub_schurs init */
9456   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9457   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
9458 
9459   /* free graph struct */
9460   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9461   PetscFunctionReturn(PETSC_SUCCESS);
9462 }
9463 
9464 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9465 {
9466   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
9467   PetscInt        n     = pc->pmat->rmap->n, ln, ni, st;
9468   const PetscInt *idxs;
9469   IS              gis;
9470 
9471   PetscFunctionBegin;
9472   if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9473   PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9474   PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9475   PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9476   PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9477   PetscCall(ISGetLocalSize(is, &ni));
9478   PetscCall(ISGetIndices(is, &idxs));
9479   for (PetscInt i = 0; i < ni; i++) {
9480     if (idxs[i] < 0 || idxs[i] >= ln) continue;
9481     matis->sf_leafdata[idxs[i]] = 1;
9482   }
9483   PetscCall(ISRestoreIndices(is, &idxs));
9484   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9485   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9486   ln = 0;
9487   for (PetscInt i = 0; i < n; i++) {
9488     if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9489   }
9490   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9491   PetscCall(ISView(gis, viewer));
9492   PetscCall(ISDestroy(&gis));
9493   PetscFunctionReturn(PETSC_SUCCESS);
9494 }
9495 
9496 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9497 {
9498   PetscInt    header[11];
9499   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9500   PetscViewer viewer;
9501   MPI_Comm    comm = PetscObjectComm((PetscObject)pc);
9502 
9503   PetscFunctionBegin;
9504   PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9505   if (load) {
9506     IS  is;
9507     Mat A;
9508 
9509     PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9510     PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9511     PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9512     PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9513     PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9514     PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9515     PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9516     PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9517     PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9518     PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9519     PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9520     if (header[0]) {
9521       PetscCall(ISCreate(comm, &is));
9522       PetscCall(ISLoad(is, viewer));
9523       PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9524       PetscCall(ISDestroy(&is));
9525     }
9526     if (header[1]) {
9527       PetscCall(ISCreate(comm, &is));
9528       PetscCall(ISLoad(is, viewer));
9529       PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9530       PetscCall(ISDestroy(&is));
9531     }
9532     if (header[2]) {
9533       IS *isarray;
9534 
9535       PetscCall(PetscMalloc1(header[2], &isarray));
9536       for (PetscInt i = 0; i < header[2]; i++) {
9537         PetscCall(ISCreate(comm, &isarray[i]));
9538         PetscCall(ISLoad(isarray[i], viewer));
9539       }
9540       PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9541       for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9542       PetscCall(PetscFree(isarray));
9543     }
9544     if (header[3]) {
9545       PetscCall(ISCreate(comm, &is));
9546       PetscCall(ISLoad(is, viewer));
9547       PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9548       PetscCall(ISDestroy(&is));
9549     }
9550     if (header[4]) {
9551       PetscCall(MatCreate(comm, &A));
9552       PetscCall(MatSetType(A, MATAIJ));
9553       PetscCall(MatLoad(A, viewer));
9554       PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9555       PetscCall(MatDestroy(&A));
9556     }
9557     if (header[9]) {
9558       PetscCall(MatCreate(comm, &A));
9559       PetscCall(MatSetType(A, MATIS));
9560       PetscCall(MatLoad(A, viewer));
9561       PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9562       PetscCall(MatDestroy(&A));
9563     }
9564   } else {
9565     header[0]  = (PetscInt)!!pcbddc->DirichletBoundariesLocal;
9566     header[1]  = (PetscInt)!!pcbddc->NeumannBoundariesLocal;
9567     header[2]  = pcbddc->n_ISForDofsLocal;
9568     header[3]  = (PetscInt)!!pcbddc->user_primal_vertices_local;
9569     header[4]  = (PetscInt)!!pcbddc->discretegradient;
9570     header[5]  = pcbddc->nedorder;
9571     header[6]  = pcbddc->nedfield;
9572     header[7]  = (PetscInt)pcbddc->nedglobal;
9573     header[8]  = (PetscInt)pcbddc->conforming;
9574     header[9]  = (PetscInt)!!pcbddc->divudotp;
9575     header[10] = (PetscInt)pcbddc->divudotp_trans;
9576     if (header[4]) header[3] = 0;
9577 
9578     PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9579     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9580     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9581     for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9582     if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9583     if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9584     if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9585   }
9586   PetscCall(PetscViewerDestroy(&viewer));
9587   PetscFunctionReturn(PETSC_SUCCESS);
9588 }
9589 
9590 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9591 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9592 {
9593   Mat         At;
9594   IS          rows;
9595   PetscInt    rst, ren;
9596   PetscLayout rmap;
9597 
9598   PetscFunctionBegin;
9599   rst = ren = 0;
9600   if (ccomm != MPI_COMM_NULL) {
9601     PetscCall(PetscLayoutCreate(ccomm, &rmap));
9602     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9603     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9604     PetscCall(PetscLayoutSetUp(rmap));
9605     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9606   }
9607   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9608   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9609   PetscCall(ISDestroy(&rows));
9610 
9611   if (ccomm != MPI_COMM_NULL) {
9612     Mat_MPIAIJ *a, *b;
9613     IS          from, to;
9614     Vec         gvec;
9615     PetscInt    lsize;
9616 
9617     PetscCall(MatCreate(ccomm, B));
9618     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9619     PetscCall(MatSetType(*B, MATAIJ));
9620     PetscCall(PetscLayoutDestroy(&(*B)->rmap));
9621     PetscCall(PetscLayoutSetUp((*B)->cmap));
9622     a = (Mat_MPIAIJ *)At->data;
9623     b = (Mat_MPIAIJ *)(*B)->data;
9624     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9625     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9626     PetscCall(PetscObjectReference((PetscObject)a->A));
9627     PetscCall(PetscObjectReference((PetscObject)a->B));
9628     b->A = a->A;
9629     b->B = a->B;
9630 
9631     b->donotstash   = a->donotstash;
9632     b->roworiented  = a->roworiented;
9633     b->rowindices   = NULL;
9634     b->rowvalues    = NULL;
9635     b->getrowactive = PETSC_FALSE;
9636 
9637     (*B)->rmap         = rmap;
9638     (*B)->factortype   = A->factortype;
9639     (*B)->assembled    = PETSC_TRUE;
9640     (*B)->insertmode   = NOT_SET_VALUES;
9641     (*B)->preallocated = PETSC_TRUE;
9642 
9643     if (a->colmap) {
9644 #if defined(PETSC_USE_CTABLE)
9645       PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9646 #else
9647       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9648       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9649 #endif
9650     } else b->colmap = NULL;
9651     if (a->garray) {
9652       PetscInt len;
9653       len = a->B->cmap->n;
9654       PetscCall(PetscMalloc1(len + 1, &b->garray));
9655       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9656     } else b->garray = NULL;
9657 
9658     PetscCall(PetscObjectReference((PetscObject)a->lvec));
9659     b->lvec = a->lvec;
9660 
9661     /* cannot use VecScatterCopy */
9662     PetscCall(VecGetLocalSize(b->lvec, &lsize));
9663     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9664     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9665     PetscCall(MatCreateVecs(*B, &gvec, NULL));
9666     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9667     PetscCall(ISDestroy(&from));
9668     PetscCall(ISDestroy(&to));
9669     PetscCall(VecDestroy(&gvec));
9670   }
9671   PetscCall(MatDestroy(&At));
9672   PetscFunctionReturn(PETSC_SUCCESS);
9673 }
9674 
9675 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
9676 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9677 {
9678   PetscBool isaij;
9679   MPI_Comm  comm;
9680 
9681   PetscFunctionBegin;
9682   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9683   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9684   PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9685   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9686   if (isaij) { /* SeqAIJ supports repeated rows */
9687     PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9688   } else {
9689     Mat                A_loc;
9690     Mat_SeqAIJ        *da;
9691     PetscSF            sf;
9692     PetscInt           ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9693     PetscScalar       *daa;
9694     const PetscInt    *idxs;
9695     const PetscSFNode *iremotes;
9696     PetscSFNode       *remotes;
9697 
9698     /* SF for incoming rows */
9699     PetscCall(PetscSFCreate(comm, &sf));
9700     PetscCall(ISGetLocalSize(rows, &ni));
9701     PetscCall(ISGetIndices(rows, &idxs));
9702     PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9703     PetscCall(ISRestoreIndices(rows, &idxs));
9704 
9705     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9706     da = (Mat_SeqAIJ *)A_loc->data;
9707     PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9708     for (PetscInt i = 0; i < m; i++) {
9709       rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9710       rdata[2 * i + 1] = da->i[i];
9711     }
9712     PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9713     PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9714     PetscCall(PetscMalloc1(ni + 1, &di));
9715     di[0] = 0;
9716     for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9717     PetscCall(PetscMalloc1(di[ni], &dj));
9718     PetscCall(PetscMalloc1(di[ni], &daa));
9719     PetscCall(PetscMalloc1(di[ni], &remotes));
9720 
9721     PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));
9722 
9723     /* SF graph for nonzeros */
9724     c = 0;
9725     for (PetscInt i = 0; i < ni; i++) {
9726       const PetscInt rank  = iremotes[i].rank;
9727       const PetscInt rsize = ldata[2 * i];
9728       for (PetscInt j = 0; j < rsize; j++) {
9729         remotes[c].rank  = rank;
9730         remotes[c].index = ldata[2 * i + 1] + j;
9731         c++;
9732       }
9733     }
9734     PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9735     PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9736     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9737     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9738     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9739     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9740 
9741     PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9742     PetscCall(MatDestroy(&A_loc));
9743     PetscCall(PetscSFDestroy(&sf));
9744     PetscCall(PetscFree(di));
9745     PetscCall(PetscFree(dj));
9746     PetscCall(PetscFree(daa));
9747     PetscCall(PetscFree(remotes));
9748     PetscCall(PetscFree2(ldata, rdata));
9749   }
9750   PetscFunctionReturn(PETSC_SUCCESS);
9751 }
9752