xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 732a5147dd1d45c58175f9f4a7717b4547ac76a2)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <petsc/private/pcbddcimpl.h>
3 #include <petsc/private/pcbddcprivateimpl.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <../src/mat/impls/dense/seq/dense.h>
6 #include <petscdmplex.h>
7 #include <petscblaslapack.h>
8 #include <petsc/private/sfimpl.h>
9 #include <petsc/private/dmpleximpl.h>
10 #include <petscdmda.h>
11 
12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13 
14 /* if range is true,  it returns B s.t. span{B} = range(A)
15    if range is false, it returns B s.t. range(B) _|_ range(A) */
16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
17 {
18   PetscScalar *uwork, *data, *U, ds = 0.;
19   PetscReal   *sing;
20   PetscBLASInt bM, bN, lwork, lierr, di = 1;
21   PetscInt     ulw, i, nr, nc, n;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal *rwork2;
24 #endif
25 
26   PetscFunctionBegin;
27   PetscCall(MatGetSize(A, &nr, &nc));
28   if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
29 
30   /* workspace */
31   if (!work) {
32     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
33     PetscCall(PetscMalloc1(ulw, &uwork));
34   } else {
35     ulw   = lw;
36     uwork = work;
37   }
38   n = PetscMin(nr, nc);
39   if (!rwork) {
40     PetscCall(PetscMalloc1(n, &sing));
41   } else {
42     sing = rwork;
43   }
44 
45   /* SVD */
46   PetscCall(PetscMalloc1(nr * nr, &U));
47   PetscCall(PetscBLASIntCast(nr, &bM));
48   PetscCall(PetscBLASIntCast(nc, &bN));
49   PetscCall(PetscBLASIntCast(ulw, &lwork));
50   PetscCall(MatDenseGetArray(A, &data));
51   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
52 #if !defined(PETSC_USE_COMPLEX)
53   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
54 #else
55   PetscCall(PetscMalloc1(5 * n, &rwork2));
56   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
57   PetscCall(PetscFree(rwork2));
58 #endif
59   PetscCall(PetscFPTrapPop());
60   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
61   PetscCall(MatDenseRestoreArray(A, &data));
62   for (i = 0; i < n; i++)
63     if (sing[i] < PETSC_SMALL) break;
64   if (!rwork) PetscCall(PetscFree(sing));
65   if (!work) PetscCall(PetscFree(uwork));
66   /* create B */
67   if (!range) {
68     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
69     PetscCall(MatDenseGetArray(*B, &data));
70     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
71   } else {
72     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
73     PetscCall(MatDenseGetArray(*B, &data));
74     PetscCall(PetscArraycpy(data, U, i * nr));
75   }
76   PetscCall(MatDenseRestoreArray(*B, &data));
77   PetscCall(PetscFree(U));
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 /* TODO REMOVE */
82 #if defined(PRINT_GDET)
83 static int inc = 0;
84 static int lev = 0;
85 #endif
86 
87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
88 {
89   Mat          GE, GEd;
90   PetscInt     rsize, csize, esize;
91   PetscScalar *ptr;
92 
93   PetscFunctionBegin;
94   PetscCall(ISGetSize(edge, &esize));
95   if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
96   PetscCall(ISGetSize(extrow, &rsize));
97   PetscCall(ISGetSize(extcol, &csize));
98 
99   /* gradients */
100   ptr = work + 5 * esize;
101   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
103   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104   PetscCall(MatDestroy(&GE));
105 
106   /* constants */
107   ptr += rsize * csize;
108   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111   PetscCall(MatDestroy(&GE));
112   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113   PetscCall(MatDestroy(&GEd));
114 
115   if (corners) {
116     Mat                GEc;
117     const PetscScalar *vals;
118     PetscScalar        v;
119 
120     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
121     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122     PetscCall(MatDenseGetArrayRead(GEd, &vals));
123     /* v       = PetscAbsScalar(vals[0]); */
124     v        = 1.;
125     cvals[0] = vals[0] / v;
126     cvals[1] = vals[1] / v;
127     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128     PetscCall(MatScale(*GKins, 1. / v));
129 #if defined(PRINT_GDET)
130     {
131       PetscViewer viewer;
132       char        filename[256];
133       PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137       PetscCall(MatView(GEc, viewer));
138       PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139       PetscCall(MatView(*GKins, viewer));
140       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141       PetscCall(MatView(GEd, viewer));
142       PetscCall(PetscViewerDestroy(&viewer));
143     }
144 #endif
145     PetscCall(MatDestroy(&GEd));
146     PetscCall(MatDestroy(&GEc));
147   }
148   PetscFunctionReturn(PETSC_SUCCESS);
149 }
150 
151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *);
152 
153 PetscErrorCode PCBDDCNedelecSupport(PC pc)
154 {
155   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
156   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
157   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158   PetscSF                sfv;
159   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160   MPI_Comm               comm;
161   IS                     lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162   IS                    *eedges, *extrows, *extcols, *alleedges;
163   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164   PetscScalar           *vals, *work;
165   PetscReal             *rwork;
166   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
167   PetscInt               ne, nv, Lv, order, n, field;
168   PetscInt               i, j, extmem, cum, maxsize, nee;
169   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170   PetscInt              *sfvleaves, *sfvroots;
171   PetscInt              *corners, *cedges;
172   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173   PetscInt              *emarks;
174   PetscBool              print, eerr, done, lrc[2], conforming, global, setprimal;
175 
176   PetscFunctionBegin;
177   /* If the discrete gradient is defined for a subset of dofs and global is true,
178      it assumes G is given in global ordering for all the dofs.
179      Otherwise, the ordering is global for the Nedelec field */
180   order      = pcbddc->nedorder;
181   conforming = pcbddc->conforming;
182   field      = pcbddc->nedfield;
183   global     = pcbddc->nedglobal;
184   setprimal  = PETSC_FALSE;
185   print      = PETSC_FALSE;
186 
187   /* Command line customization */
188   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190   /* print debug info and adaptive order TODO: to be removed */
191   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193   PetscOptionsEnd();
194 
195   /* Return if there are no edges in the decomposition */
196   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199   PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200   lrc[0] = PETSC_FALSE;
201   for (i = 0; i < n; i++) {
202     if (PetscRealPart(vals[i]) > 2.) {
203       lrc[0] = PETSC_TRUE;
204       break;
205     }
206   }
207   PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208   PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm));
209   if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
210 
211   /* Get Nedelec field */
212   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213   if (pcbddc->n_ISForDofsLocal && field >= 0) {
214     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215     nedfieldlocal = pcbddc->ISForDofsLocal[field];
216     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218     ne            = n;
219     nedfieldlocal = NULL;
220     global        = PETSC_TRUE;
221   } else if (field == PETSC_DECIDE) {
222     PetscInt rst, ren, *idx;
223 
224     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227     for (i = rst; i < ren; i++) {
228       PetscInt nc;
229 
230       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233     }
234     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236     PetscCall(PetscMalloc1(n, &idx));
237     for (i = 0, ne = 0; i < n; i++)
238       if (matis->sf_leafdata[i]) idx[ne++] = i;
239     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240   } else {
241     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242   }
243 
244   /* Sanity checks */
245   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
248 
249   /* Just set primal dofs and return */
250   if (setprimal) {
251     IS        enedfieldlocal;
252     PetscInt *eidxs;
253 
254     PetscCall(PetscMalloc1(ne, &eidxs));
255     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256     if (nedfieldlocal) {
257       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258       for (i = 0, cum = 0; i < ne; i++) {
259         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260       }
261       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262     } else {
263       for (i = 0, cum = 0; i < ne; i++) {
264         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265       }
266     }
267     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270     PetscCall(PetscFree(eidxs));
271     PetscCall(ISDestroy(&nedfieldlocal));
272     PetscCall(ISDestroy(&enedfieldlocal));
273     PetscFunctionReturn(PETSC_SUCCESS);
274   }
275 
276   /* Compute some l2g maps */
277   if (nedfieldlocal) {
278     IS is;
279 
280     /* need to map from the local Nedelec field to local numbering */
281     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286     if (global) {
287       PetscCall(PetscObjectReference((PetscObject)al2g));
288       el2g = al2g;
289     } else {
290       IS gis;
291 
292       PetscCall(ISRenumber(is, NULL, NULL, &gis));
293       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294       PetscCall(ISDestroy(&gis));
295     }
296     PetscCall(ISDestroy(&is));
297   } else {
298     /* one ref for the destruction of al2g, one for el2g */
299     PetscCall(PetscObjectReference((PetscObject)al2g));
300     PetscCall(PetscObjectReference((PetscObject)al2g));
301     el2g = al2g;
302     fl2g = NULL;
303   }
304 
305   /* Start communication to drop connections for interior edges (for cc analysis only) */
306   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308   if (nedfieldlocal) {
309     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312   } else {
313     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314   }
315   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
317 
318   /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319      Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320   if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));
321 
322   /* drop connections with interior edges to avoid unneeded communications and memory movements */
323   PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324   PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325   PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326   if (global) {
327     PetscInt rst;
328 
329     PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331       if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332     }
333     PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334     PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335   } else {
336     PetscInt *tbz;
337 
338     PetscCall(PetscMalloc1(ne, &tbz));
339     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342     for (i = 0, cum = 0; i < ne; i++)
343       if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345     PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346     PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347     PetscCall(PetscFree(tbz));
348   }
349 
350   /* Extract subdomain relevant rows of G  */
351   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353   PetscCall(MatAIJExtractRows(G, lned, &lGall));
354   /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356   PetscCall(ISDestroy(&lned));
357   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358   PetscCall(MatDestroy(&lGall));
359   PetscCall(MatISGetLocalMat(lGis, &lG));
360   if (matis->allow_repeated) { /* multi-element support */
361     Mat                   *lGn, B;
362     IS                    *is_rows, *tcols, tmap, nmap;
363     PetscInt               subnv;
364     const PetscInt        *subvidxs;
365     ISLocalToGlobalMapping mapn;
366 
367     PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371       if (fl2g) {
372         PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373       } else {
374         PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375         is_rows[i] = pcbddc->local_subs[i];
376       }
377       PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379       PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380       PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382       PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383       PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384     }
385 
386     /* Create new MATIS with repeated vertices */
387     PetscCall(MatCreate(comm, &B));
388     PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389     PetscCall(MatSetType(B, MATIS));
390     PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391     PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392     PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393     PetscCall(ISDestroy(&tmap));
394     PetscCall(ISGetLocalSize(nmap, &subnv));
395     PetscCall(ISGetIndices(nmap, &subvidxs));
396     PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397     PetscCall(ISRestoreIndices(nmap, &subvidxs));
398     PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399     PetscCall(ISDestroy(&tmap));
400     PetscCall(ISDestroy(&nmap));
401     PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402     PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403     PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405       PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406       PetscCall(ISDestroy(&is_rows[i]));
407       PetscCall(ISDestroy(&tcols[i]));
408     }
409     PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410     PetscCall(PetscFree(lGn));
411     PetscCall(PetscFree(is_rows));
412     PetscCall(PetscFree(tcols));
413     PetscCall(MatISSetLocalMat(B, lG));
414     PetscCall(MatDestroy(&lG));
415 
416     PetscCall(MatDestroy(&lGis));
417     lGis = B;
418 
419     lGis->assembled = PETSC_TRUE;
420   }
421   PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));
422 
423   /* SF for nodal dofs communications */
424   PetscCall(MatGetLocalSize(G, NULL, &Lv));
425   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426   PetscCall(PetscObjectReference((PetscObject)vl2g));
427   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428   PetscCall(PetscSFCreate(comm, &sfv));
429   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
432 
433   if (elements_corners) {
434     IS      tmp;
435     Vec     global, local;
436     Mat_IS *tGis = (Mat_IS *)lGis->data;
437 
438     PetscCall(MatCreateVecs(lGis, &global, NULL));
439     PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440     PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441     PetscCall(VecDestroy(&global));
442     PetscCall(VecDestroy(&local));
443     elements_corners = tmp;
444   }
445 
446   /* Destroy temporary G */
447   PetscCall(MatISGetLocalMat(lGis, &lG));
448   PetscCall(PetscObjectReference((PetscObject)lG));
449   PetscCall(MatDestroy(&G));
450   PetscCall(MatDestroy(&lGis));
451 
452   if (print) {
453     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454     PetscCall(MatView(lG, NULL));
455   }
456 
457   /* Save lG for values insertion in change of basis */
458   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
459 
460   /* Analyze the edge-nodes connections (duplicate lG) */
461   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463   PetscCall(PetscBTCreate(nv, &btv));
464   PetscCall(PetscBTCreate(ne, &bte));
465   PetscCall(PetscBTCreate(ne, &btb));
466   PetscCall(PetscBTCreate(ne, &btbd));
467   /* need to import the boundary specification to ensure the
468      proper detection of coarse edges' endpoints */
469   if (pcbddc->DirichletBoundariesLocal) {
470     IS is;
471 
472     if (fl2g) {
473       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474     } else {
475       is = pcbddc->DirichletBoundariesLocal;
476     }
477     PetscCall(ISGetLocalSize(is, &cum));
478     PetscCall(ISGetIndices(is, &idxs));
479     for (i = 0; i < cum; i++) {
480       if (idxs[i] >= 0 && idxs[i] < ne) {
481         PetscCall(PetscBTSet(btb, idxs[i]));
482         PetscCall(PetscBTSet(btbd, idxs[i]));
483       }
484     }
485     PetscCall(ISRestoreIndices(is, &idxs));
486     if (fl2g) PetscCall(ISDestroy(&is));
487   }
488   if (pcbddc->NeumannBoundariesLocal) {
489     IS is;
490 
491     if (fl2g) {
492       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493     } else {
494       is = pcbddc->NeumannBoundariesLocal;
495     }
496     PetscCall(ISGetLocalSize(is, &cum));
497     PetscCall(ISGetIndices(is, &idxs));
498     for (i = 0; i < cum; i++) {
499       if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500     }
501     PetscCall(ISRestoreIndices(is, &idxs));
502     if (fl2g) PetscCall(ISDestroy(&is));
503   }
504 
505   /* Count neighs per dof */
506   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));
508 
509   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510      for proper detection of coarse edges' endpoints */
511   PetscCall(PetscBTCreate(ne, &btee));
512   for (i = 0; i < ne; i++) {
513     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514   }
515   PetscCall(PetscMalloc1(ne, &marks));
516   if (!conforming) {
517     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519   }
520   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521   PetscCall(MatSeqAIJGetArray(lGe, &vals));
522   cum = 0;
523   for (i = 0; i < ne; i++) {
524     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525     if (!PetscBTLookup(btee, i)) {
526       marks[cum++] = i;
527       continue;
528     }
529     /* set badly connected edge dofs as primal */
530     if (!conforming) {
531       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532         marks[cum++] = i;
533         PetscCall(PetscBTSet(bte, i));
534         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535       } else {
536         /* every edge dofs should be connected through a certain number of nodal dofs
537            to other edge dofs belonging to coarse edges
538            - at most 2 endpoints
539            - order-1 interior nodal dofs
540            - no undefined nodal dofs (nconn < order)
541         */
542         PetscInt ends = 0, ints = 0, undef = 0;
543         for (j = ii[i]; j < ii[i + 1]; j++) {
544           PetscInt v     = jj[j], k;
545           PetscInt nconn = iit[v + 1] - iit[v];
546           for (k = iit[v]; k < iit[v + 1]; k++)
547             if (!PetscBTLookup(btee, jjt[k])) nconn--;
548           if (nconn > order) ends++;
549           else if (nconn == order) ints++;
550           else undef++;
551         }
552         if (undef || ends > 2 || ints != order - 1) {
553           marks[cum++] = i;
554           PetscCall(PetscBTSet(bte, i));
555           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556         }
557       }
558     }
559     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560     if (!order && ii[i + 1] != ii[i]) {
561       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563     }
564   }
565   PetscCall(PetscBTDestroy(&btee));
566   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568   if (!conforming) {
569     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570     PetscCall(MatDestroy(&lGt));
571   }
572   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
573 
574   /* identify splitpoints and corner candidates */
575   PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576   PetscCall(PetscBTCreate(nv, &btvcand));
577   if (elements_corners) {
578     PetscCall(ISGetLocalSize(elements_corners, &cum));
579     PetscCall(ISGetIndices(elements_corners, &idxs));
580     for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581     PetscCall(ISRestoreIndices(elements_corners, &idxs));
582   }
583 
584   if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585     PetscSF   emlsf, vmlsf;
586     PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587     PetscInt  cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;
588 
589     PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591     PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
593 
594     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));
596 
597     PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598     for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599     PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600     PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);
601 
602     PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603     for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604     PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605     PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);
606 
607     PetscCall(PetscMalloc1(ne, &eleaves));
608     PetscCall(PetscMalloc1(nv, &vleaves));
609     for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX;
610     for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX;
611     PetscCall(PetscMalloc1(emnl, &meleaves));
612     PetscCall(PetscMalloc1(vmnl, &mvleaves));
613 
614     PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615     PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616     for (i = 0; i < n_subs; i++) {
617       const PetscInt *idxs;
618       const PetscInt  subid = cum_subs + i;
619       PetscInt        ns;
620 
621       PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622       PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623       for (j = 0; j < ns; j++) {
624         const PetscInt e = idxs[j];
625 
626         eleaves[e] = subid;
627         for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628       }
629       PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630     }
631     PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632     PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633     PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634     PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635     PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636     PetscCall(PetscFree(eleaves));
637     PetscCall(PetscFree(vleaves));
638 
639     PetscCall(PetscMalloc1(ne + 1, &eneighs));
640     eneighs[0] = meleaves;
641     for (i = 0; i < ne; i++) {
642       PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643       eneighs[i + 1] = eneighs[i] + ecount[i];
644     }
645     PetscCall(PetscMalloc1(nv + 1, &vneighs));
646     vneighs[0] = mvleaves;
647     for (i = 0; i < nv; i++) {
648       PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649       vneighs[i + 1] = vneighs[i] + vcount[i];
650     }
651   } else {
652     PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653     PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654   }
655 
656   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657   if (print) {
658     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659     PetscCall(MatView(lGe, NULL));
660     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661     PetscCall(MatView(lGt, NULL));
662   }
663   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664   PetscCall(MatSeqAIJGetArray(lGt, &vals));
665   for (i = 0; i < nv; i++) {
666     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668     if (!order) { /* variable order */
669       PetscReal vorder = 0.;
670 
671       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674       ord = 1;
675     }
676     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677       const PetscInt e = jj[j];
678 
679       if (PetscBTLookup(btbd, e)) {
680         bdir = PETSC_TRUE;
681         break;
682       }
683       if (vc != ecount[e]) {
684         sneighs = PETSC_FALSE;
685       } else {
686         const PetscInt *vn = vneighs[i], *en = eneighs[e];
687 
688         for (PetscInt k = 0; k < vc; k++) {
689           if (vn[k] != en[k]) {
690             sneighs = PETSC_FALSE;
691             break;
692           }
693         }
694       }
695     }
696     if (elements_corners) test = 0;
697     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699       PetscCall(PetscBTSet(btv, i));
700     } else if (test == ord) {
701       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703         PetscCall(PetscBTSet(btv, i));
704       } else if (!elements_corners) {
705         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706         PetscCall(PetscBTSet(btvcand, i));
707       }
708     }
709   }
710   PetscCall(PetscBTDestroy(&btbd));
711 
712   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713   if (order != 1) {
714     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716     for (i = 0; i < nv; i++) {
717       if (PetscBTLookup(btvcand, i)) {
718         PetscBool found = PETSC_FALSE;
719         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720           PetscInt k, e = jj[j];
721           if (PetscBTLookup(bte, e)) continue;
722           for (k = iit[e]; k < iit[e + 1]; k++) {
723             PetscInt v = jjt[k];
724             if (v != i && PetscBTLookup(btvcand, v)) {
725               found = PETSC_TRUE;
726               break;
727             }
728           }
729         }
730         if (!found) {
731           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732           PetscCall(PetscBTClear(btvcand, i));
733         } else {
734           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735         }
736       }
737     }
738     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739   }
740   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742   PetscCall(MatDestroy(&lGe));
743 
744   /* Get the local G^T explicitly */
745   PetscCall(MatDestroy(&lGt));
746   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
748 
749   /* Mark shared nodal dofs */
750   PetscCall(PetscBTCreate(nv, &btvi));
751   for (i = 0; i < nv; i++) {
752     if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753   }
754 
755   if (matis->allow_repeated) {
756     PetscCall(PetscFree(eneighs[0]));
757     PetscCall(PetscFree(vneighs[0]));
758     PetscCall(PetscFree(eneighs));
759     PetscCall(PetscFree(vneighs));
760   }
761   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
763 
764   /* communicate corners and splitpoints */
765   PetscCall(PetscMalloc1(nv, &vmarks));
766   PetscCall(PetscArrayzero(sfvleaves, nv));
767   PetscCall(PetscArrayzero(sfvroots, Lv));
768   for (i = 0; i < nv; i++)
769     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
770 
771   if (print) {
772     IS tbz;
773 
774     cum = 0;
775     for (i = 0; i < nv; i++)
776       if (sfvleaves[i]) vmarks[cum++] = i;
777 
778     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780     PetscCall(ISView(tbz, NULL));
781     PetscCall(ISDestroy(&tbz));
782   }
783 
784   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
788 
789   /* Zero rows of lGt corresponding to identified corners
790      and interior nodal dofs */
791   cum = 0;
792   for (i = 0; i < nv; i++) {
793     if (sfvleaves[i]) {
794       vmarks[cum++] = i;
795       PetscCall(PetscBTSet(btv, i));
796     } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797   }
798   PetscCall(PetscBTDestroy(&btvi));
799   if (print) {
800     IS tbz;
801 
802     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804     PetscCall(ISView(tbz, NULL));
805     PetscCall(ISDestroy(&tbz));
806   }
807   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808   PetscCall(PetscFree(vmarks));
809   PetscCall(PetscSFDestroy(&sfv));
810   PetscCall(PetscFree2(sfvleaves, sfvroots));
811 
812   /* Recompute G */
813   PetscCall(MatDestroy(&lG));
814   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815   if (print) {
816     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817     PetscCall(MatView(lG, NULL));
818     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819     PetscCall(MatView(lGt, NULL));
820   }
821 
822   /* Get primal dofs (if any) */
823   cum = 0;
824   for (i = 0; i < ne; i++) {
825     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826   }
827   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829   if (print) {
830     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831     PetscCall(ISView(primals, NULL));
832   }
833   PetscCall(PetscBTDestroy(&bte));
834   /* TODO: what if the user passed in some of them ?  */
835   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836   PetscCall(ISDestroy(&primals));
837 
838   /* Compute edge connectivity */
839   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
840 
841   /* Symbolic conn = lG*lGt */
842   if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843     PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844     PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845     PetscCall(MatProductSetAlgorithm(conn, "default"));
846     PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848     PetscCall(MatProductSetFromOptions(conn));
849     PetscCall(MatProductSymbolic(conn));
850     PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851     if (fl2g) {
852       PetscBT   btf;
853       PetscInt *iia, *jja, *iiu, *jju;
854       PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
855 
856       /* create CSR for all local dofs */
857       PetscCall(PetscMalloc1(n + 1, &iia));
858       if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859         PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860         iiu = pcbddc->mat_graph->xadj;
861         jju = pcbddc->mat_graph->adjncy;
862       } else if (pcbddc->use_local_adj) {
863         rest = PETSC_TRUE;
864         PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865       } else {
866         free = PETSC_TRUE;
867         PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868         iiu[0] = 0;
869         for (i = 0; i < n; i++) {
870           iiu[i + 1] = i + 1;
871           jju[i]     = -1;
872         }
873       }
874 
875       /* import sizes of CSR */
876       iia[0] = 0;
877       for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
878 
879       /* overwrite entries corresponding to the Nedelec field */
880       PetscCall(PetscBTCreate(n, &btf));
881       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882       for (i = 0; i < ne; i++) {
883         PetscCall(PetscBTSet(btf, idxs[i]));
884         iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885       }
886 
887       /* iia in CSR */
888       for (i = 0; i < n; i++) iia[i + 1] += iia[i];
889 
890       /* jja in CSR */
891       PetscCall(PetscMalloc1(iia[n], &jja));
892       for (i = 0; i < n; i++)
893         if (!PetscBTLookup(btf, i))
894           for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
895 
896       /* map edge dofs connectivity */
897       if (jj) {
898         PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899         for (i = 0; i < ne; i++) {
900           PetscInt e = idxs[i];
901           for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902         }
903       }
904       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906       if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907       if (free) PetscCall(PetscFree2(iiu, jju));
908       PetscCall(PetscBTDestroy(&btf));
909     } else {
910       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911     }
912     PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913     PetscCall(MatDestroy(&conn));
914   }
915 
916   /* Analyze interface for edge dofs */
917   PetscCall(PCBDDCAnalyzeInterface(pc));
918   pcbddc->mat_graph->twodim = PETSC_FALSE;
919 
920   /* Get coarse edges in the edge space */
921   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
922 
923   if (fl2g) {
924     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925     PetscCall(PetscMalloc1(nee, &eedges));
926     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927   } else {
928     eedges  = alleedges;
929     primals = allprimals;
930   }
931 
932   /* Mark fine edge dofs with their coarse edge id */
933   PetscCall(PetscArrayzero(marks, ne));
934   PetscCall(ISGetLocalSize(primals, &cum));
935   PetscCall(ISGetIndices(primals, &idxs));
936   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937   PetscCall(ISRestoreIndices(primals, &idxs));
938   if (print) {
939     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940     PetscCall(ISView(primals, NULL));
941   }
942 
943   maxsize = 0;
944   for (i = 0; i < nee; i++) {
945     PetscInt size, mark = i + 1;
946 
947     PetscCall(ISGetLocalSize(eedges[i], &size));
948     PetscCall(ISGetIndices(eedges[i], &idxs));
949     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950     PetscCall(ISRestoreIndices(eedges[i], &idxs));
951     maxsize = PetscMax(maxsize, size);
952   }
953 
954   /* Find coarse edge endpoints */
955   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957   for (i = 0; i < nee; i++) {
958     PetscInt mark = i + 1, size;
959 
960     PetscCall(ISGetLocalSize(eedges[i], &size));
961     if (!size && nedfieldlocal) continue;
962     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963     PetscCall(ISGetIndices(eedges[i], &idxs));
964     if (print) {
965       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966       PetscCall(ISView(eedges[i], NULL));
967     }
968     for (j = 0; j < size; j++) {
969       PetscInt k, ee = idxs[j];
970       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee));
971       for (k = ii[ee]; k < ii[ee + 1]; k++) {
972         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]));
973         if (PetscBTLookup(btv, jj[k])) {
974           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976           PetscInt  k2;
977           PetscBool corner = PETSC_FALSE;
978           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980             /* it's a corner if either is connected with an edge dof belonging to a different cc or
981                if the edge dof lie on the natural part of the boundary */
982             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983               corner = PETSC_TRUE;
984               break;
985             }
986           }
987           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]));
989             PetscCall(PetscBTSet(btv, jj[k]));
990           } else {
991             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        no corners found\n"));
992           }
993         }
994       }
995     }
996     PetscCall(ISRestoreIndices(eedges[i], &idxs));
997   }
998   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000   PetscCall(PetscBTDestroy(&btb));
1001 
1002   /* Reset marked primal dofs */
1003   PetscCall(ISGetLocalSize(primals, &cum));
1004   PetscCall(ISGetIndices(primals, &idxs));
1005   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006   PetscCall(ISRestoreIndices(primals, &idxs));
1007 
1008   /* Now use the initial lG */
1009   PetscCall(MatDestroy(&lG));
1010   PetscCall(MatDestroy(&lGt));
1011   lG = lGinit;
1012   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
1013 
1014   /* Compute extended cols indices */
1015   PetscCall(PetscBTCreate(nv, &btvc));
1016   PetscCall(PetscBTCreate(nee, &bter));
1017   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019   i *= maxsize;
1020   PetscCall(PetscCalloc1(nee, &extcols));
1021   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022   eerr = PETSC_FALSE;
1023   for (i = 0; i < nee; i++) {
1024     PetscInt size, found = 0;
1025 
1026     cum = 0;
1027     PetscCall(ISGetLocalSize(eedges[i], &size));
1028     if (!size && nedfieldlocal) continue;
1029     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030     PetscCall(ISGetIndices(eedges[i], &idxs));
1031     PetscCall(PetscBTMemzero(nv, btvc));
1032     for (j = 0; j < size; j++) {
1033       PetscInt k, ee = idxs[j];
1034       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035         PetscInt vv = jj[k];
1036         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037         else if (!PetscBTLookupSet(btvc, vv)) found++;
1038       }
1039     }
1040     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045     /* it may happen that endpoints are not defined at this point
1046        if it is the case, mark this edge for a second pass */
1047     if (cum != size - 1 || found != 2) {
1048       PetscCall(PetscBTSet(bter, i));
1049       if (print) {
1050         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051         PetscCall(ISView(eedges[i], NULL));
1052         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053         PetscCall(ISView(extcols[i], NULL));
1054       }
1055       eerr = PETSC_TRUE;
1056     }
1057   }
1058   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059   PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm));
1060   if (done) {
1061     PetscInt *newprimals;
1062 
1063     PetscCall(PetscMalloc1(ne, &newprimals));
1064     PetscCall(ISGetLocalSize(primals, &cum));
1065     PetscCall(ISGetIndices(primals, &idxs));
1066     PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067     PetscCall(ISRestoreIndices(primals, &idxs));
1068     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070     for (i = 0; i < nee; i++) {
1071       PetscBool has_candidates = PETSC_FALSE;
1072       if (PetscBTLookup(bter, i)) {
1073         PetscInt size, mark = i + 1;
1074 
1075         PetscCall(ISGetLocalSize(eedges[i], &size));
1076         PetscCall(ISGetIndices(eedges[i], &idxs));
1077         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078         for (j = 0; j < size; j++) {
1079           PetscInt k, ee = idxs[j];
1080           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082             /* set all candidates located on the edge as corners */
1083             if (PetscBTLookup(btvcand, jj[k])) {
1084               PetscInt k2, vv = jj[k];
1085               has_candidates = PETSC_TRUE;
1086               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087               PetscCall(PetscBTSet(btv, vv));
1088               /* set all edge dofs connected to candidate as primals */
1089               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090                 if (marks[jjt[k2]] == mark) {
1091                   PetscInt k3, ee2 = jjt[k2];
1092                   if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093                   newprimals[cum++] = ee2;
1094                   /* finally set the new corners */
1095                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096                     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097                     PetscCall(PetscBTSet(btv, jj[k3]));
1098                   }
1099                 }
1100               }
1101             } else {
1102               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103             }
1104           }
1105         }
1106         if (!has_candidates) { /* circular edge */
1107           PetscInt k, ee = idxs[0], *tmarks;
1108 
1109           PetscCall(PetscCalloc1(ne, &tmarks));
1110           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i));
1111           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112             PetscInt k2;
1113             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]));
1114             PetscCall(PetscBTSet(btv, jj[k]));
1115             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116           }
1117           for (j = 0; j < size; j++) {
1118             if (tmarks[idxs[j]] > 1) {
1119               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120               newprimals[cum++] = idxs[j];
1121             }
1122           }
1123           PetscCall(PetscFree(tmarks));
1124         }
1125         PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126       }
1127       PetscCall(ISDestroy(&extcols[i]));
1128     }
1129     PetscCall(PetscFree(extcols));
1130     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132     if (fl2g) {
1133       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134       PetscCall(ISDestroy(&primals));
1135       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136       PetscCall(PetscFree(eedges));
1137     }
1138     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140     PetscCall(PetscFree(newprimals));
1141     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142     PetscCall(ISDestroy(&primals));
1143     PetscCall(PCBDDCAnalyzeInterface(pc));
1144     pcbddc->mat_graph->twodim = PETSC_FALSE;
1145     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146     if (fl2g) {
1147       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148       PetscCall(PetscMalloc1(nee, &eedges));
1149       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150     } else {
1151       eedges  = alleedges;
1152       primals = allprimals;
1153     }
1154     PetscCall(PetscCalloc1(nee, &extcols));
1155 
1156     /* Mark again */
1157     PetscCall(PetscArrayzero(marks, ne));
1158     for (i = 0; i < nee; i++) {
1159       PetscInt size, mark = i + 1;
1160 
1161       PetscCall(ISGetLocalSize(eedges[i], &size));
1162       PetscCall(ISGetIndices(eedges[i], &idxs));
1163       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165     }
1166     if (print) {
1167       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168       PetscCall(ISView(primals, NULL));
1169     }
1170 
1171     /* Recompute extended cols */
1172     eerr = PETSC_FALSE;
1173     for (i = 0; i < nee; i++) {
1174       PetscInt size;
1175 
1176       cum = 0;
1177       PetscCall(ISGetLocalSize(eedges[i], &size));
1178       if (!size && nedfieldlocal) continue;
1179       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180       PetscCall(ISGetIndices(eedges[i], &idxs));
1181       for (j = 0; j < size; j++) {
1182         PetscInt k, ee = idxs[j];
1183         for (k = ii[ee]; k < ii[ee + 1]; k++)
1184           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185       }
1186       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191       if (cum != size - 1) {
1192         if (print) {
1193           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194           PetscCall(ISView(eedges[i], NULL));
1195           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196           PetscCall(ISView(extcols[i], NULL));
1197         }
1198         eerr = PETSC_TRUE;
1199       }
1200     }
1201   }
1202   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203   PetscCall(PetscFree2(extrow, gidxs));
1204   PetscCall(PetscBTDestroy(&bter));
1205   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206   /* an error should not occur at this point */
1207   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1208 
1209   /* Check the number of endpoints */
1210   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211   PetscCall(PetscMalloc1(2 * nee, &corners));
1212   PetscCall(PetscMalloc1(nee, &cedges));
1213   for (i = 0; i < nee; i++) {
1214     PetscInt size, found = 0, gc[2];
1215 
1216     /* init with defaults */
1217     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218     PetscCall(ISGetLocalSize(eedges[i], &size));
1219     if (!size && nedfieldlocal) continue;
1220     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221     PetscCall(ISGetIndices(eedges[i], &idxs));
1222     PetscCall(PetscBTMemzero(nv, btvc));
1223     for (j = 0; j < size; j++) {
1224       PetscInt k, ee = idxs[j];
1225       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226         PetscInt vv = jj[k];
1227         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229           corners[i * 2 + found++] = vv;
1230         }
1231       }
1232     }
1233     if (found != 2) {
1234       PetscInt e;
1235       if (fl2g) {
1236         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237       } else {
1238         e = idxs[0];
1239       }
1240       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241     }
1242 
1243     /* get primal dof index on this coarse edge */
1244     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245     if (gc[0] > gc[1]) {
1246       PetscInt swap      = corners[2 * i];
1247       corners[2 * i]     = corners[2 * i + 1];
1248       corners[2 * i + 1] = swap;
1249     }
1250     cedges[i] = idxs[size - 1];
1251     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253   }
1254   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255   PetscCall(PetscBTDestroy(&btvc));
1256 
1257   if (PetscDefined(USE_DEBUG)) {
1258     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259      not interfere with neighbouring coarse edges */
1260     PetscCall(PetscMalloc1(nee + 1, &emarks));
1261     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262     for (i = 0; i < nv; i++) {
1263       PetscInt emax = 0, eemax = 0;
1264 
1265       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266       PetscCall(PetscArrayzero(emarks, nee + 1));
1267       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268       for (j = 1; j < nee + 1; j++) {
1269         if (emax < emarks[j]) {
1270           emax  = emarks[j];
1271           eemax = j;
1272         }
1273       }
1274       /* not relevant for edges */
1275       if (!eemax) continue;
1276 
1277       for (j = ii[i]; j < ii[i + 1]; j++) {
1278         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279       }
1280     }
1281     PetscCall(PetscFree(emarks));
1282     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283   }
1284 
1285   /* Compute extended rows indices for edge blocks of the change of basis */
1286   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288   extmem *= maxsize;
1289   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290   PetscCall(PetscMalloc1(nee, &extrows));
1291   PetscCall(PetscCalloc1(nee, &extrowcum));
1292   for (i = 0; i < nv; i++) {
1293     PetscInt mark = 0, size, start;
1294 
1295     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296     for (j = ii[i]; j < ii[i + 1]; j++)
1297       if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1298 
1299     /* not relevant */
1300     if (!mark) continue;
1301 
1302     /* import extended row */
1303     mark--;
1304     start = mark * extmem + extrowcum[mark];
1305     size  = ii[i + 1] - ii[i];
1306     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308     extrowcum[mark] += size;
1309   }
1310   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311   PetscCall(MatDestroy(&lGt));
1312   PetscCall(PetscFree(marks));
1313 
1314   /* Compress extrows */
1315   cum = 0;
1316   for (i = 0; i < nee; i++) {
1317     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318     PetscCall(PetscSortRemoveDupsInt(&size, start));
1319     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320     cum = PetscMax(cum, size);
1321   }
1322   PetscCall(PetscFree(extrowcum));
1323   PetscCall(PetscBTDestroy(&btv));
1324   PetscCall(PetscBTDestroy(&btvcand));
1325 
1326   /* Workspace for lapack inner calls and VecSetValues */
1327   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1328 
1329   /* Create change of basis matrix (no preallocation) */
1330   PetscCall(MatCreate(comm, &T));
1331   PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332   PetscCall(MatSetType(T, MATAIJ));
1333   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1334   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1335   PetscCall(MatSetOption(T, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
1336   //PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1337   //PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1338   //PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1339 
1340   /* Defaults to identity */
1341   {
1342     Vec                w;
1343     const PetscScalar *wa;
1344 
1345     PetscCall(MatCreateVecs(T, &w, NULL));
1346     PetscCall(VecSetLocalToGlobalMapping(w, al2g));
1347     PetscCall(VecSet(w, 1.0));
1348     for (i = 0; i < nee; i++) {
1349       const PetscInt *idxs;
1350       PetscInt        nl;
1351 
1352       PetscCall(ISGetLocalSize(eedges[i], &nl));
1353       PetscCall(ISGetIndices(eedges[i], &idxs));
1354       PetscCall(VecSetValuesLocal(w, nl, idxs, NULL, INSERT_VALUES));
1355       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1356     }
1357     PetscCall(VecAssemblyBegin(w));
1358     PetscCall(VecAssemblyEnd(w));
1359     PetscCall(VecGetArrayRead(w, &wa));
1360     for (i = T->rmap->rstart; i < T->rmap->rend; i++)
1361       if (PetscAbsScalar(wa[i - T->rmap->rstart])) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1362     PetscCall(VecRestoreArrayRead(w, &wa));
1363     PetscCall(VecDestroy(&w));
1364   }
1365 
1366   /* Create discrete gradient for the coarser level if needed */
1367   PetscCall(MatDestroy(&pcbddc->nedcG));
1368   PetscCall(ISDestroy(&pcbddc->nedclocal));
1369   if (pcbddc->current_level < pcbddc->max_levels) {
1370     ISLocalToGlobalMapping cel2g, cvl2g;
1371     IS                     wis, gwis;
1372     PetscInt               cnv, cne;
1373 
1374     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1375     if (fl2g) {
1376       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1377     } else {
1378       PetscCall(PetscObjectReference((PetscObject)wis));
1379       pcbddc->nedclocal = wis;
1380     }
1381     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1382     PetscCall(ISDestroy(&wis));
1383     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1384     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1385     PetscCall(ISDestroy(&wis));
1386     PetscCall(ISDestroy(&gwis));
1387 
1388     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1389     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1390     PetscCall(ISDestroy(&wis));
1391     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1392     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1393     PetscCall(ISDestroy(&wis));
1394     PetscCall(ISDestroy(&gwis));
1395 
1396     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1397     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1398     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1399     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1400     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1401     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1402     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1403     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1404   }
1405   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1406 
1407 #if defined(PRINT_GDET)
1408   inc = 0;
1409   lev = pcbddc->current_level;
1410 #endif
1411 
1412   /* Insert values in the change of basis matrix */
1413   for (i = 0; i < nee; i++) {
1414     Mat         Gins = NULL, GKins = NULL;
1415     IS          cornersis = NULL;
1416     PetscScalar cvals[2];
1417 
1418     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1419     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1420     if (Gins && GKins) {
1421       const PetscScalar *data;
1422       const PetscInt    *rows, *cols;
1423       PetscInt           nrh, nch, nrc, ncc;
1424 
1425       PetscCall(ISGetIndices(eedges[i], &cols));
1426       /* H1 */
1427       PetscCall(ISGetIndices(extrows[i], &rows));
1428       PetscCall(MatGetSize(Gins, &nrh, &nch));
1429       PetscCall(MatDenseGetArrayRead(Gins, &data));
1430       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1431       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1432       PetscCall(ISRestoreIndices(extrows[i], &rows));
1433       /* complement */
1434       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1435       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1436       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1437       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1438       PetscCall(MatDenseGetArrayRead(GKins, &data));
1439       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1440       PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1441 
1442       /* coarse discrete gradient */
1443       if (pcbddc->nedcG) {
1444         PetscInt cols[2];
1445 
1446         cols[0] = 2 * i;
1447         cols[1] = 2 * i + 1;
1448         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1449       }
1450       PetscCall(ISRestoreIndices(eedges[i], &cols));
1451     }
1452     PetscCall(ISDestroy(&extrows[i]));
1453     PetscCall(ISDestroy(&extcols[i]));
1454     PetscCall(ISDestroy(&cornersis));
1455     PetscCall(MatDestroy(&Gins));
1456     PetscCall(MatDestroy(&GKins));
1457   }
1458 
1459   /* for FDM element-by-element: first dof on the edge only constraint. Why? */
1460   if (elements_corners && pcbddc->mat_graph->multi_element) {
1461     MatNullSpace nnsp;
1462     Vec          quad_vec;
1463 
1464     PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL));
1465     PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp));
1466     PetscCall(VecLockReadPop(quad_vec));
1467     PetscCall(VecSetLocalToGlobalMapping(quad_vec, al2g));
1468     for (i = 0; i < nee; i++) {
1469       const PetscInt *idxs;
1470       PetscScalar     one = 1.0;
1471 
1472       PetscCall(ISGetLocalSize(eedges[i], &cum));
1473       if (!cum) continue;
1474       PetscCall(ISGetIndices(eedges[i], &idxs));
1475       PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES));
1476       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1477     }
1478     PetscCall(VecLockReadPush(quad_vec));
1479     PetscCall(VecDestroy(&quad_vec));
1480     PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1481     PetscCall(MatNullSpaceDestroy(&nnsp));
1482   }
1483   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1484   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1485 
1486   /* Start assembling */
1487   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1488   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1489 
1490   /* Free */
1491   if (fl2g) {
1492     PetscCall(ISDestroy(&primals));
1493     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1494     PetscCall(PetscFree(eedges));
1495   }
1496 
1497   /* hack mat_graph with primal dofs on the coarse edges */
1498   {
1499     PCBDDCGraph graph  = pcbddc->mat_graph;
1500     PetscInt   *oqueue = graph->queue;
1501     PetscInt   *ocptr  = graph->cptr;
1502     PetscInt    ncc, *idxs;
1503 
1504     /* find first primal edge */
1505     if (pcbddc->nedclocal) {
1506       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1507     } else {
1508       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1509       idxs = cedges;
1510     }
1511     cum = 0;
1512     while (cum < nee && cedges[cum] < 0) cum++;
1513 
1514     /* adapt connected components */
1515     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1516     graph->cptr[0] = 0;
1517     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1518       PetscInt lc = ocptr[i + 1] - ocptr[i];
1519       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1520         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1521         graph->queue[graph->cptr[ncc]] = cedges[cum];
1522         ncc++;
1523         lc--;
1524         cum++;
1525         while (cum < nee && cedges[cum] < 0) cum++;
1526       }
1527       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1528       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1529       ncc++;
1530     }
1531     graph->ncc = ncc;
1532     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1533     PetscCall(PetscFree2(ocptr, oqueue));
1534   }
1535   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1536   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1537   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1538 
1539   PetscCall(ISDestroy(&nedfieldlocal));
1540   PetscCall(PetscFree(extrow));
1541   PetscCall(PetscFree2(work, rwork));
1542   PetscCall(PetscFree(corners));
1543   PetscCall(PetscFree(cedges));
1544   PetscCall(PetscFree(extrows));
1545   PetscCall(PetscFree(extcols));
1546   PetscCall(MatDestroy(&lG));
1547 
1548   /* Complete assembling */
1549   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1550   PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1551   if (pcbddc->nedcG) {
1552     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1553     PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1554   }
1555 
1556   PetscCall(ISDestroy(&elements_corners));
1557 
1558   /* set change of basis */
1559   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1560   PetscCall(MatDestroy(&T));
1561   PetscFunctionReturn(PETSC_SUCCESS);
1562 }
1563 
1564 /* the near-null space of BDDC carries information on quadrature weights,
1565    and these can be collinear -> so cheat with MatNullSpaceCreate
1566    and create a suitable set of basis vectors first */
1567 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1568 {
1569   PetscInt i;
1570 
1571   PetscFunctionBegin;
1572   for (i = 0; i < nvecs; i++) {
1573     PetscInt first, last;
1574 
1575     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1576     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1577     if (i >= first && i < last) {
1578       PetscScalar *data;
1579       PetscCall(VecGetArray(quad_vecs[i], &data));
1580       if (!has_const) {
1581         data[i - first] = 1.;
1582       } else {
1583         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1584         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1585       }
1586       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1587     }
1588     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1589   }
1590   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1591   for (i = 0; i < nvecs; i++) { /* reset vectors */
1592     PetscInt first, last;
1593     PetscCall(VecLockReadPop(quad_vecs[i]));
1594     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1595     if (i >= first && i < last) {
1596       PetscScalar *data;
1597       PetscCall(VecGetArray(quad_vecs[i], &data));
1598       if (!has_const) {
1599         data[i - first] = 0.;
1600       } else {
1601         data[2 * i - first]     = 0.;
1602         data[2 * i - first + 1] = 0.;
1603       }
1604       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1605     }
1606     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1607     PetscCall(VecLockReadPush(quad_vecs[i]));
1608   }
1609   PetscFunctionReturn(PETSC_SUCCESS);
1610 }
1611 
1612 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1613 {
1614   Mat                    loc_divudotp;
1615   Vec                    p, v, quad_vec;
1616   ISLocalToGlobalMapping map;
1617   PetscScalar           *array;
1618 
1619   PetscFunctionBegin;
1620   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1621   if (!transpose) {
1622     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1623   } else {
1624     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1625   }
1626   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1627   PetscCall(VecLockReadPop(quad_vec));
1628   PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1629 
1630   /* compute local quad vec */
1631   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1632   if (!transpose) {
1633     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1634   } else {
1635     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1636   }
1637   /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1638   PetscCall(VecSet(p, 1.));
1639   if (!transpose) {
1640     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1641   } else {
1642     PetscCall(MatMult(loc_divudotp, p, v));
1643   }
1644   PetscCall(VecDestroy(&p));
1645   if (vl2l) {
1646     Mat        lA;
1647     VecScatter sc;
1648     Vec        vins;
1649 
1650     PetscCall(MatISGetLocalMat(A, &lA));
1651     PetscCall(MatCreateVecs(lA, &vins, NULL));
1652     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1653     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1654     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1655     PetscCall(VecScatterDestroy(&sc));
1656     PetscCall(VecDestroy(&v));
1657     v = vins;
1658   }
1659 
1660   /* mask summation of interface values */
1661   PetscInt        n, *mmask, *mask, *idxs, nmr, nr;
1662   const PetscInt *degree;
1663   PetscSF         msf;
1664 
1665   PetscCall(VecGetLocalSize(v, &n));
1666   PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1667   PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1668   PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1669   PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1670   PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, &degree));
1671   PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, &degree));
1672   for (PetscInt i = 0, c = 0; i < nr; i++) {
1673     mmask[c] = 1;
1674     c += degree[i];
1675   }
1676   PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1677   PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1678   PetscCall(VecGetArray(v, &array));
1679   for (PetscInt i = 0; i < n; i++) {
1680     array[i] *= mask[i];
1681     idxs[i] = i;
1682   }
1683   PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1684   PetscCall(VecRestoreArray(v, &array));
1685   PetscCall(PetscFree3(mmask, mask, idxs));
1686   PetscCall(VecDestroy(&v));
1687   PetscCall(VecAssemblyBegin(quad_vec));
1688   PetscCall(VecAssemblyEnd(quad_vec));
1689   PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1690   PetscCall(VecLockReadPush(quad_vec));
1691   PetscCall(VecDestroy(&quad_vec));
1692   PetscFunctionReturn(PETSC_SUCCESS);
1693 }
1694 
1695 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1696 {
1697   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1698 
1699   PetscFunctionBegin;
1700   if (primalv) {
1701     if (pcbddc->user_primal_vertices_local) {
1702       IS list[2], newp;
1703 
1704       list[0] = primalv;
1705       list[1] = pcbddc->user_primal_vertices_local;
1706       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1707       PetscCall(ISSortRemoveDups(newp));
1708       PetscCall(ISDestroy(&list[1]));
1709       pcbddc->user_primal_vertices_local = newp;
1710     } else {
1711       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1712     }
1713   }
1714   PetscFunctionReturn(PETSC_SUCCESS);
1715 }
1716 
1717 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1718 {
1719   PetscInt f, *comp = (PetscInt *)ctx;
1720 
1721   PetscFunctionBegin;
1722   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1723   PetscFunctionReturn(PETSC_SUCCESS);
1724 }
1725 
1726 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1727 {
1728   Vec       local, global;
1729   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1730   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1731   PetscBool monolithic = PETSC_FALSE;
1732 
1733   PetscFunctionBegin;
1734   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1735   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1736   PetscOptionsEnd();
1737   /* need to convert from global to local topology information and remove references to information in global ordering */
1738   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1739   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1740   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1741   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1742   if (monolithic) { /* just get block size to properly compute vertices */
1743     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1744     goto boundary;
1745   }
1746 
1747   if (pcbddc->user_provided_isfordofs) {
1748     if (pcbddc->n_ISForDofs) {
1749       PetscInt i;
1750 
1751       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1752       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1753         PetscInt bs;
1754 
1755         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1756         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1757         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1758         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1759       }
1760       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1761       pcbddc->n_ISForDofs      = 0;
1762       PetscCall(PetscFree(pcbddc->ISForDofs));
1763     }
1764   } else {
1765     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1766       DM dm;
1767 
1768       PetscCall(MatGetDM(pc->pmat, &dm));
1769       if (!dm) PetscCall(PCGetDM(pc, &dm));
1770       if (dm) {
1771         IS      *fields;
1772         PetscInt nf, i;
1773 
1774         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1775         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1776         for (i = 0; i < nf; i++) {
1777           PetscInt bs;
1778 
1779           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1780           PetscCall(ISGetBlockSize(fields[i], &bs));
1781           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1782           PetscCall(ISDestroy(&fields[i]));
1783         }
1784         PetscCall(PetscFree(fields));
1785         pcbddc->n_ISForDofsLocal = nf;
1786       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1787         PetscContainer c;
1788 
1789         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1790         if (c) {
1791           MatISLocalFields lf;
1792           PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1793           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1794         } else { /* fallback, create the default fields if bs > 1 */
1795           PetscInt i, n = matis->A->rmap->n;
1796           PetscCall(MatGetBlockSize(pc->pmat, &i));
1797           if (i > 1) {
1798             pcbddc->n_ISForDofsLocal = i;
1799             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1800             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1801           }
1802         }
1803       }
1804     } else {
1805       PetscInt i;
1806       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1807     }
1808   }
1809 
1810 boundary:
1811   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1812     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1813   } else if (pcbddc->DirichletBoundariesLocal) {
1814     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1815   }
1816   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1817     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1818   } else if (pcbddc->NeumannBoundariesLocal) {
1819     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1820   }
1821   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1822   PetscCall(VecDestroy(&global));
1823   PetscCall(VecDestroy(&local));
1824   /* detect local disconnected subdomains if requested or needed */
1825   if (pcbddc->detect_disconnected || matis->allow_repeated) {
1826     IS        primalv = NULL;
1827     PetscInt  nel;
1828     PetscBool filter = pcbddc->detect_disconnected_filter;
1829 
1830     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1831     PetscCall(PetscFree(pcbddc->local_subs));
1832     PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1833     if (matis->allow_repeated && nel) {
1834       const PetscInt *elsizes;
1835 
1836       pcbddc->n_local_subs = nel;
1837       PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1838       PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1839       for (PetscInt i = 0, c = 0; i < nel; i++) {
1840         PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1841         c += elsizes[i];
1842       }
1843     } else {
1844       PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1845     }
1846     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1847     PetscCall(ISDestroy(&primalv));
1848   }
1849   /* early stage corner detection */
1850   {
1851     DM dm;
1852 
1853     PetscCall(MatGetDM(pc->pmat, &dm));
1854     if (!dm) PetscCall(PCGetDM(pc, &dm));
1855     if (dm) {
1856       PetscBool isda;
1857 
1858       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1859       if (isda) {
1860         ISLocalToGlobalMapping l2l;
1861         IS                     corners;
1862         Mat                    lA;
1863         PetscBool              gl, lo;
1864 
1865         {
1866           Vec                cvec;
1867           const PetscScalar *coords;
1868           PetscInt           dof, n, cdim;
1869           PetscBool          memc = PETSC_TRUE;
1870 
1871           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1872           PetscCall(DMGetCoordinates(dm, &cvec));
1873           PetscCall(VecGetLocalSize(cvec, &n));
1874           PetscCall(VecGetBlockSize(cvec, &cdim));
1875           n /= cdim;
1876           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1877           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1878           PetscCall(VecGetArrayRead(cvec, &coords));
1879 #if defined(PETSC_USE_COMPLEX)
1880           memc = PETSC_FALSE;
1881 #endif
1882           if (dof != 1) memc = PETSC_FALSE;
1883           if (memc) {
1884             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1885           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1886             PetscReal *bcoords = pcbddc->mat_graph->coords;
1887             PetscInt   i, b, d;
1888 
1889             for (i = 0; i < n; i++) {
1890               for (b = 0; b < dof; b++) {
1891                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1892               }
1893             }
1894           }
1895           PetscCall(VecRestoreArrayRead(cvec, &coords));
1896           pcbddc->mat_graph->cdim  = cdim;
1897           pcbddc->mat_graph->cnloc = dof * n;
1898           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1899         }
1900         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1901         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1902         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1903         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
1904         lo = (PetscBool)(l2l && corners);
1905         PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1906         if (gl) { /* From PETSc's DMDA */
1907           const PetscInt *idx;
1908           PetscInt        dof, bs, *idxout, n;
1909 
1910           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1911           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
1912           PetscCall(ISGetLocalSize(corners, &n));
1913           PetscCall(ISGetIndices(corners, &idx));
1914           if (bs == dof) {
1915             PetscCall(PetscMalloc1(n, &idxout));
1916             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
1917           } else { /* the original DMDA local-to-local map have been modified */
1918             PetscInt i, d;
1919 
1920             PetscCall(PetscMalloc1(dof * n, &idxout));
1921             for (i = 0; i < n; i++)
1922               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1923             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
1924 
1925             bs = 1;
1926             n *= dof;
1927           }
1928           PetscCall(ISRestoreIndices(corners, &idx));
1929           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1930           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
1931           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
1932           PetscCall(ISDestroy(&corners));
1933           pcbddc->corner_selected  = PETSC_TRUE;
1934           pcbddc->corner_selection = PETSC_TRUE;
1935         }
1936         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1937       }
1938     }
1939   }
1940   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1941     DM dm;
1942 
1943     PetscCall(MatGetDM(pc->pmat, &dm));
1944     if (!dm) PetscCall(PCGetDM(pc, &dm));
1945     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1946       Vec          vcoords;
1947       PetscSection section;
1948       PetscReal   *coords;
1949       PetscInt     d, cdim, nl, nf, **ctxs;
1950       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1951       /* debug coordinates */
1952       PetscViewer       viewer;
1953       PetscBool         flg;
1954       PetscViewerFormat format;
1955       const char       *prefix;
1956 
1957       PetscCall(DMGetCoordinateDim(dm, &cdim));
1958       PetscCall(DMGetLocalSection(dm, &section));
1959       PetscCall(PetscSectionGetNumFields(section, &nf));
1960       PetscCall(DMCreateGlobalVector(dm, &vcoords));
1961       PetscCall(VecGetLocalSize(vcoords, &nl));
1962       PetscCall(PetscMalloc1(nl * cdim, &coords));
1963       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
1964       PetscCall(PetscMalloc1(nf, &ctxs[0]));
1965       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1966       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
1967 
1968       /* debug coordinates */
1969       PetscCall(PCGetOptionsPrefix(pc, &prefix));
1970       PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
1971       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
1972       for (d = 0; d < cdim; d++) {
1973         PetscInt           i;
1974         const PetscScalar *v;
1975         char               name[16];
1976 
1977         for (i = 0; i < nf; i++) ctxs[i][0] = d;
1978         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d));
1979         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
1980         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
1981         if (flg) PetscCall(VecView(vcoords, viewer));
1982         PetscCall(VecGetArrayRead(vcoords, &v));
1983         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1984         PetscCall(VecRestoreArrayRead(vcoords, &v));
1985       }
1986       PetscCall(VecDestroy(&vcoords));
1987       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
1988       PetscCall(PetscFree(coords));
1989       PetscCall(PetscFree(ctxs[0]));
1990       PetscCall(PetscFree2(funcs, ctxs));
1991       if (flg) {
1992         PetscCall(PetscViewerPopFormat(viewer));
1993         PetscCall(PetscViewerDestroy(&viewer));
1994       }
1995     }
1996   }
1997   PetscFunctionReturn(PETSC_SUCCESS);
1998 }
1999 
2000 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
2001 {
2002   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
2003   IS              nis;
2004   const PetscInt *idxs;
2005   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;
2006 
2007   PetscFunctionBegin;
2008   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
2009   if (mop == MPI_LAND) {
2010     /* init rootdata with true */
2011     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
2012   } else {
2013     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
2014   }
2015   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
2016   PetscCall(ISGetLocalSize(*is, &nd));
2017   PetscCall(ISGetIndices(*is, &idxs));
2018   for (i = 0; i < nd; i++)
2019     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
2020   PetscCall(ISRestoreIndices(*is, &idxs));
2021   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2022   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2023   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2024   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2025   if (mop == MPI_LAND) {
2026     PetscCall(PetscMalloc1(nd, &nidxs));
2027   } else {
2028     PetscCall(PetscMalloc1(n, &nidxs));
2029   }
2030   for (i = 0, nnd = 0; i < n; i++)
2031     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2032   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2033   PetscCall(ISDestroy(is));
2034   *is = nis;
2035   PetscFunctionReturn(PETSC_SUCCESS);
2036 }
2037 
2038 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2039 {
2040   PC_IS   *pcis   = (PC_IS *)pc->data;
2041   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2042 
2043   PetscFunctionBegin;
2044   if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2045   if (pcbddc->ChangeOfBasisMatrix) {
2046     Vec swap;
2047 
2048     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2049     swap                = pcbddc->work_change;
2050     pcbddc->work_change = r;
2051     r                   = swap;
2052   }
2053   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2054   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2055   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2056   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2057   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2058   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2059   PetscCall(VecSet(z, 0.));
2060   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2061   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2062   if (pcbddc->ChangeOfBasisMatrix) {
2063     pcbddc->work_change = r;
2064     PetscCall(VecCopy(z, pcbddc->work_change));
2065     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2066   }
2067   PetscFunctionReturn(PETSC_SUCCESS);
2068 }
2069 
2070 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2071 {
2072   PCBDDCBenignMatMult_ctx ctx;
2073   PetscBool               apply_right, apply_left, reset_x;
2074 
2075   PetscFunctionBegin;
2076   PetscCall(MatShellGetContext(A, &ctx));
2077   if (transpose) {
2078     apply_right = ctx->apply_left;
2079     apply_left  = ctx->apply_right;
2080   } else {
2081     apply_right = ctx->apply_right;
2082     apply_left  = ctx->apply_left;
2083   }
2084   reset_x = PETSC_FALSE;
2085   if (apply_right) {
2086     const PetscScalar *ax;
2087     PetscInt           nl, i;
2088 
2089     PetscCall(VecGetLocalSize(x, &nl));
2090     PetscCall(VecGetArrayRead(x, &ax));
2091     PetscCall(PetscArraycpy(ctx->work, ax, nl));
2092     PetscCall(VecRestoreArrayRead(x, &ax));
2093     for (i = 0; i < ctx->benign_n; i++) {
2094       PetscScalar     sum, val;
2095       const PetscInt *idxs;
2096       PetscInt        nz, j;
2097       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2098       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2099       sum = 0.;
2100       if (ctx->apply_p0) {
2101         val = ctx->work[idxs[nz - 1]];
2102         for (j = 0; j < nz - 1; j++) {
2103           sum += ctx->work[idxs[j]];
2104           ctx->work[idxs[j]] += val;
2105         }
2106       } else {
2107         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2108       }
2109       ctx->work[idxs[nz - 1]] -= sum;
2110       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2111     }
2112     PetscCall(VecPlaceArray(x, ctx->work));
2113     reset_x = PETSC_TRUE;
2114   }
2115   if (transpose) {
2116     PetscCall(MatMultTranspose(ctx->A, x, y));
2117   } else {
2118     PetscCall(MatMult(ctx->A, x, y));
2119   }
2120   if (reset_x) PetscCall(VecResetArray(x));
2121   if (apply_left) {
2122     PetscScalar *ay;
2123     PetscInt     i;
2124 
2125     PetscCall(VecGetArray(y, &ay));
2126     for (i = 0; i < ctx->benign_n; i++) {
2127       PetscScalar     sum, val;
2128       const PetscInt *idxs;
2129       PetscInt        nz, j;
2130       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2131       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2132       val = -ay[idxs[nz - 1]];
2133       if (ctx->apply_p0) {
2134         sum = 0.;
2135         for (j = 0; j < nz - 1; j++) {
2136           sum += ay[idxs[j]];
2137           ay[idxs[j]] += val;
2138         }
2139         ay[idxs[nz - 1]] += sum;
2140       } else {
2141         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2142         ay[idxs[nz - 1]] = 0.;
2143       }
2144       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2145     }
2146     PetscCall(VecRestoreArray(y, &ay));
2147   }
2148   PetscFunctionReturn(PETSC_SUCCESS);
2149 }
2150 
2151 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2152 {
2153   PetscFunctionBegin;
2154   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2155   PetscFunctionReturn(PETSC_SUCCESS);
2156 }
2157 
2158 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2159 {
2160   PetscFunctionBegin;
2161   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2162   PetscFunctionReturn(PETSC_SUCCESS);
2163 }
2164 
2165 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2166 {
2167   PC_IS                  *pcis   = (PC_IS *)pc->data;
2168   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
2169   PCBDDCBenignMatMult_ctx ctx;
2170 
2171   PetscFunctionBegin;
2172   if (!restore) {
2173     Mat                A_IB, A_BI;
2174     PetscScalar       *work;
2175     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2176 
2177     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2178     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2179     PetscCall(PetscMalloc1(pcis->n, &work));
2180     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2181     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2182     PetscCall(MatSetType(A_IB, MATSHELL));
2183     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private));
2184     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private));
2185     PetscCall(PetscNew(&ctx));
2186     PetscCall(MatShellSetContext(A_IB, ctx));
2187     ctx->apply_left  = PETSC_TRUE;
2188     ctx->apply_right = PETSC_FALSE;
2189     ctx->apply_p0    = PETSC_FALSE;
2190     ctx->benign_n    = pcbddc->benign_n;
2191     if (reuse) {
2192       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2193       ctx->free                 = PETSC_FALSE;
2194     } else { /* TODO: could be optimized for successive solves */
2195       ISLocalToGlobalMapping N_to_D;
2196       PetscInt               i;
2197 
2198       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2199       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2200       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2201       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2202       ctx->free = PETSC_TRUE;
2203     }
2204     ctx->A    = pcis->A_IB;
2205     ctx->work = work;
2206     PetscCall(MatSetUp(A_IB));
2207     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2208     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2209     pcis->A_IB = A_IB;
2210 
2211     /* A_BI as A_IB^T */
2212     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2213     pcbddc->benign_original_mat = pcis->A_BI;
2214     pcis->A_BI                  = A_BI;
2215   } else {
2216     if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2217     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2218     PetscCall(MatDestroy(&pcis->A_IB));
2219     pcis->A_IB = ctx->A;
2220     ctx->A     = NULL;
2221     PetscCall(MatDestroy(&pcis->A_BI));
2222     pcis->A_BI                  = pcbddc->benign_original_mat;
2223     pcbddc->benign_original_mat = NULL;
2224     if (ctx->free) {
2225       PetscInt i;
2226       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2227       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2228     }
2229     PetscCall(PetscFree(ctx->work));
2230     PetscCall(PetscFree(ctx));
2231   }
2232   PetscFunctionReturn(PETSC_SUCCESS);
2233 }
2234 
2235 /* used just in bddc debug mode */
2236 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2237 {
2238   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2239   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2240   Mat      An;
2241 
2242   PetscFunctionBegin;
2243   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2244   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2245   if (is1) {
2246     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2247     PetscCall(MatDestroy(&An));
2248   } else {
2249     *B = An;
2250   }
2251   PetscFunctionReturn(PETSC_SUCCESS);
2252 }
2253 
2254 /* TODO: add reuse flag */
2255 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2256 {
2257   Mat             Bt;
2258   PetscScalar    *a, *bdata;
2259   const PetscInt *ii, *ij;
2260   PetscInt        m, n, i, nnz, *bii, *bij;
2261   PetscBool       flg_row;
2262 
2263   PetscFunctionBegin;
2264   PetscCall(MatGetSize(A, &n, &m));
2265   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2266   PetscCall(MatSeqAIJGetArray(A, &a));
2267   nnz = n;
2268   for (i = 0; i < ii[n]; i++) {
2269     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2270   }
2271   PetscCall(PetscMalloc1(n + 1, &bii));
2272   PetscCall(PetscMalloc1(nnz, &bij));
2273   PetscCall(PetscMalloc1(nnz, &bdata));
2274   nnz    = 0;
2275   bii[0] = 0;
2276   for (i = 0; i < n; i++) {
2277     PetscInt j;
2278     for (j = ii[i]; j < ii[i + 1]; j++) {
2279       PetscScalar entry = a[j];
2280       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2281         bij[nnz]   = ij[j];
2282         bdata[nnz] = entry;
2283         nnz++;
2284       }
2285     }
2286     bii[i + 1] = nnz;
2287   }
2288   PetscCall(MatSeqAIJRestoreArray(A, &a));
2289   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2290   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2291   {
2292     Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2293     b->free_a     = PETSC_TRUE;
2294     b->free_ij    = PETSC_TRUE;
2295   }
2296   if (*B == A) PetscCall(MatDestroy(&A));
2297   *B = Bt;
2298   PetscFunctionReturn(PETSC_SUCCESS);
2299 }
2300 
2301 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2302 {
2303   Mat                    B = NULL;
2304   DM                     dm;
2305   IS                     is_dummy, *cc_n;
2306   ISLocalToGlobalMapping l2gmap_dummy;
2307   PCBDDCGraph            graph;
2308   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2309   PetscInt               i, n;
2310   PetscInt              *xadj, *adjncy;
2311   PetscBool              isplex = PETSC_FALSE;
2312 
2313   PetscFunctionBegin;
2314   if (ncc) *ncc = 0;
2315   if (cc) *cc = NULL;
2316   if (primalv) *primalv = NULL;
2317   PetscCall(PCBDDCGraphCreate(&graph));
2318   PetscCall(MatGetDM(pc->pmat, &dm));
2319   if (!dm) PetscCall(PCGetDM(pc, &dm));
2320   if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2321   if (filter) isplex = PETSC_FALSE;
2322 
2323   if (isplex) { /* this code has been modified from plexpartition.c */
2324     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2325     PetscInt       *adj = NULL;
2326     IS              cellNumbering;
2327     const PetscInt *cellNum;
2328     PetscBool       useCone, useClosure;
2329     PetscSection    section;
2330     PetscSegBuffer  adjBuffer;
2331     PetscSF         sfPoint;
2332 
2333     PetscCall(DMConvert(dm, DMPLEX, &dm));
2334     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2335     PetscCall(DMGetPointSF(dm, &sfPoint));
2336     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2337     /* Build adjacency graph via a section/segbuffer */
2338     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2339     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2340     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2341     /* Always use FVM adjacency to create partitioner graph */
2342     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2343     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2344     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2345     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2346     for (n = 0, p = pStart; p < pEnd; p++) {
2347       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2348       if (nroots > 0) {
2349         if (cellNum[p] < 0) continue;
2350       }
2351       adjSize = PETSC_DETERMINE;
2352       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2353       for (a = 0; a < adjSize; ++a) {
2354         const PetscInt point = adj[a];
2355         if (pStart <= point && point < pEnd) {
2356           PetscInt *PETSC_RESTRICT pBuf;
2357           PetscCall(PetscSectionAddDof(section, p, 1));
2358           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2359           *pBuf = point;
2360         }
2361       }
2362       n++;
2363     }
2364     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2365     /* Derive CSR graph from section/segbuffer */
2366     PetscCall(PetscSectionSetUp(section));
2367     PetscCall(PetscSectionGetStorageSize(section, &size));
2368     PetscCall(PetscMalloc1(n + 1, &xadj));
2369     for (idx = 0, p = pStart; p < pEnd; p++) {
2370       if (nroots > 0) {
2371         if (cellNum[p] < 0) continue;
2372       }
2373       PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2374     }
2375     xadj[n] = size;
2376     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2377     /* Clean up */
2378     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2379     PetscCall(PetscSectionDestroy(&section));
2380     PetscCall(PetscFree(adj));
2381     graph->xadj   = xadj;
2382     graph->adjncy = adjncy;
2383   } else {
2384     Mat       A;
2385     PetscBool isseqaij, flg_row;
2386 
2387     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2388     if (!A->rmap->N || !A->cmap->N) {
2389       PetscCall(PCBDDCGraphDestroy(&graph));
2390       PetscFunctionReturn(PETSC_SUCCESS);
2391     }
2392     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2393     if (!isseqaij && filter) {
2394       PetscBool isseqdense;
2395 
2396       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2397       if (!isseqdense) {
2398         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2399       } else { /* TODO: rectangular case and LDA */
2400         PetscScalar *array;
2401         PetscReal    chop = 1.e-6;
2402 
2403         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2404         PetscCall(MatDenseGetArray(B, &array));
2405         PetscCall(MatGetSize(B, &n, NULL));
2406         for (i = 0; i < n; i++) {
2407           PetscInt j;
2408           for (j = i + 1; j < n; j++) {
2409             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2410             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2411             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2412           }
2413         }
2414         PetscCall(MatDenseRestoreArray(B, &array));
2415         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2416       }
2417     } else {
2418       PetscCall(PetscObjectReference((PetscObject)A));
2419       B = A;
2420     }
2421     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2422 
2423     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2424     if (filter) {
2425       PetscScalar *data;
2426       PetscInt     j, cum;
2427 
2428       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2429       PetscCall(MatSeqAIJGetArray(B, &data));
2430       cum = 0;
2431       for (i = 0; i < n; i++) {
2432         PetscInt t;
2433 
2434         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2435           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2436           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2437         }
2438         t                = xadj_filtered[i];
2439         xadj_filtered[i] = cum;
2440         cum += t;
2441       }
2442       PetscCall(MatSeqAIJRestoreArray(B, &data));
2443       graph->xadj   = xadj_filtered;
2444       graph->adjncy = adjncy_filtered;
2445     } else {
2446       graph->xadj   = xadj;
2447       graph->adjncy = adjncy;
2448     }
2449   }
2450   /* compute local connected components using PCBDDCGraph */
2451   graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2452   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2453   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2454   PetscCall(ISDestroy(&is_dummy));
2455   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX));
2456   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2457   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2458   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2459 
2460   /* partial clean up */
2461   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2462   if (B) {
2463     PetscBool flg_row;
2464     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2465     PetscCall(MatDestroy(&B));
2466   }
2467   if (isplex) {
2468     PetscCall(PetscFree(xadj));
2469     PetscCall(PetscFree(adjncy));
2470   }
2471 
2472   /* get back data */
2473   if (isplex) {
2474     if (ncc) *ncc = graph->ncc;
2475     if (cc || primalv) {
2476       Mat          A;
2477       PetscBT      btv, btvt, btvc;
2478       PetscSection subSection;
2479       PetscInt    *ids, cum, cump, *cids, *pids;
2480       PetscInt     dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;
2481 
2482       PetscCall(DMGetDimension(dm, &dim));
2483       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2484       PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2485       PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2486       PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2487       PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2488       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2489       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2490       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2491       PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2492       PetscCall(PetscBTCreate(pEnd - pStart, &btvc));
2493 
2494       /* First see if we find corners for the subdomains, i.e. a vertex
2495          shared by at least dim subdomain boundary faces. This does not
2496          cover all the possible cases with simplices but it is enough
2497          for tensor cells */
2498       if (vStart != fStart && dim <= 3) {
2499         for (PetscInt c = cStart; c < cEnd; c++) {
2500           PetscInt        nf, cnt = 0, mcnt = dim, *cfaces;
2501           const PetscInt *faces;
2502 
2503           PetscCall(DMPlexGetConeSize(dm, c, &nf));
2504           PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2505           PetscCall(DMPlexGetCone(dm, c, &faces));
2506           for (PetscInt f = 0; f < nf; f++) {
2507             PetscInt nc, ff;
2508 
2509             PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2510             PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2511             if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2512           }
2513           if (cnt >= mcnt) {
2514             PetscInt size, *closure = NULL;
2515 
2516             PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2517             for (PetscInt k = 0; k < 2 * size; k += 2) {
2518               PetscInt v = closure[k];
2519               if (v >= vStart && v < vEnd) {
2520                 PetscInt vsize, *vclosure = NULL;
2521 
2522                 cnt = 0;
2523                 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2524                 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2525                   PetscInt f = vclosure[vk];
2526                   if (f >= fStart && f < fEnd) {
2527                     PetscInt  nc, ff;
2528                     PetscBool valid = PETSC_FALSE;
2529 
2530                     for (PetscInt fk = 0; fk < nf; fk++)
2531                       if (f == cfaces[fk]) valid = PETSC_TRUE;
2532                     if (!valid) continue;
2533                     PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2534                     PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2535                     if (nc == 1 && f == ff) cnt++;
2536                   }
2537                 }
2538                 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2539                 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2540               }
2541             }
2542             PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2543           }
2544           PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2545         }
2546       }
2547 
2548       cids[0] = 0;
2549       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2550         PetscInt j;
2551 
2552         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2553         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2554           PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2555 
2556           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2557           for (k = 0; k < 2 * size; k += 2) {
2558             PetscInt s, pp, p = closure[k], off, dof, cdof;
2559 
2560             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2561             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2562             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2563             for (s = 0; s < dof - cdof; s++) {
2564               if (PetscBTLookupSet(btvt, off + s)) continue;
2565               if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2566               else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2567               else pids[cump++] = off + s; /* cross-vertex */
2568             }
2569             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2570             if (pp != p) {
2571               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2572               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2573               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2574               for (s = 0; s < dof - cdof; s++) {
2575                 if (PetscBTLookupSet(btvt, off + s)) continue;
2576                 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2577                 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2578                 else pids[cump++] = off + s; /* cross-vertex */
2579               }
2580             }
2581           }
2582           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2583         }
2584         cids[i + 1] = cum;
2585         /* mark dofs as already assigned */
2586         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2587       }
2588       if (cc) {
2589         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2590         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2591         *cc = cc_n;
2592       }
2593       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2594       PetscCall(PetscFree3(ids, cids, pids));
2595       PetscCall(PetscBTDestroy(&btv));
2596       PetscCall(PetscBTDestroy(&btvt));
2597       PetscCall(PetscBTDestroy(&btvc));
2598       PetscCall(DMDestroy(&dm));
2599     }
2600   } else {
2601     if (ncc) *ncc = graph->ncc;
2602     if (cc) {
2603       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2604       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2605       *cc = cc_n;
2606     }
2607   }
2608   /* clean up graph */
2609   graph->xadj   = NULL;
2610   graph->adjncy = NULL;
2611   PetscCall(PCBDDCGraphDestroy(&graph));
2612   PetscFunctionReturn(PETSC_SUCCESS);
2613 }
2614 
2615 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2616 {
2617   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2618   PC_IS   *pcis   = (PC_IS *)pc->data;
2619   IS       dirIS  = NULL;
2620   PetscInt i;
2621 
2622   PetscFunctionBegin;
2623   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2624   if (zerodiag) {
2625     Mat             A;
2626     Vec             vec3_N;
2627     PetscScalar    *vals;
2628     const PetscInt *idxs;
2629     PetscInt        nz, *count;
2630 
2631     /* p0 */
2632     PetscCall(VecSet(pcis->vec1_N, 0.));
2633     PetscCall(PetscMalloc1(pcis->n, &vals));
2634     PetscCall(ISGetLocalSize(zerodiag, &nz));
2635     PetscCall(ISGetIndices(zerodiag, &idxs));
2636     for (i = 0; i < nz; i++) vals[i] = 1.;
2637     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2638     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2639     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2640     /* v_I */
2641     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2642     for (i = 0; i < nz; i++) vals[i] = 0.;
2643     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2644     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2645     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2646     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2647     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2648     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2649     if (dirIS) {
2650       PetscInt n;
2651 
2652       PetscCall(ISGetLocalSize(dirIS, &n));
2653       PetscCall(ISGetIndices(dirIS, &idxs));
2654       for (i = 0; i < n; i++) vals[i] = 0.;
2655       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2656       PetscCall(ISRestoreIndices(dirIS, &idxs));
2657     }
2658     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2659     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2660     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2661     PetscCall(VecSet(vec3_N, 0.));
2662     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2663     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2664     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2665     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2666     PetscCall(PetscFree(vals));
2667     PetscCall(VecDestroy(&vec3_N));
2668 
2669     /* there should not be any pressure dofs lying on the interface */
2670     PetscCall(PetscCalloc1(pcis->n, &count));
2671     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2672     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2673     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2674     PetscCall(ISGetIndices(zerodiag, &idxs));
2675     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2676     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2677     PetscCall(PetscFree(count));
2678   }
2679   PetscCall(ISDestroy(&dirIS));
2680 
2681   /* check PCBDDCBenignGetOrSetP0 */
2682   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2683   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2684   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2685   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2686   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2687   for (i = 0; i < pcbddc->benign_n; i++) {
2688     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2689     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2690   }
2691   PetscFunctionReturn(PETSC_SUCCESS);
2692 }
2693 
2694 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2695 {
2696   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2697   Mat_IS   *matis     = (Mat_IS *)pc->pmat->data;
2698   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2699   PetscInt  nz, n, benign_n, bsp = 1;
2700   PetscInt *interior_dofs, n_interior_dofs, nneu;
2701   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2702 
2703   PetscFunctionBegin;
2704   if (reuse) goto project_b0;
2705   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2706   PetscCall(MatDestroy(&pcbddc->benign_B0));
2707   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2708   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2709   has_null_pressures = PETSC_TRUE;
2710   have_null          = PETSC_TRUE;
2711   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2712      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2713      Checks if all the pressure dofs in each subdomain have a zero diagonal
2714      If not, a change of basis on pressures is not needed
2715      since the local Schur complements are already SPD
2716   */
2717   if (pcbddc->n_ISForDofsLocal) {
2718     IS        iP = NULL;
2719     PetscInt  p, *pp;
2720     PetscBool flg, blocked = PETSC_FALSE;
2721 
2722     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2723     n = pcbddc->n_ISForDofsLocal;
2724     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2725     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2726     PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL));
2727     PetscOptionsEnd();
2728     if (!flg) {
2729       n     = 1;
2730       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2731     }
2732 
2733     bsp = 0;
2734     for (p = 0; p < n; p++) {
2735       PetscInt bs = 1;
2736 
2737       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2738       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2739       bsp += bs;
2740     }
2741     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2742     bsp = 0;
2743     for (p = 0; p < n; p++) {
2744       const PetscInt *idxs;
2745       PetscInt        b, bs = 1, npl, *bidxs;
2746 
2747       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2748       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2749       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2750       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2751       for (b = 0; b < bs; b++) {
2752         PetscInt i;
2753 
2754         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2755         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2756         bsp++;
2757       }
2758       PetscCall(PetscFree(bidxs));
2759       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2760     }
2761     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2762 
2763     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2764     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2765     if (iP) {
2766       IS newpressures;
2767 
2768       PetscCall(ISDifference(pressures, iP, &newpressures));
2769       PetscCall(ISDestroy(&pressures));
2770       pressures = newpressures;
2771     }
2772     PetscCall(ISSorted(pressures, &sorted));
2773     if (!sorted) PetscCall(ISSort(pressures));
2774     PetscCall(PetscFree(pp));
2775   }
2776 
2777   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2778   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2779   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2780   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2781   PetscCall(ISSorted(zerodiag, &sorted));
2782   if (!sorted) PetscCall(ISSort(zerodiag));
2783   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2784   zerodiag_save = zerodiag;
2785   PetscCall(ISGetLocalSize(zerodiag, &nz));
2786   if (!nz) {
2787     if (n) have_null = PETSC_FALSE;
2788     has_null_pressures = PETSC_FALSE;
2789     PetscCall(ISDestroy(&zerodiag));
2790   }
2791   recompute_zerodiag = PETSC_FALSE;
2792 
2793   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2794   zerodiag_subs   = NULL;
2795   benign_n        = 0;
2796   n_interior_dofs = 0;
2797   interior_dofs   = NULL;
2798   nneu            = 0;
2799   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2800   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2801   if (checkb) { /* need to compute interior nodes */
2802     PetscInt               n, i;
2803     PetscInt              *count;
2804     ISLocalToGlobalMapping mapping;
2805 
2806     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2807     PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2808     PetscCall(PetscMalloc1(n, &interior_dofs));
2809     for (i = 0; i < n; i++)
2810       if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2811     PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2812   }
2813   if (has_null_pressures) {
2814     IS             *subs;
2815     PetscInt        nsubs, i, j, nl;
2816     const PetscInt *idxs;
2817     PetscScalar    *array;
2818     Vec            *work;
2819 
2820     subs  = pcbddc->local_subs;
2821     nsubs = pcbddc->n_local_subs;
2822     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2823     if (checkb) {
2824       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2825       PetscCall(ISGetLocalSize(zerodiag, &nl));
2826       PetscCall(ISGetIndices(zerodiag, &idxs));
2827       /* work[0] = 1_p */
2828       PetscCall(VecSet(work[0], 0.));
2829       PetscCall(VecGetArray(work[0], &array));
2830       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2831       PetscCall(VecRestoreArray(work[0], &array));
2832       /* work[0] = 1_v */
2833       PetscCall(VecSet(work[1], 1.));
2834       PetscCall(VecGetArray(work[1], &array));
2835       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2836       PetscCall(VecRestoreArray(work[1], &array));
2837       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2838     }
2839 
2840     if (nsubs > 1 || bsp > 1) {
2841       IS      *is;
2842       PetscInt b, totb;
2843 
2844       totb  = bsp;
2845       is    = bsp > 1 ? bzerodiag : &zerodiag;
2846       nsubs = PetscMax(nsubs, 1);
2847       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2848       for (b = 0; b < totb; b++) {
2849         for (i = 0; i < nsubs; i++) {
2850           ISLocalToGlobalMapping l2g;
2851           IS                     t_zerodiag_subs;
2852           PetscInt               nl;
2853 
2854           if (subs) {
2855             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2856           } else {
2857             IS tis;
2858 
2859             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2860             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2861             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2862             PetscCall(ISDestroy(&tis));
2863           }
2864           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2865           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2866           if (nl) {
2867             PetscBool valid = PETSC_TRUE;
2868 
2869             if (checkb) {
2870               PetscCall(VecSet(matis->x, 0));
2871               PetscCall(ISGetLocalSize(subs[i], &nl));
2872               PetscCall(ISGetIndices(subs[i], &idxs));
2873               PetscCall(VecGetArray(matis->x, &array));
2874               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2875               PetscCall(VecRestoreArray(matis->x, &array));
2876               PetscCall(ISRestoreIndices(subs[i], &idxs));
2877               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2878               PetscCall(MatMult(matis->A, matis->x, matis->y));
2879               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2880               PetscCall(VecGetArray(matis->y, &array));
2881               for (j = 0; j < n_interior_dofs; j++) {
2882                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2883                   valid = PETSC_FALSE;
2884                   break;
2885                 }
2886               }
2887               PetscCall(VecRestoreArray(matis->y, &array));
2888             }
2889             if (valid && nneu) {
2890               const PetscInt *idxs;
2891               PetscInt        nzb;
2892 
2893               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2894               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2895               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2896               if (nzb) valid = PETSC_FALSE;
2897             }
2898             if (valid && pressures) {
2899               IS       t_pressure_subs, tmp;
2900               PetscInt i1, i2;
2901 
2902               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2903               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
2904               PetscCall(ISGetLocalSize(tmp, &i1));
2905               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
2906               if (i2 != i1) valid = PETSC_FALSE;
2907               PetscCall(ISDestroy(&t_pressure_subs));
2908               PetscCall(ISDestroy(&tmp));
2909             }
2910             if (valid) {
2911               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
2912               benign_n++;
2913             } else recompute_zerodiag = PETSC_TRUE;
2914           }
2915           PetscCall(ISDestroy(&t_zerodiag_subs));
2916           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
2917         }
2918       }
2919     } else { /* there's just one subdomain (or zero if they have not been detected */
2920       PetscBool valid = PETSC_TRUE;
2921 
2922       if (nneu) valid = PETSC_FALSE;
2923       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
2924       if (valid && checkb) {
2925         PetscCall(MatMult(matis->A, work[0], matis->x));
2926         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
2927         PetscCall(VecGetArray(matis->x, &array));
2928         for (j = 0; j < n_interior_dofs; j++) {
2929           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2930             valid = PETSC_FALSE;
2931             break;
2932           }
2933         }
2934         PetscCall(VecRestoreArray(matis->x, &array));
2935       }
2936       if (valid) {
2937         benign_n = 1;
2938         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
2939         PetscCall(PetscObjectReference((PetscObject)zerodiag));
2940         zerodiag_subs[0] = zerodiag;
2941       }
2942     }
2943     if (checkb) PetscCall(VecDestroyVecs(2, &work));
2944   }
2945   PetscCall(PetscFree(interior_dofs));
2946 
2947   if (!benign_n) {
2948     PetscInt n;
2949 
2950     PetscCall(ISDestroy(&zerodiag));
2951     recompute_zerodiag = PETSC_FALSE;
2952     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2953     if (n) have_null = PETSC_FALSE;
2954   }
2955 
2956   /* final check for null pressures */
2957   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
2958 
2959   if (recompute_zerodiag) {
2960     PetscCall(ISDestroy(&zerodiag));
2961     if (benign_n == 1) {
2962       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2963       zerodiag = zerodiag_subs[0];
2964     } else {
2965       PetscInt i, nzn, *new_idxs;
2966 
2967       nzn = 0;
2968       for (i = 0; i < benign_n; i++) {
2969         PetscInt ns;
2970         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2971         nzn += ns;
2972       }
2973       PetscCall(PetscMalloc1(nzn, &new_idxs));
2974       nzn = 0;
2975       for (i = 0; i < benign_n; i++) {
2976         PetscInt ns, *idxs;
2977         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2978         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2979         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
2980         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2981         nzn += ns;
2982       }
2983       PetscCall(PetscSortInt(nzn, new_idxs));
2984       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
2985     }
2986     have_null = PETSC_FALSE;
2987   }
2988 
2989   /* determines if the coarse solver will be singular or not */
2990   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2991 
2992   /* Prepare matrix to compute no-net-flux */
2993   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2994     Mat                    A, loc_divudotp;
2995     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2996     IS                     row, col, isused = NULL;
2997     PetscInt               M, N, n, st, n_isused;
2998 
2999     if (pressures) {
3000       isused = pressures;
3001     } else {
3002       isused = zerodiag_save;
3003     }
3004     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
3005     PetscCall(MatISGetLocalMat(pc->pmat, &A));
3006     PetscCall(MatGetLocalSize(A, &n, NULL));
3007     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
3008     n_isused = 0;
3009     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
3010     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
3011     st = st - n_isused;
3012     if (n) {
3013       const PetscInt *gidxs;
3014 
3015       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
3016       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
3017       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
3018       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3019       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
3020       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
3021     } else {
3022       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3023       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3024       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3025     }
3026     PetscCall(MatGetSize(pc->pmat, NULL, &N));
3027     PetscCall(ISGetSize(row, &M));
3028     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3029     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3030     PetscCall(ISDestroy(&row));
3031     PetscCall(ISDestroy(&col));
3032     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3033     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3034     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3035     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3036     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3037     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3038     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3039     PetscCall(MatDestroy(&loc_divudotp));
3040     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3041     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3042   }
3043   PetscCall(ISDestroy(&zerodiag_save));
3044   PetscCall(ISDestroy(&pressures));
3045   if (bzerodiag) {
3046     PetscInt i;
3047 
3048     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3049     PetscCall(PetscFree(bzerodiag));
3050   }
3051   pcbddc->benign_n             = benign_n;
3052   pcbddc->benign_zerodiag_subs = zerodiag_subs;
3053 
3054   /* determines if the problem has subdomains with 0 pressure block */
3055   have_null = (PetscBool)(!!pcbddc->benign_n);
3056   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
3057 
3058 project_b0:
3059   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3060   /* change of basis and p0 dofs */
3061   if (pcbddc->benign_n) {
3062     PetscInt i, s, *nnz;
3063 
3064     /* local change of basis for pressures */
3065     PetscCall(MatDestroy(&pcbddc->benign_change));
3066     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3067     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3068     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3069     PetscCall(PetscMalloc1(n, &nnz));
3070     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3071     for (i = 0; i < pcbddc->benign_n; i++) {
3072       const PetscInt *idxs;
3073       PetscInt        nzs, j;
3074 
3075       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3076       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3077       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3078       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
3079       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3080     }
3081     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3082     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3083     PetscCall(PetscFree(nnz));
3084     /* set identity by default */
3085     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3086     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3087     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3088     /* set change on pressures */
3089     for (s = 0; s < pcbddc->benign_n; s++) {
3090       PetscScalar    *array;
3091       const PetscInt *idxs;
3092       PetscInt        nzs;
3093 
3094       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3095       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3096       for (i = 0; i < nzs - 1; i++) {
3097         PetscScalar vals[2];
3098         PetscInt    cols[2];
3099 
3100         cols[0] = idxs[i];
3101         cols[1] = idxs[nzs - 1];
3102         vals[0] = 1.;
3103         vals[1] = 1.;
3104         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3105       }
3106       PetscCall(PetscMalloc1(nzs, &array));
3107       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3108       array[nzs - 1] = 1.;
3109       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3110       /* store local idxs for p0 */
3111       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3112       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3113       PetscCall(PetscFree(array));
3114     }
3115     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3116     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3117 
3118     /* project if needed */
3119     if (pcbddc->benign_change_explicit) {
3120       Mat M;
3121 
3122       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3123       PetscCall(MatDestroy(&pcbddc->local_mat));
3124       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3125       PetscCall(MatDestroy(&M));
3126     }
3127     /* store global idxs for p0 */
3128     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3129   }
3130   *zerodiaglocal = zerodiag;
3131   PetscFunctionReturn(PETSC_SUCCESS);
3132 }
3133 
3134 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3135 {
3136   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
3137   PetscScalar *array;
3138 
3139   PetscFunctionBegin;
3140   if (!pcbddc->benign_sf) {
3141     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3142     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3143   }
3144   if (get) {
3145     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3146     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3147     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3148     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3149   } else {
3150     PetscCall(VecGetArray(v, &array));
3151     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3152     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3153     PetscCall(VecRestoreArray(v, &array));
3154   }
3155   PetscFunctionReturn(PETSC_SUCCESS);
3156 }
3157 
3158 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3159 {
3160   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3161 
3162   PetscFunctionBegin;
3163   /* TODO: add error checking
3164     - avoid nested pop (or push) calls.
3165     - cannot push before pop.
3166     - cannot call this if pcbddc->local_mat is NULL
3167   */
3168   if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3169   if (pop) {
3170     if (pcbddc->benign_change_explicit) {
3171       IS       is_p0;
3172       MatReuse reuse;
3173 
3174       /* extract B_0 */
3175       reuse = MAT_INITIAL_MATRIX;
3176       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3177       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3178       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3179       /* remove rows and cols from local problem */
3180       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3181       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3182       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3183       PetscCall(ISDestroy(&is_p0));
3184     } else {
3185       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
3186       PetscScalar *vals;
3187       PetscInt     i, n, *idxs_ins;
3188 
3189       PetscCall(VecGetLocalSize(matis->y, &n));
3190       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3191       if (!pcbddc->benign_B0) {
3192         PetscInt *nnz;
3193         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3194         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3195         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3196         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3197         for (i = 0; i < pcbddc->benign_n; i++) {
3198           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3199           nnz[i] = n - nnz[i];
3200         }
3201         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3202         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3203         PetscCall(PetscFree(nnz));
3204       }
3205 
3206       for (i = 0; i < pcbddc->benign_n; i++) {
3207         PetscScalar *array;
3208         PetscInt    *idxs, j, nz, cum;
3209 
3210         PetscCall(VecSet(matis->x, 0.));
3211         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3212         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3213         for (j = 0; j < nz; j++) vals[j] = 1.;
3214         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3215         PetscCall(VecAssemblyBegin(matis->x));
3216         PetscCall(VecAssemblyEnd(matis->x));
3217         PetscCall(VecSet(matis->y, 0.));
3218         PetscCall(MatMult(matis->A, matis->x, matis->y));
3219         PetscCall(VecGetArray(matis->y, &array));
3220         cum = 0;
3221         for (j = 0; j < n; j++) {
3222           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3223             vals[cum]     = array[j];
3224             idxs_ins[cum] = j;
3225             cum++;
3226           }
3227         }
3228         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3229         PetscCall(VecRestoreArray(matis->y, &array));
3230         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3231       }
3232       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3233       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3234       PetscCall(PetscFree2(idxs_ins, vals));
3235     }
3236   } else { /* push */
3237 
3238     PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3239     for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3240       PetscScalar *B0_vals;
3241       PetscInt    *B0_cols, B0_ncol;
3242 
3243       PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3244       PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3245       PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3246       PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3247       PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3248     }
3249     PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3250     PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3251   }
3252   PetscFunctionReturn(PETSC_SUCCESS);
3253 }
3254 
3255 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3256 {
3257   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3258   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3259   PetscBLASInt    B_neigs, B_ierr, B_lwork;
3260   PetscBLASInt   *B_iwork, *B_ifail;
3261   PetscScalar    *work, lwork;
3262   PetscScalar    *St, *S, *eigv;
3263   PetscScalar    *Sarray, *Starray;
3264   PetscReal      *eigs, thresh, lthresh, uthresh;
3265   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3266   PetscBool       allocated_S_St, upart;
3267 #if defined(PETSC_USE_COMPLEX)
3268   PetscReal *rwork;
3269 #endif
3270 
3271   PetscFunctionBegin;
3272   if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3273   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3274   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3275   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3276              sub_schurs->is_posdef);
3277   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3278 
3279   if (pcbddc->dbg_flag) {
3280     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3281     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3282     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3283     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3284     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3285   }
3286 
3287   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3288 
3289   /* max size of subsets */
3290   mss = 0;
3291   for (i = 0; i < sub_schurs->n_subs; i++) {
3292     PetscInt subset_size;
3293 
3294     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3295     mss = PetscMax(mss, subset_size);
3296   }
3297 
3298   /* min/max and threshold */
3299   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3300   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3301   nmax           = PetscMax(nmin, nmax);
3302   allocated_S_St = PETSC_FALSE;
3303   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3304     allocated_S_St = PETSC_TRUE;
3305   }
3306 
3307   /* allocate lapack workspace */
3308   cum = cum2 = 0;
3309   maxneigs   = 0;
3310   for (i = 0; i < sub_schurs->n_subs; i++) {
3311     PetscInt n, subset_size;
3312 
3313     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3314     n = PetscMin(subset_size, nmax);
3315     cum += subset_size;
3316     cum2 += subset_size * n;
3317     maxneigs = PetscMax(maxneigs, n);
3318   }
3319   lwork = 0;
3320   if (mss) {
3321     PetscScalar  sdummy  = 0.;
3322     PetscBLASInt B_itype = 1;
3323     PetscBLASInt B_N, idummy = 0;
3324     PetscReal    rdummy = 0., zero = 0.0;
3325     PetscReal    eps = 0.0; /* dlamch? */
3326 
3327     PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3328     PetscCall(PetscBLASIntCast(mss, &B_N));
3329     B_lwork = -1;
3330     /* some implementations may complain about NULL pointers, even if we are querying */
3331     S       = &sdummy;
3332     St      = &sdummy;
3333     eigs    = &rdummy;
3334     eigv    = &sdummy;
3335     B_iwork = &idummy;
3336     B_ifail = &idummy;
3337 #if defined(PETSC_USE_COMPLEX)
3338     rwork = &rdummy;
3339 #endif
3340     thresh = 1.0;
3341     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3342 #if defined(PETSC_USE_COMPLEX)
3343     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3344 #else
3345     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3346 #endif
3347     PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr);
3348     PetscCall(PetscFPTrapPop());
3349   }
3350 
3351   nv = 0;
3352   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3353     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3354   }
3355   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3356   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3357   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3358 #if defined(PETSC_USE_COMPLEX)
3359   PetscCall(PetscMalloc1(7 * mss, &rwork));
3360 #endif
3361   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3362                          &pcbddc->adaptive_constraints_data));
3363   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3364 
3365   maxneigs = 0;
3366   cum = cumarray                           = 0;
3367   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3368   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3369   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3370     const PetscInt *idxs;
3371 
3372     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3373     for (cum = 0; cum < nv; cum++) {
3374       pcbddc->adaptive_constraints_n[cum]            = 1;
3375       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3376       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3377       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3378       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3379     }
3380     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3381   }
3382 
3383   if (mss) { /* multilevel */
3384     if (sub_schurs->gdsw) {
3385       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3386       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3387     } else {
3388       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3389       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3390     }
3391   }
3392 
3393   lthresh = pcbddc->adaptive_threshold[0];
3394   uthresh = pcbddc->adaptive_threshold[1];
3395   upart   = pcbddc->use_deluxe_scaling;
3396   for (i = 0; i < sub_schurs->n_subs; i++) {
3397     const PetscInt *idxs;
3398     PetscReal       upper, lower;
3399     PetscInt        j, subset_size, eigs_start = 0;
3400     PetscBLASInt    B_N;
3401     PetscBool       same_data = PETSC_FALSE;
3402     PetscBool       scal      = PETSC_FALSE;
3403 
3404     if (upart) {
3405       upper = PETSC_MAX_REAL;
3406       lower = uthresh;
3407     } else {
3408       if (sub_schurs->gdsw) {
3409         upper = uthresh;
3410         lower = PETSC_MIN_REAL;
3411       } else {
3412         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3413         upper = 1. / uthresh;
3414         lower = 0.;
3415       }
3416     }
3417     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3418     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3419     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3420     /* this is experimental: we assume the dofs have been properly grouped to have
3421        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3422     if (!sub_schurs->is_posdef) {
3423       Mat T;
3424 
3425       for (j = 0; j < subset_size; j++) {
3426         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3427           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3428           PetscCall(MatScale(T, -1.0));
3429           PetscCall(MatDestroy(&T));
3430           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3431           PetscCall(MatScale(T, -1.0));
3432           PetscCall(MatDestroy(&T));
3433           if (sub_schurs->change_primal_sub) {
3434             PetscInt        nz, k;
3435             const PetscInt *idxs;
3436 
3437             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3438             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3439             for (k = 0; k < nz; k++) {
3440               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3441               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3442             }
3443             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3444           }
3445           scal = PETSC_TRUE;
3446           break;
3447         }
3448       }
3449     }
3450 
3451     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3452       if (sub_schurs->is_symmetric) {
3453         PetscInt j, k;
3454         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3455           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3456           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3457         }
3458         for (j = 0; j < subset_size; j++) {
3459           for (k = j; k < subset_size; k++) {
3460             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3461             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3462           }
3463         }
3464       } else {
3465         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3466         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3467       }
3468     } else {
3469       S  = Sarray + cumarray;
3470       St = Starray + cumarray;
3471     }
3472     /* see if we can save some work */
3473     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3474 
3475     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3476       B_neigs = 0;
3477     } else {
3478       PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0;
3479       PetscReal    eps = -1.0; /* dlamch? */
3480       PetscInt     nmin_s;
3481       PetscBool    compute_range;
3482 
3483       PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3484       B_neigs       = 0;
3485       compute_range = (PetscBool)!same_data;
3486       if (nmin >= subset_size) compute_range = PETSC_FALSE;
3487 
3488       if (pcbddc->dbg_flag) {
3489         PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;
3490 
3491         if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3492         PetscCall(
3493           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3494       }
3495 
3496       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3497       if (compute_range) {
3498         /* ask for eigenvalues larger than thresh */
3499         if (sub_schurs->is_posdef) {
3500 #if defined(PETSC_USE_COMPLEX)
3501           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3502 #else
3503           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3504 #endif
3505           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3506         } else { /* no theory so far, but it works nicely */
3507           PetscInt  recipe = 0, recipe_m = 1;
3508           PetscReal bb[2];
3509 
3510           PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3511           switch (recipe) {
3512           case 0:
3513             if (scal) {
3514               bb[0] = PETSC_MIN_REAL;
3515               bb[1] = lthresh;
3516             } else {
3517               bb[0] = uthresh;
3518               bb[1] = PETSC_MAX_REAL;
3519             }
3520 #if defined(PETSC_USE_COMPLEX)
3521             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3522 #else
3523             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3524 #endif
3525             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3526             break;
3527           case 1:
3528             bb[0] = PETSC_MIN_REAL;
3529             bb[1] = lthresh * lthresh;
3530 #if defined(PETSC_USE_COMPLEX)
3531             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3532 #else
3533             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3534 #endif
3535             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3536             if (!scal) {
3537               PetscBLASInt B_neigs2 = 0;
3538 
3539               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3540               bb[1] = PETSC_MAX_REAL;
3541               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3542               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3543 #if defined(PETSC_USE_COMPLEX)
3544               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3545 #else
3546               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3547 #endif
3548               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3549               B_neigs += B_neigs2;
3550             }
3551             break;
3552           case 2:
3553             if (scal) {
3554               bb[0] = PETSC_MIN_REAL;
3555               bb[1] = 0;
3556 #if defined(PETSC_USE_COMPLEX)
3557               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3558 #else
3559               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3560 #endif
3561               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3562             } else {
3563               PetscBLASInt B_neigs2 = 0;
3564               PetscBool    do_copy  = PETSC_FALSE;
3565 
3566               lthresh = PetscMax(lthresh, 0.0);
3567               if (lthresh > 0.0) {
3568                 bb[0] = PETSC_MIN_REAL;
3569                 bb[1] = lthresh * lthresh;
3570 
3571                 do_copy = PETSC_TRUE;
3572 #if defined(PETSC_USE_COMPLEX)
3573                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3574 #else
3575                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3576 #endif
3577                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3578               }
3579               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3580               bb[1] = PETSC_MAX_REAL;
3581               if (do_copy) {
3582                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3583                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3584               }
3585 #if defined(PETSC_USE_COMPLEX)
3586               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3587 #else
3588               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3589 #endif
3590               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3591               B_neigs += B_neigs2;
3592             }
3593             break;
3594           case 3:
3595             if (scal) {
3596               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3597             } else {
3598               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3599             }
3600             if (!scal) {
3601               bb[0] = uthresh;
3602               bb[1] = PETSC_MAX_REAL;
3603 #if defined(PETSC_USE_COMPLEX)
3604               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3605 #else
3606               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3607 #endif
3608               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3609             }
3610             if (recipe_m > 0 && B_N - B_neigs > 0) {
3611               PetscBLASInt B_neigs2 = 0;
3612 
3613               PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3614               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3615               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3616 #if defined(PETSC_USE_COMPLEX)
3617               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3618 #else
3619               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3620 #endif
3621               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3622               B_neigs += B_neigs2;
3623             }
3624             break;
3625           case 4:
3626             bb[0] = PETSC_MIN_REAL;
3627             bb[1] = lthresh;
3628 #if defined(PETSC_USE_COMPLEX)
3629             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3630 #else
3631             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3632 #endif
3633             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3634             {
3635               PetscBLASInt B_neigs2 = 0;
3636 
3637               bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3638               bb[1] = PETSC_MAX_REAL;
3639               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3640               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3641 #if defined(PETSC_USE_COMPLEX)
3642               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3643 #else
3644               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3645 #endif
3646               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3647               B_neigs += B_neigs2;
3648             }
3649             break;
3650           case 5: /* same as before: first compute all eigenvalues, then filter */
3651 #if defined(PETSC_USE_COMPLEX)
3652             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3653 #else
3654             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3655 #endif
3656             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3657             {
3658               PetscInt e, k, ne;
3659               for (e = 0, ne = 0; e < B_neigs; e++) {
3660                 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3661                   for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3662                   eigs[ne] = eigs[e];
3663                   ne++;
3664                 }
3665               }
3666               PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3667               PetscCall(PetscBLASIntCast(ne, &B_neigs));
3668             }
3669             break;
3670           default:
3671             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3672           }
3673         }
3674       } else if (!same_data) { /* this is just to see all the eigenvalues */
3675         PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU));
3676 #if defined(PETSC_USE_COMPLEX)
3677         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3678 #else
3679         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3680 #endif
3681         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3682       } else { /* same_data is true, so just get the adaptive functional requested by the user */
3683         PetscInt k;
3684         PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3685         PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3686         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3687         nmin = nmax;
3688         PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3689         for (k = 0; k < nmax; k++) {
3690           eigs[k]                     = 1. / PETSC_SMALL;
3691           eigv[k * (subset_size + 1)] = 1.0;
3692         }
3693       }
3694       PetscCall(PetscFPTrapPop());
3695       if (B_ierr) {
3696         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3697         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3698         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3699       }
3700 
3701       if (B_neigs > nmax) {
3702         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3703         if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3704         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3705       }
3706 
3707       nmin_s = PetscMin(nmin, B_N);
3708       if (B_neigs < nmin_s) {
3709         PetscBLASInt B_neigs2 = 0;
3710 
3711         if (upart) {
3712           if (scal) {
3713             PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3714             B_IL = B_neigs + 1;
3715           } else {
3716             PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL));
3717             B_IU = B_N - B_neigs;
3718           }
3719         } else {
3720           B_IL = B_neigs + 1;
3721           PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3722         }
3723         if (pcbddc->dbg_flag) {
3724           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3725         }
3726         if (sub_schurs->is_symmetric) {
3727           PetscInt j, k;
3728           for (j = 0; j < subset_size; j++) {
3729             for (k = j; k < subset_size; k++) {
3730               S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3731               St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3732             }
3733           }
3734         } else {
3735           PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3736           PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3737         }
3738         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3739 #if defined(PETSC_USE_COMPLEX)
3740         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3741 #else
3742         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3743 #endif
3744         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3745         PetscCall(PetscFPTrapPop());
3746         B_neigs += B_neigs2;
3747       }
3748       if (B_ierr) {
3749         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3750         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3751         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3752       }
3753       if (pcbddc->dbg_flag) {
3754         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3755         for (j = 0; j < B_neigs; j++) {
3756           if (!sub_schurs->gdsw) {
3757             if (eigs[j] == 0.0) {
3758               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3759             } else {
3760               if (upart) {
3761                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3762               } else {
3763                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1 / eigs[j + eigs_start])));
3764               }
3765             }
3766           } else {
3767             double pg = (double)eigs[j + eigs_start];
3768             if (pg < 2 * PETSC_SMALL) pg = 0.0;
3769             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3770           }
3771         }
3772       }
3773     }
3774     /* change the basis back to the original one */
3775     if (sub_schurs->change) {
3776       Mat change, phi, phit;
3777 
3778       if (pcbddc->dbg_flag > 2) {
3779         PetscInt ii;
3780         for (ii = 0; ii < B_neigs; ii++) {
3781           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3782           for (j = 0; j < B_N; j++) {
3783 #if defined(PETSC_USE_COMPLEX)
3784             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3785             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3786             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3787 #else
3788             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3789 #endif
3790           }
3791         }
3792       }
3793       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3794       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3795       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi));
3796       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3797       PetscCall(MatDestroy(&phit));
3798       PetscCall(MatDestroy(&phi));
3799     }
3800     maxneigs                               = PetscMax(B_neigs, maxneigs);
3801     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3802     if (B_neigs) {
3803       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3804 
3805       if (pcbddc->dbg_flag > 1) {
3806         PetscInt ii;
3807         for (ii = 0; ii < B_neigs; ii++) {
3808           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3809           for (j = 0; j < B_N; j++) {
3810 #if defined(PETSC_USE_COMPLEX)
3811             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3812             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3813             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3814 #else
3815             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3816 #endif
3817           }
3818         }
3819       }
3820       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3821       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3822       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3823       cum++;
3824     }
3825     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3826     /* shift for next computation */
3827     cumarray += subset_size * subset_size;
3828   }
3829   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3830 
3831   if (mss) {
3832     if (sub_schurs->gdsw) {
3833       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3834       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3835     } else {
3836       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3837       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3838       /* destroy matrices (junk) */
3839       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3840       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3841     }
3842   }
3843   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3844   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3845 #if defined(PETSC_USE_COMPLEX)
3846   PetscCall(PetscFree(rwork));
3847 #endif
3848   if (pcbddc->dbg_flag) {
3849     PetscInt maxneigs_r;
3850     PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3851     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3852   }
3853   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3854   PetscFunctionReturn(PETSC_SUCCESS);
3855 }
3856 
3857 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3858 {
3859   Mat coarse_submat;
3860 
3861   PetscFunctionBegin;
3862   /* Setup local scatters R_to_B and (optionally) R_to_D */
3863   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3864   PetscCall(PCBDDCSetUpLocalScatters(pc));
3865 
3866   /* Setup local neumann solver ksp_R */
3867   /* PCBDDCSetUpLocalScatters should be called first! */
3868   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3869 
3870   /*
3871      Setup local correction and local part of coarse basis.
3872      Gives back the dense local part of the coarse matrix in column major ordering
3873   */
3874   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));
3875 
3876   /* Compute total number of coarse nodes and setup coarse solver */
3877   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3878   PetscCall(MatDestroy(&coarse_submat));
3879   PetscFunctionReturn(PETSC_SUCCESS);
3880 }
3881 
3882 PetscErrorCode PCBDDCResetCustomization(PC pc)
3883 {
3884   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3885 
3886   PetscFunctionBegin;
3887   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3888   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3889   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3890   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3891   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3892   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3893   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3894   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3895   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3896   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3897   PetscFunctionReturn(PETSC_SUCCESS);
3898 }
3899 
3900 PetscErrorCode PCBDDCResetTopography(PC pc)
3901 {
3902   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3903   PetscInt i;
3904 
3905   PetscFunctionBegin;
3906   PetscCall(MatDestroy(&pcbddc->nedcG));
3907   PetscCall(ISDestroy(&pcbddc->nedclocal));
3908   PetscCall(MatDestroy(&pcbddc->discretegradient));
3909   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3910   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3911   PetscCall(MatDestroy(&pcbddc->switch_static_change));
3912   PetscCall(VecDestroy(&pcbddc->work_change));
3913   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
3914   PetscCall(MatDestroy(&pcbddc->divudotp));
3915   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
3916   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3917   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
3918   pcbddc->n_local_subs = 0;
3919   PetscCall(PetscFree(pcbddc->local_subs));
3920   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3921   pcbddc->graphanalyzed        = PETSC_FALSE;
3922   pcbddc->recompute_topography = PETSC_TRUE;
3923   pcbddc->corner_selected      = PETSC_FALSE;
3924   PetscFunctionReturn(PETSC_SUCCESS);
3925 }
3926 
3927 PetscErrorCode PCBDDCResetSolvers(PC pc)
3928 {
3929   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3930 
3931   PetscFunctionBegin;
3932   PetscCall(VecDestroy(&pcbddc->coarse_vec));
3933   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
3934   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
3935   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
3936   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
3937   PetscCall(VecDestroy(&pcbddc->vec1_P));
3938   PetscCall(VecDestroy(&pcbddc->vec1_C));
3939   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3940   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3941   PetscCall(VecDestroy(&pcbddc->vec1_R));
3942   PetscCall(VecDestroy(&pcbddc->vec2_R));
3943   PetscCall(ISDestroy(&pcbddc->is_R_local));
3944   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
3945   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
3946   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3947   PetscCall(KSPReset(pcbddc->ksp_D));
3948   PetscCall(KSPReset(pcbddc->ksp_R));
3949   PetscCall(KSPReset(pcbddc->coarse_ksp));
3950   PetscCall(MatDestroy(&pcbddc->local_mat));
3951   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
3952   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
3953   PetscCall(PetscFree(pcbddc->global_primal_indices));
3954   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
3955   PetscCall(MatDestroy(&pcbddc->benign_change));
3956   PetscCall(VecDestroy(&pcbddc->benign_vec));
3957   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
3958   PetscCall(MatDestroy(&pcbddc->benign_B0));
3959   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
3960   if (pcbddc->benign_zerodiag_subs) {
3961     PetscInt i;
3962     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3963     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
3964   }
3965   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3966   PetscFunctionReturn(PETSC_SUCCESS);
3967 }
3968 
3969 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3970 {
3971   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3972   PC_IS   *pcis   = (PC_IS *)pc->data;
3973   VecType  impVecType;
3974   PetscInt n_constraints, n_R, old_size;
3975 
3976   PetscFunctionBegin;
3977   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3978   n_R           = pcis->n - pcbddc->n_vertices;
3979   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
3980   /* local work vectors (try to avoid unneeded work)*/
3981   /* R nodes */
3982   old_size = -1;
3983   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
3984   if (n_R != old_size) {
3985     PetscCall(VecDestroy(&pcbddc->vec1_R));
3986     PetscCall(VecDestroy(&pcbddc->vec2_R));
3987     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
3988     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
3989     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
3990     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
3991   }
3992   /* local primal dofs */
3993   old_size = -1;
3994   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
3995   if (pcbddc->local_primal_size != old_size) {
3996     PetscCall(VecDestroy(&pcbddc->vec1_P));
3997     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
3998     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
3999     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
4000   }
4001   /* local explicit constraints */
4002   old_size = -1;
4003   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
4004   if (n_constraints && n_constraints != old_size) {
4005     PetscCall(VecDestroy(&pcbddc->vec1_C));
4006     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
4007     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
4008     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
4009   }
4010   PetscFunctionReturn(PETSC_SUCCESS);
4011 }
4012 
4013 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
4014 {
4015   PetscBool          flg;
4016   const PetscScalar *a;
4017 
4018   PetscFunctionBegin;
4019   PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
4020   if (flg) {
4021     PetscCall(MatDenseGetArrayRead(S, &a));
4022     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4023     PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4024     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4025     PetscCall(MatDenseRestoreArrayRead(S, &a));
4026   } else {
4027     const PetscInt *ii, *jj;
4028     PetscInt        n;
4029     PetscInt        buf[8192], *bufc = NULL;
4030     PetscBool       freeb = PETSC_FALSE;
4031     Mat             Sm    = S;
4032 
4033     PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4034     if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4035     else PetscCall(PetscObjectReference((PetscObject)S));
4036     PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4037     PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4038     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4039     if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4040       bufc = buf;
4041     } else {
4042       PetscCall(PetscMalloc1(nc, &bufc));
4043       freeb = PETSC_TRUE;
4044     }
4045 
4046     for (PetscInt i = 0; i < n; i++) {
4047       const PetscInt nci = ii[i + 1] - ii[i];
4048 
4049       for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4050       PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4051     }
4052     PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4053     PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4054     PetscCall(MatDestroy(&Sm));
4055     if (freeb) PetscCall(PetscFree(bufc));
4056   }
4057   PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4058   PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4059   PetscFunctionReturn(PETSC_SUCCESS);
4060 }
4061 
4062 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4063 {
4064   Mat_SeqAIJ        *aij;
4065   PetscInt          *ii, *jj;
4066   PetscScalar       *aa;
4067   PetscInt           nnz = 0, m, nc;
4068   const PetscScalar *a;
4069   const PetscScalar  zero = 0.0;
4070 
4071   PetscFunctionBegin;
4072   PetscCall(MatGetLocalSize(D, &m, &nc));
4073   PetscCall(MatDenseGetArrayRead(D, &a));
4074   PetscCall(PetscMalloc1(m + 1, &ii));
4075   PetscCall(PetscMalloc1(m * nc, &jj));
4076   PetscCall(PetscMalloc1(m * nc, &aa));
4077   ii[0] = 0;
4078   for (PetscInt k = 0; k < m; k++) {
4079     for (PetscInt s = 0; s < nc; s++) {
4080       const PetscInt    c = s + k * nc;
4081       const PetscScalar v = a[k + s * m];
4082 
4083       if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4084       jj[nnz] = j[c];
4085       aa[nnz] = a[k + s * m];
4086       nnz++;
4087     }
4088     ii[k + 1] = nnz;
4089   }
4090 
4091   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4092   PetscCall(MatDenseRestoreArrayRead(D, &a));
4093 
4094   aij          = (Mat_SeqAIJ *)(*mat)->data;
4095   aij->free_a  = PETSC_TRUE;
4096   aij->free_ij = PETSC_TRUE;
4097   PetscFunctionReturn(PETSC_SUCCESS);
4098 }
4099 
4100 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
4101 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4102 {
4103   PetscInt        n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4104   const PetscBool allowzeropivot    = PETSC_FALSE;
4105   PetscBool       zeropivotdetected = PETSC_FALSE;
4106   const PetscReal shift             = 0.0;
4107   PetscInt        ipvt[5], *ii, *jj, *indi, *indj;
4108   PetscScalar     work[25], *v_work = NULL, *aa, *diag;
4109   PetscLogDouble  flops = 0.0;
4110 
4111   PetscFunctionBegin;
4112   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4113   for (PetscInt i = 0; i < nblocks; i++) {
4114     ncnt += bsizes[i];
4115     ncnt2 += PetscSqr(bsizes[i]);
4116   }
4117   PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4118   for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4119   if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));
4120 
4121   PetscCall(PetscMalloc1(n + 1, &ii));
4122   PetscCall(PetscMalloc1(ncnt2, &jj));
4123   PetscCall(PetscCalloc1(ncnt2, &aa));
4124 
4125   ncnt  = 0;
4126   ii[0] = 0;
4127   indi  = ii;
4128   indj  = jj;
4129   diag  = aa;
4130   for (PetscInt i = 0; i < nblocks; i++) {
4131     const PetscInt bs = bsizes[i];
4132 
4133     for (PetscInt k = 0; k < bs; k++) {
4134       indi[k + 1] = indi[k] + bs;
4135       for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4136     }
4137     PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4138     switch (bs) {
4139     case 1:
4140       *diag = 1.0 / (*diag);
4141       break;
4142     case 2:
4143       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4144       break;
4145     case 3:
4146       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4147       break;
4148     case 4:
4149       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4150       break;
4151     case 5:
4152       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4153       break;
4154     case 6:
4155       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4156       break;
4157     case 7:
4158       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4159       break;
4160     default:
4161       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4162     }
4163     ncnt += bs;
4164     flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4165     diag += bs * bs;
4166     indj += bs * bs;
4167     indi += bs;
4168   }
4169   PetscCall(PetscLogFlops(flops));
4170   PetscCall(PetscFree2(v_work, v_pivots));
4171   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4172   {
4173     Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4174     aij->free_a     = PETSC_TRUE;
4175     aij->free_ij    = PETSC_TRUE;
4176   }
4177   PetscFunctionReturn(PETSC_SUCCESS);
4178 }
4179 
4180 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B)
4181 {
4182   const PetscScalar *rarr;
4183   PetscScalar       *larr;
4184   PetscSF            vsf;
4185   PetscInt           n, rld, lld;
4186 
4187   PetscFunctionBegin;
4188   PetscCall(MatGetSize(A, NULL, &n));
4189   PetscCall(MatDenseGetLDA(A, &rld));
4190   PetscCall(MatDenseGetLDA(B, &lld));
4191   PetscCall(MatDenseGetArrayRead(A, &rarr));
4192   PetscCall(MatDenseGetArrayWrite(B, &larr));
4193   PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf));
4194   PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4195   PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4196   PetscCall(MatDenseRestoreArrayRead(A, &rarr));
4197   PetscCall(MatDenseRestoreArrayWrite(B, &larr));
4198   PetscCall(PetscSFDestroy(&vsf));
4199   PetscFunctionReturn(PETSC_SUCCESS);
4200 }
4201 
4202 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4203 {
4204   PC_IS          *pcis       = (PC_IS *)pc->data;
4205   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
4206   PCBDDCGraph     graph      = pcbddc->mat_graph;
4207   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4208   /* submatrices of local problem */
4209   Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4210   /* submatrices of local coarse problem */
4211   Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4212   /* working matrices */
4213   Mat C_CR;
4214 
4215   /* additional working stuff */
4216   PC              pc_R;
4217   IS              is_R, is_V, is_C;
4218   const PetscInt *idx_V, *idx_C;
4219   Mat             F, Brhs = NULL;
4220   Vec             dummy_vec;
4221   PetscBool       isLU, isCHOL, need_benign_correction, sparserhs;
4222   PetscInt       *idx_V_B;
4223   PetscInt        lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4224   PetscInt        n_eff_vertices, n_eff_constraints;
4225   PetscInt        i, n_R, n_D, n_B;
4226   PetscScalar     one = 1.0, m_one = -1.0;
4227 
4228   /* Multi-element support */
4229   PetscBool multi_element = graph->multi_element;
4230   PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4231   PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4232   IS        is_C_perm = NULL;
4233   PetscInt  n_C_bss = 0, *C_bss = NULL;
4234   Mat       coarse_phi_multi;
4235 
4236   PetscFunctionBegin;
4237   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4238   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4239 
4240   /* Set Non-overlapping dimensions */
4241   n_vertices    = pcbddc->n_vertices;
4242   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4243   n_B           = pcis->n_B;
4244   n_D           = pcis->n - n_B;
4245   n_R           = pcis->n - n_vertices;
4246 
4247   /* vertices in boundary numbering */
4248   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4249   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4250   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
4251 
4252   /* these two cases still need to be optimized */
4253   if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;
4254 
4255   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
4256   if (multi_element) {
4257     PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
4258 
4259     PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4260     PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4261     PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4262     PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4263     PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4264 
4265     /* group vertices and constraints by subdomain id */
4266     const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4267     const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4268     PetscInt       *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4269     PetscInt        n_el = PetscMax(graph->n_local_subs, 1);
4270 
4271     PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4272     PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4273     PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4274     for (PetscInt i = 0; i < n_vertices; i++) {
4275       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4276 
4277       V_to_eff_V[i] = count_eff[s];
4278       count_eff[s] += 1;
4279     }
4280     for (PetscInt i = 0; i < n_constraints; i++) {
4281       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;
4282 
4283       C_to_eff_C[i] = count_eff[s];
4284       count_eff[s] += 1;
4285     }
4286 
4287     /* preallocation */
4288     PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4289     for (PetscInt i = 0; i < n_vertices; i++) {
4290       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4291 
4292       nnz[i] = count_eff[s] + count_eff[s + 1];
4293     }
4294     for (PetscInt i = 0; i < n_constraints; i++) {
4295       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;
4296 
4297       nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4298     }
4299     PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4300     PetscCall(PetscFree(nnz));
4301 
4302     n_eff_vertices    = 0;
4303     n_eff_constraints = 0;
4304     for (PetscInt i = 0; i < n_el; i++) {
4305       n_eff_vertices       = PetscMax(n_eff_vertices, count_eff[2 * i]);
4306       n_eff_constraints    = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4307       count_eff[2 * i]     = 0;
4308       count_eff[2 * i + 1] = 0;
4309     }
4310 
4311     const PetscInt *idx;
4312     PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));
4313 
4314     for (PetscInt i = 0; i < n_vertices; i++) {
4315       const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4316       const PetscInt s = 2 * e;
4317 
4318       V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4319       count_eff[s] += 1;
4320     }
4321     for (PetscInt i = 0; i < n_constraints; i++) {
4322       const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4323       const PetscInt s = 2 * e + 1;
4324 
4325       C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4326       count_eff[s] += 1;
4327     }
4328 
4329     PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4330     PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4331     PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4332     PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4333     for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4334     for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4335     for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4336     for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;
4337 
4338     PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4339     for (PetscInt i = 0; i < n_R; i++) {
4340       const PetscInt e = graph->nodes[idx[i]].local_sub;
4341       const PetscInt s = 2 * e;
4342       PetscInt       j;
4343 
4344       for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4345       for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4346     }
4347     PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4348     PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4349     for (PetscInt i = 0; i < n_B; i++) {
4350       const PetscInt e = graph->nodes[idx[i]].local_sub;
4351       const PetscInt s = 2 * e;
4352       PetscInt       j;
4353 
4354       for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4355       for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4356     }
4357     PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));
4358 
4359     /* permutation and blocksizes for block invert of S_CC */
4360     PetscInt *idxp;
4361 
4362     PetscCall(PetscMalloc1(n_constraints, &idxp));
4363     PetscCall(PetscMalloc1(n_el, &C_bss));
4364     n_C_bss = 0;
4365     for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4366       const PetscInt nc = count_eff[2 * e + 1];
4367 
4368       if (nc) C_bss[n_C_bss++] = nc;
4369       for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c];
4370       cnt += nc;
4371     }
4372 
4373     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));
4374 
4375     PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4376     PetscCall(PetscFree(count_eff));
4377   } else {
4378     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4379     n_eff_constraints = n_constraints;
4380     n_eff_vertices    = n_vertices;
4381   }
4382 
4383   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4384   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4385   PetscCall(PCSetUp(pc_R));
4386   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4387   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4388   lda_rhs                = n_R;
4389   need_benign_correction = PETSC_FALSE;
4390   if (isLU || isCHOL) {
4391     PetscCall(PCFactorGetMatrix(pc_R, &F));
4392   } else if (sub_schurs && sub_schurs->reuse_solver) {
4393     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4394     MatFactorType      type;
4395 
4396     F = reuse_solver->F;
4397     PetscCall(MatGetFactorType(F, &type));
4398     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4399     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4400     PetscCall(MatGetSize(F, &lda_rhs, NULL));
4401     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4402   } else F = NULL;
4403 
4404   /* determine if we can use a sparse right-hand side */
4405   sparserhs = PETSC_FALSE;
4406   if (F && !multi_element) {
4407     MatSolverType solver;
4408 
4409     PetscCall(MatFactorGetSolverType(F, &solver));
4410     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4411   }
4412 
4413   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4414   dummy_vec = NULL;
4415   if (need_benign_correction && lda_rhs != n_R && F) {
4416     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4417     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4418     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4419   }
4420 
4421   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4422   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4423 
4424   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4425   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4426   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4427   PetscCall(ISGetIndices(is_V, &idx_V));
4428   PetscCall(ISGetIndices(is_C, &idx_C));
4429 
4430   /* Precompute stuffs needed for preprocessing and application of BDDC*/
4431   if (n_constraints) {
4432     Mat C_B;
4433 
4434     /* Extract constraints on R nodes: C_{CR}  */
4435     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4436     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4437 
4438     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4439     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4440     if (!sparserhs) {
4441       PetscScalar *marr;
4442 
4443       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4444       PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4445       for (i = 0; i < n_constraints; i++) {
4446         const PetscScalar *row_cmat_values;
4447         const PetscInt    *row_cmat_indices;
4448         PetscInt           size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;
4449 
4450         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4451         for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4452         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4453       }
4454       PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4455     } else {
4456       Mat tC_CR;
4457 
4458       PetscCall(MatScale(C_CR, -1.0));
4459       if (lda_rhs != n_R) {
4460         PetscScalar *aa;
4461         PetscInt     r, *ii, *jj;
4462         PetscBool    done;
4463 
4464         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4465         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4466         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4467         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4468         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4469         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4470       } else {
4471         PetscCall(PetscObjectReference((PetscObject)C_CR));
4472         tC_CR = C_CR;
4473       }
4474       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4475       PetscCall(MatDestroy(&tC_CR));
4476     }
4477     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4478     if (F) {
4479       if (need_benign_correction) {
4480         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4481 
4482         /* rhs is already zero on interior dofs, no need to change the rhs */
4483         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4484       }
4485       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4486       if (need_benign_correction) {
4487         PetscScalar       *marr;
4488         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4489 
4490         /* XXX multi_element? */
4491         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4492         if (lda_rhs != n_R) {
4493           for (i = 0; i < n_eff_constraints; i++) {
4494             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4495             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4496             PetscCall(VecResetArray(dummy_vec));
4497           }
4498         } else {
4499           for (i = 0; i < n_eff_constraints; i++) {
4500             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4501             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4502             PetscCall(VecResetArray(pcbddc->vec1_R));
4503           }
4504         }
4505         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4506       }
4507     } else {
4508       const PetscScalar *barr;
4509       PetscScalar       *marr;
4510 
4511       PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4512       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4513       for (i = 0; i < n_eff_constraints; i++) {
4514         PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4515         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4516         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4517         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4518         PetscCall(VecResetArray(pcbddc->vec1_R));
4519         PetscCall(VecResetArray(pcbddc->vec2_R));
4520       }
4521       PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4522       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4523     }
4524     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4525     PetscCall(MatDestroy(&Brhs));
4526     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4527     if (!pcbddc->switch_static) {
4528       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4529       for (i = 0; i < n_eff_constraints; i++) {
4530         Vec r, b;
4531         PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
4532         PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
4533         PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4534         PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4535         PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
4536         PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
4537       }
4538       if (multi_element) {
4539         Mat T;
4540 
4541         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4542         PetscCall(MatDestroy(&local_auxmat2_R));
4543         local_auxmat2_R = T;
4544         PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4545         PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4546         pcbddc->local_auxmat2 = T;
4547       }
4548       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4549     } else {
4550       if (multi_element) {
4551         Mat T;
4552 
4553         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4554         PetscCall(MatDestroy(&local_auxmat2_R));
4555         local_auxmat2_R = T;
4556       }
4557       if (lda_rhs != n_R) {
4558         PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4559       } else {
4560         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4561         pcbddc->local_auxmat2 = local_auxmat2_R;
4562       }
4563       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4564     }
4565     PetscCall(MatScale(S_CC, m_one));
4566     if (multi_element) {
4567       Mat T, T2;
4568       IS  isp, ispi;
4569 
4570       isp = is_C_perm;
4571 
4572       PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4573       PetscCall(MatPermute(S_CC, isp, isp, &T));
4574       PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4575       PetscCall(MatDestroy(&T));
4576       PetscCall(MatDestroy(&S_CC));
4577       PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4578       PetscCall(MatDestroy(&T2));
4579       PetscCall(ISDestroy(&ispi));
4580     } else {
4581       if (isCHOL) {
4582         PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4583       } else {
4584         PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4585       }
4586       PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4587     }
4588     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4589     PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1));
4590     PetscCall(MatDestroy(&C_B));
4591     PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4592   }
4593 
4594   /* Get submatrices from subdomain matrix */
4595   if (n_vertices) {
4596 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4597     PetscBool oldpin;
4598 #endif
4599     IS is_aux;
4600 
4601     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4602       IS tis;
4603 
4604       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4605       PetscCall(ISSort(tis));
4606       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4607       PetscCall(ISDestroy(&tis));
4608     } else {
4609       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4610     }
4611 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4612     oldpin = pcbddc->local_mat->boundtocpu;
4613 #endif
4614     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4615     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4616     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4617     /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4618     PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4619     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4620 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4621     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4622 #endif
4623     PetscCall(ISDestroy(&is_aux));
4624   }
4625   PetscCall(ISDestroy(&is_C_perm));
4626   PetscCall(PetscFree(C_bss));
4627 
4628   p0_lidx_I = NULL;
4629   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4630     const PetscInt *idxs;
4631 
4632     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4633     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4634     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4635     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4636   }
4637 
4638   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4639 
4640   /* Matrices of coarse basis functions (local) */
4641   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4642   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4643   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4644   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4645   if (!multi_element) {
4646     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4647     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4648     coarse_phi_multi = NULL;
4649   } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4650     IS is_rows[2] = {pcbddc->is_R_local, NULL};
4651     IS is_cols[2] = {is_V, is_C};
4652 
4653     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4654     PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4655     PetscCall(ISDestroy(&is_rows[1]));
4656   }
4657 
4658   /* vertices */
4659   if (n_vertices) {
4660     PetscBool restoreavr = PETSC_FALSE;
4661     Mat       A_RRmA_RV  = NULL;
4662 
4663     PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4664     PetscCall(MatDestroy(&A_VV));
4665 
4666     if (n_R) {
4667       Mat A_RV_bcorr = NULL, S_VV;
4668 
4669       PetscCall(MatScale(A_RV, m_one));
4670       if (need_benign_correction) {
4671         ISLocalToGlobalMapping RtoN;
4672         IS                     is_p0;
4673         PetscInt              *idxs_p0, n;
4674 
4675         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4676         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4677         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4678         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4679         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4680         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4681         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4682         PetscCall(ISDestroy(&is_p0));
4683       }
4684 
4685       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4686       if (!sparserhs || need_benign_correction) {
4687         if (lda_rhs == n_R && !multi_element) {
4688           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4689         } else {
4690           Mat             T;
4691           PetscScalar    *av, *array;
4692           const PetscInt *xadj, *adjncy;
4693           PetscInt        n;
4694           PetscBool       flg_row;
4695 
4696           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4697           PetscCall(MatDenseGetArrayWrite(T, &array));
4698           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4699           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4700           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4701           for (i = 0; i < n; i++) {
4702             PetscInt j;
4703             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4704           }
4705           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4706           PetscCall(MatDenseRestoreArrayWrite(T, &array));
4707           PetscCall(MatDestroy(&A_RV));
4708           A_RV = T;
4709         }
4710         if (need_benign_correction) {
4711           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4712           PetscScalar       *marr;
4713 
4714           /* XXX multi_element */
4715           PetscCall(MatDenseGetArray(A_RV, &marr));
4716           /* need \Phi^T A_RV = (I+L)A_RV, L given by
4717 
4718                  | 0 0  0 | (V)
4719              L = | 0 0 -1 | (P-p0)
4720                  | 0 0 -1 | (p0)
4721 
4722           */
4723           for (i = 0; i < reuse_solver->benign_n; i++) {
4724             const PetscScalar *vals;
4725             const PetscInt    *idxs, *idxs_zero;
4726             PetscInt           n, j, nz;
4727 
4728             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4729             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4730             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4731             for (j = 0; j < n; j++) {
4732               PetscScalar val = vals[j];
4733               PetscInt    k, col = idxs[j];
4734               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4735             }
4736             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4737             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4738           }
4739           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4740         }
4741         PetscCall(PetscObjectReference((PetscObject)A_RV));
4742         Brhs = A_RV;
4743       } else {
4744         Mat tA_RVT, A_RVT;
4745 
4746         if (!pcbddc->symmetric_primal) {
4747           /* A_RV already scaled by -1 */
4748           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4749         } else {
4750           restoreavr = PETSC_TRUE;
4751           PetscCall(MatScale(A_VR, -1.0));
4752           PetscCall(PetscObjectReference((PetscObject)A_VR));
4753           A_RVT = A_VR;
4754         }
4755         if (lda_rhs != n_R) {
4756           PetscScalar *aa;
4757           PetscInt     r, *ii, *jj;
4758           PetscBool    done;
4759 
4760           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4761           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4762           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4763           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4764           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4765           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4766         } else {
4767           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4768           tA_RVT = A_RVT;
4769         }
4770         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4771         PetscCall(MatDestroy(&tA_RVT));
4772         PetscCall(MatDestroy(&A_RVT));
4773       }
4774       if (F) {
4775         /* need to correct the rhs */
4776         if (need_benign_correction) {
4777           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4778           PetscScalar       *marr;
4779 
4780           PetscCall(MatDenseGetArray(Brhs, &marr));
4781           if (lda_rhs != n_R) {
4782             for (i = 0; i < n_eff_vertices; i++) {
4783               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4784               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4785               PetscCall(VecResetArray(dummy_vec));
4786             }
4787           } else {
4788             for (i = 0; i < n_eff_vertices; i++) {
4789               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4790               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4791               PetscCall(VecResetArray(pcbddc->vec1_R));
4792             }
4793           }
4794           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4795         }
4796         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4797         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4798         /* need to correct the solution */
4799         if (need_benign_correction) {
4800           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4801           PetscScalar       *marr;
4802 
4803           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4804           if (lda_rhs != n_R) {
4805             for (i = 0; i < n_eff_vertices; i++) {
4806               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4807               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4808               PetscCall(VecResetArray(dummy_vec));
4809             }
4810           } else {
4811             for (i = 0; i < n_eff_vertices; i++) {
4812               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4813               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4814               PetscCall(VecResetArray(pcbddc->vec1_R));
4815             }
4816           }
4817           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4818         }
4819       } else {
4820         const PetscScalar *barr;
4821         PetscScalar       *marr;
4822 
4823         PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4824         PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4825         for (i = 0; i < n_eff_vertices; i++) {
4826           PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4827           PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4828           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4829           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4830           PetscCall(VecResetArray(pcbddc->vec1_R));
4831           PetscCall(VecResetArray(pcbddc->vec2_R));
4832         }
4833         PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4834         PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4835       }
4836       PetscCall(MatDestroy(&A_RV));
4837       PetscCall(MatDestroy(&Brhs));
4838       /* S_VV and S_CV */
4839       if (n_constraints) {
4840         Mat B;
4841 
4842         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4843         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B));
4844 
4845         /* S_CV = pcbddc->local_auxmat1 * B */
4846         if (multi_element) {
4847           Mat T;
4848 
4849           PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4850           PetscCall(MatDestroy(&B));
4851           B = T;
4852         }
4853         PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4854         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4855         PetscCall(MatProductSetFromOptions(S_CV));
4856         PetscCall(MatProductSymbolic(S_CV));
4857         PetscCall(MatProductNumeric(S_CV));
4858         PetscCall(MatProductClear(S_CV));
4859         PetscCall(MatDestroy(&B));
4860 
4861         /* B = local_auxmat2_R * S_CV */
4862         PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4863         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4864         PetscCall(MatProductSetFromOptions(B));
4865         PetscCall(MatProductSymbolic(B));
4866         PetscCall(MatProductNumeric(B));
4867 
4868         PetscCall(MatScale(S_CV, m_one));
4869         PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));
4870 
4871         if (multi_element) {
4872           Mat T;
4873 
4874           PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4875           PetscCall(MatDestroy(&A_RRmA_RV));
4876           A_RRmA_RV = T;
4877         }
4878         PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4879         PetscCall(MatDestroy(&B));
4880       } else if (multi_element) {
4881         Mat T;
4882 
4883         PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4884         PetscCall(MatDestroy(&A_RRmA_RV));
4885         A_RRmA_RV = T;
4886       }
4887 
4888       if (lda_rhs != n_R) {
4889         Mat T;
4890 
4891         PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4892         PetscCall(MatDestroy(&A_RRmA_RV));
4893         A_RRmA_RV = T;
4894       }
4895 
4896       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4897       if (need_benign_correction) { /* XXX SPARSE */
4898         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4899         PetscScalar       *sums;
4900         const PetscScalar *marr;
4901 
4902         PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
4903         PetscCall(PetscMalloc1(n_vertices, &sums));
4904         for (i = 0; i < reuse_solver->benign_n; i++) {
4905           const PetscScalar *vals;
4906           const PetscInt    *idxs, *idxs_zero;
4907           PetscInt           n, j, nz;
4908 
4909           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4910           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4911           for (j = 0; j < n_vertices; j++) {
4912             sums[j] = 0.;
4913             for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
4914           }
4915           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4916           for (j = 0; j < n; j++) {
4917             PetscScalar val = vals[j];
4918             for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
4919           }
4920           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4921           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4922         }
4923         PetscCall(PetscFree(sums));
4924         PetscCall(MatDestroy(&A_RV_bcorr));
4925         PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
4926       }
4927 
4928       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV));
4929       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4930       PetscCall(MatDestroy(&S_VV));
4931     }
4932 
4933     /* coarse basis functions */
4934     if (coarse_phi_multi) {
4935       Mat Vid;
4936 
4937       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
4938       PetscCall(MatShift_Basic(Vid, 1.0));
4939       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
4940       PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
4941       PetscCall(MatDestroy(&Vid));
4942     } else {
4943       if (A_RRmA_RV) {
4944         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B));
4945         if (pcbddc->switch_static || pcbddc->dbg_flag) {
4946           PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D));
4947           if (pcbddc->benign_n) {
4948             for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
4949             PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
4950             PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
4951           }
4952         }
4953       }
4954       for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
4955       PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4956       PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4957     }
4958     PetscCall(MatDestroy(&A_RRmA_RV));
4959   }
4960   PetscCall(MatDestroy(&A_RV));
4961   PetscCall(VecDestroy(&dummy_vec));
4962 
4963   if (n_constraints) {
4964     Mat B, B2;
4965 
4966     PetscCall(MatScale(S_CC, m_one));
4967     PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
4968     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4969     PetscCall(MatProductSetFromOptions(B));
4970     PetscCall(MatProductSymbolic(B));
4971     PetscCall(MatProductNumeric(B));
4972 
4973     if (n_vertices) {
4974       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4975         PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
4976       } else {
4977         if (lda_rhs != n_R) {
4978           Mat tB;
4979 
4980           PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
4981           PetscCall(MatDestroy(&B));
4982           B = tB;
4983         }
4984         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC));
4985       }
4986       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
4987     }
4988 
4989     /* coarse basis functions */
4990     if (coarse_phi_multi) {
4991       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
4992     } else {
4993       PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4994       PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2));
4995       PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
4996       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4997         PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4998         PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2));
4999         if (pcbddc->benign_n) {
5000           for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
5001         }
5002         PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
5003       }
5004     }
5005     PetscCall(MatDestroy(&B));
5006   }
5007 
5008   /* assemble sparse coarse basis functions */
5009   if (coarse_phi_multi) {
5010     Mat T;
5011 
5012     PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
5013     PetscCall(MatDestroy(&coarse_phi_multi));
5014     PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
5015     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D));
5016     PetscCall(MatDestroy(&T));
5017   }
5018   PetscCall(MatDestroy(&local_auxmat2_R));
5019   PetscCall(PetscFree(p0_lidx_I));
5020 
5021   /* coarse matrix entries relative to B_0 */
5022   if (pcbddc->benign_n) {
5023     Mat                B0_B, B0_BPHI;
5024     IS                 is_dummy;
5025     const PetscScalar *data;
5026     PetscInt           j;
5027 
5028     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5029     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5030     PetscCall(ISDestroy(&is_dummy));
5031     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5032     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5033     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5034     for (j = 0; j < pcbddc->benign_n; j++) {
5035       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5036       for (i = 0; i < pcbddc->local_primal_size; i++) {
5037         PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5038         PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5039       }
5040     }
5041     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5042     PetscCall(MatDestroy(&B0_B));
5043     PetscCall(MatDestroy(&B0_BPHI));
5044   }
5045 
5046   /* compute other basis functions for non-symmetric problems */
5047   if (!pcbddc->symmetric_primal) {
5048     Mat          B_V = NULL, B_C = NULL;
5049     PetscScalar *marray, *work;
5050 
5051     /* TODO multi_element MatDenseScatter */
5052     if (n_constraints) {
5053       Mat S_CCT, C_CRT;
5054 
5055       PetscCall(MatScale(S_CC, m_one));
5056       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5057       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5058       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C));
5059       PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5060       PetscCall(MatDestroy(&S_CCT));
5061       if (n_vertices) {
5062         Mat S_VCT;
5063 
5064         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5065         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V));
5066         PetscCall(MatDestroy(&S_VCT));
5067         PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5068       }
5069       PetscCall(MatDestroy(&C_CRT));
5070     } else {
5071       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5072     }
5073     if (n_vertices && n_R) {
5074       PetscScalar    *av, *marray;
5075       const PetscInt *xadj, *adjncy;
5076       PetscInt        n;
5077       PetscBool       flg_row;
5078 
5079       /* B_V = B_V - A_VR^T */
5080       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5081       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5082       PetscCall(MatSeqAIJGetArray(A_VR, &av));
5083       PetscCall(MatDenseGetArray(B_V, &marray));
5084       for (i = 0; i < n; i++) {
5085         PetscInt j;
5086         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5087       }
5088       PetscCall(MatDenseRestoreArray(B_V, &marray));
5089       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5090       PetscCall(MatDestroy(&A_VR));
5091     }
5092 
5093     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5094     PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5095     if (n_vertices) {
5096       PetscCall(MatDenseGetArray(B_V, &marray));
5097       for (i = 0; i < n_vertices; i++) {
5098         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5099         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5100         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5101         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5102         PetscCall(VecResetArray(pcbddc->vec1_R));
5103         PetscCall(VecResetArray(pcbddc->vec2_R));
5104       }
5105       PetscCall(MatDenseRestoreArray(B_V, &marray));
5106     }
5107     if (B_C) {
5108       PetscCall(MatDenseGetArray(B_C, &marray));
5109       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5110         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5111         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5112         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5113         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5114         PetscCall(VecResetArray(pcbddc->vec1_R));
5115         PetscCall(VecResetArray(pcbddc->vec2_R));
5116       }
5117       PetscCall(MatDenseRestoreArray(B_C, &marray));
5118     }
5119     /* coarse basis functions */
5120     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5121     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5122     for (i = 0; i < pcbddc->local_primal_size; i++) {
5123       Vec v;
5124 
5125       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5126       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5127       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5128       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5129       if (i < n_vertices) {
5130         PetscScalar one = 1.0;
5131         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5132         PetscCall(VecAssemblyBegin(v));
5133         PetscCall(VecAssemblyEnd(v));
5134       }
5135       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
5136 
5137       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5138         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5139         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5140         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5141         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5142       }
5143       PetscCall(VecResetArray(pcbddc->vec1_R));
5144     }
5145     PetscCall(MatDestroy(&B_V));
5146     PetscCall(MatDestroy(&B_C));
5147     PetscCall(PetscFree(work));
5148   } else {
5149     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5150     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5151     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5152     pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5153   }
5154   PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5155   PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));
5156 
5157   /* free memory */
5158   PetscCall(PetscFree(V_to_eff_V));
5159   PetscCall(PetscFree(C_to_eff_C));
5160   PetscCall(PetscFree(R_eff_V_J));
5161   PetscCall(PetscFree(R_eff_C_J));
5162   PetscCall(PetscFree(B_eff_V_J));
5163   PetscCall(PetscFree(B_eff_C_J));
5164   PetscCall(ISDestroy(&is_R));
5165   PetscCall(ISRestoreIndices(is_V, &idx_V));
5166   PetscCall(ISRestoreIndices(is_C, &idx_C));
5167   PetscCall(ISDestroy(&is_V));
5168   PetscCall(ISDestroy(&is_C));
5169   PetscCall(PetscFree(idx_V_B));
5170   PetscCall(MatDestroy(&S_CV));
5171   PetscCall(MatDestroy(&S_VC));
5172   PetscCall(MatDestroy(&S_CC));
5173   if (n_vertices) PetscCall(MatDestroy(&A_VR));
5174   if (n_constraints) PetscCall(MatDestroy(&C_CR));
5175   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
5176 
5177   /* Checking coarse_sub_mat and coarse basis functions */
5178   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5179   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5180   if (pcbddc->dbg_flag) {
5181     Mat       AUXMAT, TM1, TM2, TM3, TM4;
5182     Mat       coarse_phi_D, coarse_phi_B;
5183     Mat       coarse_psi_D, coarse_psi_B;
5184     Mat       A_II, A_BB, A_IB, A_BI;
5185     Mat       C_B, CPHI;
5186     IS        is_dummy;
5187     Vec       mones;
5188     MatType   checkmattype = MATSEQAIJ;
5189     PetscReal real_value;
5190 
5191     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5192       Mat A;
5193       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5194       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5195       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5196       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5197       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5198       PetscCall(MatDestroy(&A));
5199     } else {
5200       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5201       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5202       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5203       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5204     }
5205     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5206     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5207     if (!pcbddc->symmetric_primal) {
5208       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5209       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5210     }
5211     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5212     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5213     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5214     if (!pcbddc->symmetric_primal) {
5215       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5216       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5217       PetscCall(MatDestroy(&AUXMAT));
5218       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5219       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5220       PetscCall(MatDestroy(&AUXMAT));
5221       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5222       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5223       PetscCall(MatDestroy(&AUXMAT));
5224       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5225       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5226       PetscCall(MatDestroy(&AUXMAT));
5227     } else {
5228       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5229       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5230       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5231       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5232       PetscCall(MatDestroy(&AUXMAT));
5233       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5234       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5235       PetscCall(MatDestroy(&AUXMAT));
5236     }
5237     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5238     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5239     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5240     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5241     if (pcbddc->benign_n) {
5242       Mat                B0_B, B0_BPHI;
5243       const PetscScalar *data2;
5244       PetscScalar       *data;
5245       PetscInt           j;
5246 
5247       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5248       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5249       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5250       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5251       PetscCall(MatDenseGetArray(TM1, &data));
5252       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5253       for (j = 0; j < pcbddc->benign_n; j++) {
5254         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5255         for (i = 0; i < pcbddc->local_primal_size; i++) {
5256           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5257           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5258         }
5259       }
5260       PetscCall(MatDenseRestoreArray(TM1, &data));
5261       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5262       PetscCall(MatDestroy(&B0_B));
5263       PetscCall(ISDestroy(&is_dummy));
5264       PetscCall(MatDestroy(&B0_BPHI));
5265     }
5266     PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5267     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5268     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5269     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
5270 
5271     /* check constraints */
5272     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5273     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5274     if (!pcbddc->benign_n) { /* TODO: add benign case */
5275       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5276     } else {
5277       PetscScalar *data;
5278       Mat          tmat;
5279       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5280       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5281       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5282       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5283       PetscCall(MatDestroy(&tmat));
5284     }
5285     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5286     PetscCall(VecSet(mones, -1.0));
5287     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5288     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5289     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5290     if (!pcbddc->symmetric_primal) {
5291       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5292       PetscCall(VecSet(mones, -1.0));
5293       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5294       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5295       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5296     }
5297     PetscCall(MatDestroy(&C_B));
5298     PetscCall(MatDestroy(&CPHI));
5299     PetscCall(ISDestroy(&is_dummy));
5300     PetscCall(VecDestroy(&mones));
5301     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5302     PetscCall(MatDestroy(&A_II));
5303     PetscCall(MatDestroy(&A_BB));
5304     PetscCall(MatDestroy(&A_IB));
5305     PetscCall(MatDestroy(&A_BI));
5306     PetscCall(MatDestroy(&TM1));
5307     PetscCall(MatDestroy(&TM2));
5308     PetscCall(MatDestroy(&TM3));
5309     PetscCall(MatDestroy(&TM4));
5310     PetscCall(MatDestroy(&coarse_phi_D));
5311     PetscCall(MatDestroy(&coarse_phi_B));
5312     if (!pcbddc->symmetric_primal) {
5313       PetscCall(MatDestroy(&coarse_psi_D));
5314       PetscCall(MatDestroy(&coarse_psi_B));
5315     }
5316   }
5317 
5318 #if 0
5319   {
5320     PetscViewer viewer;
5321     char filename[256];
5322 
5323     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5324     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5325     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5326     PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5327     PetscCall(MatView(*coarse_submat,viewer));
5328     if (pcbddc->coarse_phi_B) {
5329       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5330       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5331     }
5332     if (pcbddc->coarse_phi_D) {
5333       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5334       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5335     }
5336     if (pcbddc->coarse_psi_B) {
5337       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5338       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5339     }
5340     if (pcbddc->coarse_psi_D) {
5341       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5342       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5343     }
5344     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5345     PetscCall(MatView(pcbddc->local_mat,viewer));
5346     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5347     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5348     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5349     PetscCall(ISView(pcis->is_I_local,viewer));
5350     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5351     PetscCall(ISView(pcis->is_B_local,viewer));
5352     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5353     PetscCall(ISView(pcbddc->is_R_local,viewer));
5354     PetscCall(PetscViewerDestroy(&viewer));
5355   }
5356 #endif
5357 
5358   /* device support */
5359   {
5360     PetscBool iscuda, iship, iskokkos;
5361     MatType   mtype = NULL;
5362 
5363     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5364     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5365     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5366     if (iskokkos) {
5367       if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5368       else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5369     }
5370     if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5371     else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5372     else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5373     if (mtype) {
5374       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5375       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5376       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5377       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5378       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5379       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5380     }
5381   }
5382   PetscFunctionReturn(PETSC_SUCCESS);
5383 }
5384 
5385 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5386 {
5387   Mat      *work_mat;
5388   IS        isrow_s, iscol_s;
5389   PetscBool rsorted, csorted;
5390   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
5391 
5392   PetscFunctionBegin;
5393   PetscCall(ISSorted(isrow, &rsorted));
5394   PetscCall(ISSorted(iscol, &csorted));
5395   PetscCall(ISGetLocalSize(isrow, &rsize));
5396   PetscCall(ISGetLocalSize(iscol, &csize));
5397 
5398   if (!rsorted) {
5399     const PetscInt *idxs;
5400     PetscInt       *idxs_sorted, i;
5401 
5402     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5403     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5404     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5405     PetscCall(ISGetIndices(isrow, &idxs));
5406     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5407     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5408     PetscCall(ISRestoreIndices(isrow, &idxs));
5409     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5410   } else {
5411     PetscCall(PetscObjectReference((PetscObject)isrow));
5412     isrow_s = isrow;
5413   }
5414 
5415   if (!csorted) {
5416     if (isrow == iscol) {
5417       PetscCall(PetscObjectReference((PetscObject)isrow_s));
5418       iscol_s = isrow_s;
5419     } else {
5420       const PetscInt *idxs;
5421       PetscInt       *idxs_sorted, i;
5422 
5423       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5424       PetscCall(PetscMalloc1(csize, &idxs_sorted));
5425       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5426       PetscCall(ISGetIndices(iscol, &idxs));
5427       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5428       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5429       PetscCall(ISRestoreIndices(iscol, &idxs));
5430       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5431     }
5432   } else {
5433     PetscCall(PetscObjectReference((PetscObject)iscol));
5434     iscol_s = iscol;
5435   }
5436 
5437   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
5438 
5439   if (!rsorted || !csorted) {
5440     Mat new_mat;
5441     IS  is_perm_r, is_perm_c;
5442 
5443     if (!rsorted) {
5444       PetscInt *idxs_r, i;
5445       PetscCall(PetscMalloc1(rsize, &idxs_r));
5446       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5447       PetscCall(PetscFree(idxs_perm_r));
5448       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5449     } else {
5450       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5451     }
5452     PetscCall(ISSetPermutation(is_perm_r));
5453 
5454     if (!csorted) {
5455       if (isrow_s == iscol_s) {
5456         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5457         is_perm_c = is_perm_r;
5458       } else {
5459         PetscInt *idxs_c, i;
5460         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5461         PetscCall(PetscMalloc1(csize, &idxs_c));
5462         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5463         PetscCall(PetscFree(idxs_perm_c));
5464         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5465       }
5466     } else {
5467       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5468     }
5469     PetscCall(ISSetPermutation(is_perm_c));
5470 
5471     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5472     PetscCall(MatDestroy(&work_mat[0]));
5473     work_mat[0] = new_mat;
5474     PetscCall(ISDestroy(&is_perm_r));
5475     PetscCall(ISDestroy(&is_perm_c));
5476   }
5477 
5478   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5479   *B = work_mat[0];
5480   PetscCall(MatDestroyMatrices(1, &work_mat));
5481   PetscCall(ISDestroy(&isrow_s));
5482   PetscCall(ISDestroy(&iscol_s));
5483   PetscFunctionReturn(PETSC_SUCCESS);
5484 }
5485 
5486 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5487 {
5488   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
5489   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
5490   Mat       new_mat, lA;
5491   IS        is_local, is_global;
5492   PetscInt  local_size;
5493   PetscBool isseqaij, issym, isset;
5494 
5495   PetscFunctionBegin;
5496   PetscCall(MatDestroy(&pcbddc->local_mat));
5497   PetscCall(MatGetSize(matis->A, &local_size, NULL));
5498   if (pcbddc->mat_graph->multi_element) {
5499     Mat     *mats, *bdiags;
5500     IS      *gsubs;
5501     PetscInt nsubs = pcbddc->n_local_subs;
5502 
5503     PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5504     PetscCall(PetscMalloc1(nsubs, &gsubs));
5505     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5506     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5507     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5508     PetscCall(PetscFree(gsubs));
5509 
5510     for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5511     PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5512     PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5513     PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5514     PetscCall(PetscFree(mats));
5515   } else {
5516     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5517     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5518     PetscCall(ISDestroy(&is_local));
5519     PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5520     PetscCall(ISDestroy(&is_global));
5521   }
5522   if (pcbddc->dbg_flag) {
5523     Vec       x, x_change;
5524     PetscReal error;
5525 
5526     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5527     PetscCall(VecSetRandom(x, NULL));
5528     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5529     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5530     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5531     PetscCall(MatMult(new_mat, matis->x, matis->y));
5532     if (!pcbddc->change_interior) {
5533       const PetscScalar *x, *y, *v;
5534       PetscReal          lerror = 0.;
5535       PetscInt           i;
5536 
5537       PetscCall(VecGetArrayRead(matis->x, &x));
5538       PetscCall(VecGetArrayRead(matis->y, &y));
5539       PetscCall(VecGetArrayRead(matis->counter, &v));
5540       for (i = 0; i < local_size; i++)
5541         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5542       PetscCall(VecRestoreArrayRead(matis->x, &x));
5543       PetscCall(VecRestoreArrayRead(matis->y, &y));
5544       PetscCall(VecRestoreArrayRead(matis->counter, &v));
5545       PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5546       if (error > PETSC_SMALL) {
5547         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5548           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5549         } else {
5550           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5551         }
5552       }
5553     }
5554     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5555     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5556     PetscCall(VecAXPY(x, -1.0, x_change));
5557     PetscCall(VecNorm(x, NORM_INFINITY, &error));
5558     if (error > PETSC_SMALL) {
5559       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5560         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5561       } else {
5562         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5563       }
5564     }
5565     PetscCall(VecDestroy(&x));
5566     PetscCall(VecDestroy(&x_change));
5567   }
5568 
5569   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5570   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
5571 
5572   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5573   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5574   if (isseqaij) {
5575     PetscCall(MatDestroy(&pcbddc->local_mat));
5576     PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5577     if (lA) {
5578       Mat work;
5579       PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5580       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5581       PetscCall(MatDestroy(&work));
5582     }
5583   } else {
5584     Mat work_mat;
5585 
5586     PetscCall(MatDestroy(&pcbddc->local_mat));
5587     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5588     PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5589     PetscCall(MatDestroy(&work_mat));
5590     if (lA) {
5591       Mat work;
5592       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5593       PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5594       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5595       PetscCall(MatDestroy(&work));
5596     }
5597   }
5598   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5599   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5600   PetscCall(MatDestroy(&new_mat));
5601   PetscFunctionReturn(PETSC_SUCCESS);
5602 }
5603 
5604 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5605 {
5606   PC_IS          *pcis        = (PC_IS *)pc->data;
5607   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
5608   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
5609   PetscInt       *idx_R_local = NULL;
5610   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
5611   PetscInt        vbs, bs;
5612   PetscBT         bitmask = NULL;
5613 
5614   PetscFunctionBegin;
5615   /*
5616     No need to setup local scatters if
5617       - primal space is unchanged
5618         AND
5619       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5620         AND
5621       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5622   */
5623   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5624   /* destroy old objects */
5625   PetscCall(ISDestroy(&pcbddc->is_R_local));
5626   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5627   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5628   /* Set Non-overlapping dimensions */
5629   n_B        = pcis->n_B;
5630   n_D        = pcis->n - n_B;
5631   n_vertices = pcbddc->n_vertices;
5632 
5633   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */
5634 
5635   /* create auxiliary bitmask and allocate workspace */
5636   if (!sub_schurs || !sub_schurs->reuse_solver) {
5637     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5638     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5639     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5640 
5641     for (i = 0, n_R = 0; i < pcis->n; i++) {
5642       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5643     }
5644   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5645     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5646 
5647     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5648     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5649   }
5650 
5651   /* Block code */
5652   vbs = 1;
5653   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5654   if (bs > 1 && !(n_vertices % bs)) {
5655     PetscBool is_blocked = PETSC_TRUE;
5656     PetscInt *vary;
5657     if (!sub_schurs || !sub_schurs->reuse_solver) {
5658       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5659       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5660       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5661       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5662       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5663       for (i = 0; i < pcis->n / bs; i++) {
5664         if (vary[i] != 0 && vary[i] != bs) {
5665           is_blocked = PETSC_FALSE;
5666           break;
5667         }
5668       }
5669       PetscCall(PetscFree(vary));
5670     } else {
5671       /* Verify directly the R set */
5672       for (i = 0; i < n_R / bs; i++) {
5673         PetscInt j, node = idx_R_local[bs * i];
5674         for (j = 1; j < bs; j++) {
5675           if (node != idx_R_local[bs * i + j] - j) {
5676             is_blocked = PETSC_FALSE;
5677             break;
5678           }
5679         }
5680       }
5681     }
5682     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5683       vbs = bs;
5684       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5685     }
5686   }
5687   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5688   if (sub_schurs && sub_schurs->reuse_solver) {
5689     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5690 
5691     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5692     PetscCall(ISDestroy(&reuse_solver->is_R));
5693     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5694     reuse_solver->is_R = pcbddc->is_R_local;
5695   } else {
5696     PetscCall(PetscFree(idx_R_local));
5697   }
5698 
5699   /* print some info if requested */
5700   if (pcbddc->dbg_flag) {
5701     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5702     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5703     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5704     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5705     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5706     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5707                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5708     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5709   }
5710 
5711   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5712   if (!sub_schurs || !sub_schurs->reuse_solver) {
5713     IS        is_aux1, is_aux2;
5714     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5715 
5716     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5717     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5718     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5719     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5720     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5721     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5722     for (i = 0, j = 0; i < n_R; i++) {
5723       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5724     }
5725     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5726     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5727     for (i = 0, j = 0; i < n_B; i++) {
5728       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5729     }
5730     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5731     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5732     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5733     PetscCall(ISDestroy(&is_aux1));
5734     PetscCall(ISDestroy(&is_aux2));
5735 
5736     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5737       PetscCall(PetscMalloc1(n_D, &aux_array1));
5738       for (i = 0, j = 0; i < n_R; i++) {
5739         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5740       }
5741       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5742       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5743       PetscCall(ISDestroy(&is_aux1));
5744     }
5745     PetscCall(PetscBTDestroy(&bitmask));
5746     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5747   } else {
5748     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5749     IS                 tis;
5750     PetscInt           schur_size;
5751 
5752     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5753     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5754     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5755     PetscCall(ISDestroy(&tis));
5756     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5757       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5758       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5759       PetscCall(ISDestroy(&tis));
5760     }
5761   }
5762   PetscFunctionReturn(PETSC_SUCCESS);
5763 }
5764 
5765 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5766 {
5767   MatNullSpace NullSpace;
5768   Mat          dmat;
5769   const Vec   *nullvecs;
5770   Vec          v, v2, *nullvecs2;
5771   VecScatter   sct = NULL;
5772   PetscScalar *ddata;
5773   PetscInt     k, nnsp_size, bsiz, bsiz2, n, N, bs;
5774   PetscBool    nnsp_has_cnst;
5775 
5776   PetscFunctionBegin;
5777   if (!is && !B) { /* MATIS */
5778     Mat_IS *matis = (Mat_IS *)A->data;
5779 
5780     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5781     sct = matis->cctx;
5782     PetscCall(PetscObjectReference((PetscObject)sct));
5783   } else {
5784     PetscCall(MatGetNullSpace(B, &NullSpace));
5785     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5786     if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5787   }
5788   PetscCall(MatGetNullSpace(A, &NullSpace));
5789   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5790   if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5791 
5792   PetscCall(MatCreateVecs(A, &v, NULL));
5793   PetscCall(MatCreateVecs(B, &v2, NULL));
5794   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5795   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs));
5796   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5797   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5798   PetscCall(VecGetBlockSize(v2, &bs));
5799   PetscCall(VecGetSize(v2, &N));
5800   PetscCall(VecGetLocalSize(v2, &n));
5801   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5802   for (k = 0; k < nnsp_size; k++) {
5803     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5804     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5805     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5806   }
5807   if (nnsp_has_cnst) {
5808     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5809     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5810   }
5811   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5812   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5813 
5814   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5815   PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault));
5816   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5817   PetscCall(MatDestroy(&dmat));
5818 
5819   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5820   PetscCall(PetscFree(nullvecs2));
5821   PetscCall(MatSetNearNullSpace(B, NullSpace));
5822   PetscCall(MatNullSpaceDestroy(&NullSpace));
5823   PetscCall(VecDestroy(&v));
5824   PetscCall(VecDestroy(&v2));
5825   PetscCall(VecScatterDestroy(&sct));
5826   PetscFunctionReturn(PETSC_SUCCESS);
5827 }
5828 
5829 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5830 {
5831   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5832   PC_IS       *pcis   = (PC_IS *)pc->data;
5833   PC           pc_temp;
5834   Mat          A_RR;
5835   MatNullSpace nnsp;
5836   MatReuse     reuse;
5837   PetscScalar  m_one = -1.0;
5838   PetscReal    value;
5839   PetscInt     n_D, n_R;
5840   PetscBool    issbaij, opts, isset, issym;
5841   PetscBool    f = PETSC_FALSE;
5842   char         dir_prefix[256], neu_prefix[256], str_level[16];
5843   size_t       len;
5844 
5845   PetscFunctionBegin;
5846   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5847   /* approximate solver, propagate NearNullSpace if needed */
5848   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5849     MatNullSpace gnnsp1, gnnsp2;
5850     PetscBool    lhas, ghas;
5851 
5852     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5853     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5854     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5855     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5856     PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5857     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5858   }
5859 
5860   /* compute prefixes */
5861   PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5862   PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5863   if (!pcbddc->current_level) {
5864     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5865     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5866     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5867     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5868   } else {
5869     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
5870     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5871     len -= 15;                                /* remove "pc_bddc_coarse_" */
5872     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5873     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5874     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5875     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5876     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5877     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5878     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5879     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5880     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5881   }
5882 
5883   /* DIRICHLET PROBLEM */
5884   if (dirichlet) {
5885     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5886     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5887       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5888       if (pcbddc->dbg_flag) {
5889         Mat A_IIn;
5890 
5891         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5892         PetscCall(MatDestroy(&pcis->A_II));
5893         pcis->A_II = A_IIn;
5894       }
5895     }
5896     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5897     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
5898 
5899     /* Matrix for Dirichlet problem is pcis->A_II */
5900     n_D  = pcis->n - pcis->n_B;
5901     opts = PETSC_FALSE;
5902     if (!pcbddc->ksp_D) { /* create object if not yet build */
5903       opts = PETSC_TRUE;
5904       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
5905       PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
5906       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
5907       /* default */
5908       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
5909       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
5910       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
5911       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5912       if (issbaij) {
5913         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5914       } else {
5915         PetscCall(PCSetType(pc_temp, PCLU));
5916       }
5917       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
5918     }
5919     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
5920     PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
5921     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
5922     /* Allow user's customization */
5923     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
5924     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5925     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5926       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
5927     }
5928     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5929     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5930     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5931     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5932       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5933       const PetscInt *idxs;
5934       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
5935 
5936       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
5937       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
5938       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5939       for (i = 0; i < nl; i++) {
5940         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5941       }
5942       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
5943       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5944       PetscCall(PetscFree(scoords));
5945     }
5946     if (sub_schurs && sub_schurs->reuse_solver) {
5947       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5948 
5949       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
5950     }
5951 
5952     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5953     if (!n_D) {
5954       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5955       PetscCall(PCSetType(pc_temp, PCNONE));
5956     }
5957     PetscCall(KSPSetUp(pcbddc->ksp_D));
5958     /* set ksp_D into pcis data */
5959     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5960     PetscCall(KSPDestroy(&pcis->ksp_D));
5961     pcis->ksp_D = pcbddc->ksp_D;
5962   }
5963 
5964   /* NEUMANN PROBLEM */
5965   A_RR = NULL;
5966   if (neumann) {
5967     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5968     PetscInt        ibs, mbs;
5969     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
5970     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
5971 
5972     reuse_neumann_solver = PETSC_FALSE;
5973     if (sub_schurs && sub_schurs->reuse_solver) {
5974       IS iP;
5975 
5976       reuse_neumann_solver = PETSC_TRUE;
5977       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
5978       if (iP) reuse_neumann_solver = PETSC_FALSE;
5979     }
5980     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5981     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
5982     if (pcbddc->ksp_R) { /* already created ksp */
5983       PetscInt nn_R;
5984       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
5985       PetscCall(PetscObjectReference((PetscObject)A_RR));
5986       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
5987       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5988         PetscCall(KSPReset(pcbddc->ksp_R));
5989         PetscCall(MatDestroy(&A_RR));
5990         reuse = MAT_INITIAL_MATRIX;
5991       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5992         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5993           PetscCall(MatDestroy(&A_RR));
5994           reuse = MAT_INITIAL_MATRIX;
5995         } else { /* safe to reuse the matrix */
5996           reuse = MAT_REUSE_MATRIX;
5997         }
5998       }
5999       /* last check */
6000       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
6001         PetscCall(MatDestroy(&A_RR));
6002         reuse = MAT_INITIAL_MATRIX;
6003       }
6004     } else { /* first time, so we need to create the matrix */
6005       reuse = MAT_INITIAL_MATRIX;
6006     }
6007     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
6008        TODO: Get Rid of these conversions */
6009     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
6010     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
6011     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
6012     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
6013       if (matis->A == pcbddc->local_mat) {
6014         PetscCall(MatDestroy(&pcbddc->local_mat));
6015         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6016       } else {
6017         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6018       }
6019     } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
6020       if (matis->A == pcbddc->local_mat) {
6021         PetscCall(MatDestroy(&pcbddc->local_mat));
6022         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6023       } else {
6024         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6025       }
6026     }
6027     /* extract A_RR */
6028     if (reuse_neumann_solver) {
6029       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6030 
6031       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6032         PetscCall(MatDestroy(&A_RR));
6033         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6034           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6035         } else {
6036           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6037         }
6038       } else {
6039         PetscCall(MatDestroy(&A_RR));
6040         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6041         PetscCall(PetscObjectReference((PetscObject)A_RR));
6042       }
6043     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6044       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6045     }
6046     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6047     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6048     opts = PETSC_FALSE;
6049     if (!pcbddc->ksp_R) { /* create object if not present */
6050       opts = PETSC_TRUE;
6051       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6052       PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6053       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6054       /* default */
6055       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6056       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6057       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6058       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6059       if (issbaij) {
6060         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6061       } else {
6062         PetscCall(PCSetType(pc_temp, PCLU));
6063       }
6064       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6065     }
6066     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6067     PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6068     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6069     if (opts) { /* Allow user's customization once */
6070       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6071     }
6072     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6073     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6074       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6075     }
6076     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6077     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6078     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6079     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6080       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6081       const PetscInt *idxs;
6082       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
6083 
6084       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6085       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6086       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6087       for (i = 0; i < nl; i++) {
6088         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6089       }
6090       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6091       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6092       PetscCall(PetscFree(scoords));
6093     }
6094 
6095     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6096     if (!n_R) {
6097       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6098       PetscCall(PCSetType(pc_temp, PCNONE));
6099     }
6100     /* Reuse solver if it is present */
6101     if (reuse_neumann_solver) {
6102       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6103 
6104       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6105     }
6106     PetscCall(KSPSetUp(pcbddc->ksp_R));
6107   }
6108 
6109   if (pcbddc->dbg_flag) {
6110     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6111     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6112     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6113   }
6114   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
6115 
6116   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6117   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6118   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6119   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6120   /* check Dirichlet and Neumann solvers */
6121   if (pcbddc->dbg_flag) {
6122     if (dirichlet) { /* Dirichlet */
6123       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6124       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6125       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6126       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6127       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6128       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6129       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6130       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6131     }
6132     if (neumann) { /* Neumann */
6133       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6134       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6135       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6136       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6137       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6138       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6139       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6140       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6141     }
6142   }
6143   /* free Neumann problem's matrix */
6144   PetscCall(MatDestroy(&A_RR));
6145   PetscFunctionReturn(PETSC_SUCCESS);
6146 }
6147 
6148 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6149 {
6150   PC_BDDC        *pcbddc       = (PC_BDDC *)pc->data;
6151   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
6152   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
6153 
6154   PetscFunctionBegin;
6155   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6156   if (!pcbddc->switch_static) {
6157     if (applytranspose && pcbddc->local_auxmat1) {
6158       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6159       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6160     }
6161     if (!reuse_solver) {
6162       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6163       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6164     } else {
6165       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6166 
6167       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6168       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6169     }
6170   } else {
6171     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6172     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6173     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6174     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6175     if (applytranspose && pcbddc->local_auxmat1) {
6176       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6177       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6178       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6179       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6180     }
6181   }
6182   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6183   if (!reuse_solver || pcbddc->switch_static) {
6184     if (applytranspose) {
6185       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6186     } else {
6187       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6188     }
6189     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6190   } else {
6191     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6192 
6193     if (applytranspose) {
6194       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6195     } else {
6196       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6197     }
6198   }
6199   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6200   PetscCall(VecSet(inout_B, 0.));
6201   if (!pcbddc->switch_static) {
6202     if (!reuse_solver) {
6203       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6204       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6205     } else {
6206       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6207 
6208       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6209       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6210     }
6211     if (!applytranspose && pcbddc->local_auxmat1) {
6212       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6213       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6214     }
6215   } else {
6216     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6217     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6218     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6219     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6220     if (!applytranspose && pcbddc->local_auxmat1) {
6221       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6222       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6223     }
6224     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6225     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6226     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6227     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6228   }
6229   PetscFunctionReturn(PETSC_SUCCESS);
6230 }
6231 
6232 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
6233 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6234 {
6235   PC_BDDC          *pcbddc = (PC_BDDC *)pc->data;
6236   PC_IS            *pcis   = (PC_IS *)pc->data;
6237   const PetscScalar zero   = 0.0;
6238 
6239   PetscFunctionBegin;
6240   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6241   if (!pcbddc->benign_apply_coarse_only) {
6242     if (applytranspose) {
6243       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6244       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6245     } else {
6246       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6247       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6248     }
6249   } else {
6250     PetscCall(VecSet(pcbddc->vec1_P, zero));
6251   }
6252 
6253   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6254   if (pcbddc->benign_n) {
6255     PetscScalar *array;
6256     PetscInt     j;
6257 
6258     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6259     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6260     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6261   }
6262 
6263   /* start communications from local primal nodes to rhs of coarse solver */
6264   PetscCall(VecSet(pcbddc->coarse_vec, zero));
6265   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6266   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
6267 
6268   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6269   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6270   if (pcbddc->coarse_ksp) {
6271     Mat          coarse_mat;
6272     Vec          rhs, sol;
6273     MatNullSpace nullsp;
6274     PetscBool    isbddc = PETSC_FALSE;
6275 
6276     if (pcbddc->benign_have_null) {
6277       PC coarse_pc;
6278 
6279       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6280       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6281       /* we need to propagate to coarser levels the need for a possible benign correction */
6282       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6283         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)coarse_pc->data;
6284         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
6285         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6286       }
6287     }
6288     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6289     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6290     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6291     if (applytranspose) {
6292       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6293       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6294       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6295       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6296       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6297     } else {
6298       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6299       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6300         PC coarse_pc;
6301 
6302         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6303         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6304         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6305         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6306         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6307       } else {
6308         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6309         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6310         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6311       }
6312     }
6313     /* we don't need the benign correction at coarser levels anymore */
6314     if (pcbddc->benign_have_null && isbddc) {
6315       PC       coarse_pc;
6316       PC_BDDC *coarsepcbddc;
6317 
6318       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6319       coarsepcbddc                           = (PC_BDDC *)coarse_pc->data;
6320       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
6321       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6322     }
6323   }
6324   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6325 
6326   /* Local solution on R nodes */
6327   if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6328   /* communications from coarse sol to local primal nodes */
6329   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6330   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
6331 
6332   /* Sum contributions from the two levels */
6333   if (!pcbddc->benign_apply_coarse_only) {
6334     if (applytranspose) {
6335       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6336       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6337     } else {
6338       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6339       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6340     }
6341     /* store p0 */
6342     if (pcbddc->benign_n) {
6343       PetscScalar *array;
6344       PetscInt     j;
6345 
6346       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6347       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6348       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6349     }
6350   } else { /* expand the coarse solution */
6351     if (applytranspose) {
6352       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6353     } else {
6354       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6355     }
6356   }
6357   PetscFunctionReturn(PETSC_SUCCESS);
6358 }
6359 
6360 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6361 {
6362   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6363   Vec                from, to;
6364   const PetscScalar *array;
6365 
6366   PetscFunctionBegin;
6367   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6368     from = pcbddc->coarse_vec;
6369     to   = pcbddc->vec1_P;
6370     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6371       Vec tvec;
6372 
6373       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6374       PetscCall(VecResetArray(tvec));
6375       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6376       PetscCall(VecGetArrayRead(tvec, &array));
6377       PetscCall(VecPlaceArray(from, array));
6378       PetscCall(VecRestoreArrayRead(tvec, &array));
6379     }
6380   } else { /* from local to global -> put data in coarse right-hand side */
6381     from = pcbddc->vec1_P;
6382     to   = pcbddc->coarse_vec;
6383   }
6384   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6385   PetscFunctionReturn(PETSC_SUCCESS);
6386 }
6387 
6388 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6389 {
6390   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6391   Vec                from, to;
6392   const PetscScalar *array;
6393 
6394   PetscFunctionBegin;
6395   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6396     from = pcbddc->coarse_vec;
6397     to   = pcbddc->vec1_P;
6398   } else { /* from local to global -> put data in coarse right-hand side */
6399     from = pcbddc->vec1_P;
6400     to   = pcbddc->coarse_vec;
6401   }
6402   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6403   if (smode == SCATTER_FORWARD) {
6404     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6405       Vec tvec;
6406 
6407       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6408       PetscCall(VecGetArrayRead(to, &array));
6409       PetscCall(VecPlaceArray(tvec, array));
6410       PetscCall(VecRestoreArrayRead(to, &array));
6411     }
6412   } else {
6413     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6414       PetscCall(VecResetArray(from));
6415     }
6416   }
6417   PetscFunctionReturn(PETSC_SUCCESS);
6418 }
6419 
6420 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6421 {
6422   PC_IS   *pcis   = (PC_IS *)pc->data;
6423   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6424   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
6425   /* one and zero */
6426   PetscScalar one = 1.0, zero = 0.0;
6427   /* space to store constraints and their local indices */
6428   PetscScalar *constraints_data;
6429   PetscInt    *constraints_idxs, *constraints_idxs_B;
6430   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
6431   PetscInt    *constraints_n;
6432   /* iterators */
6433   PetscInt i, j, k, total_counts, total_counts_cc, cum;
6434   /* BLAS integers */
6435   PetscBLASInt lwork, lierr;
6436   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6437   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6438   /* reuse */
6439   PetscInt  olocal_primal_size, olocal_primal_size_cc;
6440   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6441   /* change of basis */
6442   PetscBool qr_needed;
6443   PetscBT   change_basis, qr_needed_idx;
6444   /* auxiliary stuff */
6445   PetscInt *nnz, *is_indices;
6446   PetscInt  ncc;
6447   /* some quantities */
6448   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
6449   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6450   PetscReal tol; /* tolerance for retaining eigenmodes */
6451 
6452   PetscFunctionBegin;
6453   tol = PetscSqrtReal(PETSC_SMALL);
6454   /* Destroy Mat objects computed previously */
6455   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6456   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6457   PetscCall(MatDestroy(&pcbddc->switch_static_change));
6458   /* save info on constraints from previous setup (if any) */
6459   olocal_primal_size    = pcbddc->local_primal_size;
6460   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6461   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6462   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6463   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6464   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6465   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
6466 
6467   if (!pcbddc->adaptive_selection) {
6468     IS           ISForVertices, *ISForFaces, *ISForEdges;
6469     MatNullSpace nearnullsp;
6470     const Vec   *nearnullvecs;
6471     Vec         *localnearnullsp;
6472     PetscScalar *array;
6473     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6474     PetscBool    nnsp_has_cnst;
6475     /* LAPACK working arrays for SVD or POD */
6476     PetscBool    skip_lapack, boolforchange;
6477     PetscScalar *work;
6478     PetscReal   *singular_vals;
6479 #if defined(PETSC_USE_COMPLEX)
6480     PetscReal *rwork;
6481 #endif
6482     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6483     PetscBLASInt dummy_int    = 1;
6484     PetscScalar  dummy_scalar = 1.;
6485     PetscBool    use_pod      = PETSC_FALSE;
6486 
6487     /* MKL SVD with same input gives different results on different processes! */
6488 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6489     use_pod = PETSC_TRUE;
6490 #endif
6491     /* Get index sets for faces, edges and vertices from graph */
6492     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6493     o_nf       = n_ISForFaces;
6494     o_ne       = n_ISForEdges;
6495     n_vertices = 0;
6496     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6497     /* print some info */
6498     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6499       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6500       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6501       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6502       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6503       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6504       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6505       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6506       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6507       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6508     }
6509 
6510     if (!pcbddc->use_vertices) n_vertices = 0;
6511     if (!pcbddc->use_edges) n_ISForEdges = 0;
6512     if (!pcbddc->use_faces) n_ISForFaces = 0;
6513 
6514     /* check if near null space is attached to global mat */
6515     if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6516     else nearnullsp = NULL;
6517 
6518     if (nearnullsp) {
6519       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6520       /* remove any stored info */
6521       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6522       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6523       /* store information for BDDC solver reuse */
6524       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6525       pcbddc->onearnullspace = nearnullsp;
6526       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6527       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6528     } else { /* if near null space is not provided BDDC uses constants by default */
6529       nnsp_size     = 0;
6530       nnsp_has_cnst = PETSC_TRUE;
6531     }
6532     /* get max number of constraints on a single cc */
6533     max_constraints = nnsp_size;
6534     if (nnsp_has_cnst) max_constraints++;
6535 
6536     /*
6537          Evaluate maximum storage size needed by the procedure
6538          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6539          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6540          There can be multiple constraints per connected component
6541                                                                                                                                                            */
6542     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6543     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
6544 
6545     total_counts = n_ISForFaces + n_ISForEdges;
6546     total_counts *= max_constraints;
6547     total_counts += n_vertices;
6548     PetscCall(PetscBTCreate(total_counts, &change_basis));
6549 
6550     total_counts           = 0;
6551     max_size_of_constraint = 0;
6552     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6553       IS used_is;
6554       if (i < n_ISForEdges) {
6555         used_is = ISForEdges[i];
6556       } else {
6557         used_is = ISForFaces[i - n_ISForEdges];
6558       }
6559       PetscCall(ISGetSize(used_is, &j));
6560       total_counts += j;
6561       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6562     }
6563     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
6564 
6565     /* get local part of global near null space vectors */
6566     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6567     for (k = 0; k < nnsp_size; k++) {
6568       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6569       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6570       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6571     }
6572 
6573     /* whether or not to skip lapack calls */
6574     skip_lapack = PETSC_TRUE;
6575     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6576 
6577     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6578     if (!skip_lapack) {
6579       PetscScalar temp_work;
6580 
6581       if (use_pod) {
6582         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6583         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6584         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6585         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6586 #if defined(PETSC_USE_COMPLEX)
6587         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6588 #endif
6589         /* now we evaluate the optimal workspace using query with lwork=-1 */
6590         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6591         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6592         lwork = -1;
6593         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6594 #if !defined(PETSC_USE_COMPLEX)
6595         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6596 #else
6597         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6598 #endif
6599         PetscCall(PetscFPTrapPop());
6600         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6601       } else {
6602 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6603         /* SVD */
6604         PetscInt max_n, min_n;
6605         max_n = max_size_of_constraint;
6606         min_n = max_constraints;
6607         if (max_size_of_constraint < max_constraints) {
6608           min_n = max_size_of_constraint;
6609           max_n = max_constraints;
6610         }
6611         PetscCall(PetscMalloc1(min_n, &singular_vals));
6612   #if defined(PETSC_USE_COMPLEX)
6613         PetscCall(PetscMalloc1(5 * min_n, &rwork));
6614   #endif
6615         /* now we evaluate the optimal workspace using query with lwork=-1 */
6616         lwork = -1;
6617         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6618         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6619         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6620         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6621   #if !defined(PETSC_USE_COMPLEX)
6622         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6623   #else
6624         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6625   #endif
6626         PetscCall(PetscFPTrapPop());
6627         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6628 #else
6629         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6630 #endif /* on missing GESVD */
6631       }
6632       /* Allocate optimal workspace */
6633       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6634       PetscCall(PetscMalloc1(lwork, &work));
6635     }
6636     /* Now we can loop on constraining sets */
6637     total_counts            = 0;
6638     constraints_idxs_ptr[0] = 0;
6639     constraints_data_ptr[0] = 0;
6640     /* vertices */
6641     if (n_vertices) {
6642       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6643       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6644       for (i = 0; i < n_vertices; i++) {
6645         constraints_n[total_counts]            = 1;
6646         constraints_data[total_counts]         = 1.0;
6647         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6648         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6649         total_counts++;
6650       }
6651       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6652     }
6653 
6654     /* edges and faces */
6655     total_counts_cc = total_counts;
6656     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6657       IS        used_is;
6658       PetscBool idxs_copied = PETSC_FALSE;
6659 
6660       if (ncc < n_ISForEdges) {
6661         used_is       = ISForEdges[ncc];
6662         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6663       } else {
6664         used_is       = ISForFaces[ncc - n_ISForEdges];
6665         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6666       }
6667       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6668 
6669       PetscCall(ISGetSize(used_is, &size_of_constraint));
6670       if (!size_of_constraint) continue;
6671       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6672       if (nnsp_has_cnst) {
6673         PetscScalar quad_value;
6674 
6675         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6676         idxs_copied = PETSC_TRUE;
6677 
6678         if (!pcbddc->use_nnsp_true) {
6679           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6680         } else {
6681           quad_value = 1.0;
6682         }
6683         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6684         temp_constraints++;
6685         total_counts++;
6686       }
6687       for (k = 0; k < nnsp_size; k++) {
6688         PetscReal    real_value;
6689         PetscScalar *ptr_to_data;
6690 
6691         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6692         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6693         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6694         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6695         /* check if array is null on the connected component */
6696         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6697         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6698         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6699           temp_constraints++;
6700           total_counts++;
6701           if (!idxs_copied) {
6702             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6703             idxs_copied = PETSC_TRUE;
6704           }
6705         }
6706       }
6707       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6708       valid_constraints = temp_constraints;
6709       if (!pcbddc->use_nnsp_true && temp_constraints) {
6710         if (temp_constraints == 1) { /* just normalize the constraint */
6711           PetscScalar norm, *ptr_to_data;
6712 
6713           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6714           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6715           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6716           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6717           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6718         } else { /* perform SVD */
6719           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6720 
6721           if (use_pod) {
6722             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6723                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6724                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6725                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6726                   from that computed using LAPACKgesvd
6727                -> This is due to a different computation of eigenvectors in LAPACKheev
6728                -> The quality of the POD-computed basis will be the same */
6729             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6730             /* Store upper triangular part of correlation matrix */
6731             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6732             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6733             for (j = 0; j < temp_constraints; j++) {
6734               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6735             }
6736             /* compute eigenvalues and eigenvectors of correlation matrix */
6737             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6738             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6739 #if !defined(PETSC_USE_COMPLEX)
6740             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6741 #else
6742             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6743 #endif
6744             PetscCall(PetscFPTrapPop());
6745             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6746             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6747             j = 0;
6748             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6749             total_counts      = total_counts - j;
6750             valid_constraints = temp_constraints - j;
6751             /* scale and copy POD basis into used quadrature memory */
6752             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6753             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6754             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6755             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6756             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6757             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6758             if (j < temp_constraints) {
6759               PetscInt ii;
6760               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6761               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6762               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6763               PetscCall(PetscFPTrapPop());
6764               for (k = 0; k < temp_constraints - j; k++) {
6765                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6766               }
6767             }
6768           } else {
6769 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6770             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6771             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6772             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6773             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6774   #if !defined(PETSC_USE_COMPLEX)
6775             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6776   #else
6777             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6778   #endif
6779             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6780             PetscCall(PetscFPTrapPop());
6781             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6782             k = temp_constraints;
6783             if (k > size_of_constraint) k = size_of_constraint;
6784             j = 0;
6785             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6786             valid_constraints = k - j;
6787             total_counts      = total_counts - temp_constraints + valid_constraints;
6788 #else
6789             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6790 #endif /* on missing GESVD */
6791           }
6792         }
6793       }
6794       /* update pointers information */
6795       if (valid_constraints) {
6796         constraints_n[total_counts_cc]            = valid_constraints;
6797         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6798         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6799         /* set change_of_basis flag */
6800         if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6801         total_counts_cc++;
6802       }
6803     }
6804     /* free workspace */
6805     if (!skip_lapack) {
6806       PetscCall(PetscFree(work));
6807 #if defined(PETSC_USE_COMPLEX)
6808       PetscCall(PetscFree(rwork));
6809 #endif
6810       PetscCall(PetscFree(singular_vals));
6811       PetscCall(PetscFree(correlation_mat));
6812       PetscCall(PetscFree(temp_basis));
6813     }
6814     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6815     PetscCall(PetscFree(localnearnullsp));
6816     /* free index sets of faces, edges and vertices */
6817     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6818   } else {
6819     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6820 
6821     total_counts = 0;
6822     n_vertices   = 0;
6823     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6824     max_constraints = 0;
6825     total_counts_cc = 0;
6826     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6827       total_counts += pcbddc->adaptive_constraints_n[i];
6828       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6829       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6830     }
6831     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6832     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6833     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6834     constraints_data     = pcbddc->adaptive_constraints_data;
6835     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6836     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6837     total_counts_cc = 0;
6838     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6839       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6840     }
6841 
6842     max_size_of_constraint = 0;
6843     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6844     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6845     /* Change of basis */
6846     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6847     if (pcbddc->use_change_of_basis) {
6848       for (i = 0; i < sub_schurs->n_subs; i++) {
6849         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6850       }
6851     }
6852   }
6853   pcbddc->local_primal_size = total_counts;
6854   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6855 
6856   /* map constraints_idxs in boundary numbering */
6857   if (pcbddc->use_change_of_basis) {
6858     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6859     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6860   }
6861 
6862   /* Create constraint matrix */
6863   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6864   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6865   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6866 
6867   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6868   /* determine if a QR strategy is needed for change of basis */
6869   qr_needed = pcbddc->use_qr_single;
6870   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6871   total_primal_vertices        = 0;
6872   pcbddc->local_primal_size_cc = 0;
6873   for (i = 0; i < total_counts_cc; i++) {
6874     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6875     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6876       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6877       pcbddc->local_primal_size_cc += 1;
6878     } else if (PetscBTLookup(change_basis, i)) {
6879       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6880       pcbddc->local_primal_size_cc += constraints_n[i];
6881       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6882         PetscCall(PetscBTSet(qr_needed_idx, i));
6883         qr_needed = PETSC_TRUE;
6884       }
6885     } else {
6886       pcbddc->local_primal_size_cc += 1;
6887     }
6888   }
6889   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6890   pcbddc->n_vertices = total_primal_vertices;
6891   /* permute indices in order to have a sorted set of vertices */
6892   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6893   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6894   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6895   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6896 
6897   /* nonzero structure of constraint matrix */
6898   /* and get reference dof for local constraints */
6899   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
6900   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
6901 
6902   j            = total_primal_vertices;
6903   total_counts = total_primal_vertices;
6904   cum          = total_primal_vertices;
6905   for (i = n_vertices; i < total_counts_cc; i++) {
6906     if (!PetscBTLookup(change_basis, i)) {
6907       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6908       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6909       cum++;
6910       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6911       for (k = 0; k < constraints_n[i]; k++) {
6912         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6913         nnz[j + k]                                        = size_of_constraint;
6914       }
6915       j += constraints_n[i];
6916     }
6917   }
6918   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
6919   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6920   PetscCall(PetscFree(nnz));
6921 
6922   /* set values in constraint matrix */
6923   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
6924   total_counts = total_primal_vertices;
6925   for (i = n_vertices; i < total_counts_cc; i++) {
6926     if (!PetscBTLookup(change_basis, i)) {
6927       PetscInt *cols;
6928 
6929       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6930       cols               = constraints_idxs + constraints_idxs_ptr[i];
6931       for (k = 0; k < constraints_n[i]; k++) {
6932         PetscInt     row = total_counts + k;
6933         PetscScalar *vals;
6934 
6935         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6936         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
6937       }
6938       total_counts += constraints_n[i];
6939     }
6940   }
6941   /* assembling */
6942   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6943   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6944   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
6945 
6946   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6947   if (pcbddc->use_change_of_basis) {
6948     /* dual and primal dofs on a single cc */
6949     PetscInt dual_dofs, primal_dofs;
6950     /* working stuff for GEQRF */
6951     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6952     PetscBLASInt lqr_work;
6953     /* working stuff for UNGQR */
6954     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6955     PetscBLASInt lgqr_work;
6956     /* working stuff for TRTRS */
6957     PetscScalar *trs_rhs = NULL;
6958     PetscBLASInt Blas_NRHS;
6959     /* pointers for values insertion into change of basis matrix */
6960     PetscInt    *start_rows, *start_cols;
6961     PetscScalar *start_vals;
6962     /* working stuff for values insertion */
6963     PetscBT   is_primal;
6964     PetscInt *aux_primal_numbering_B;
6965     /* matrix sizes */
6966     PetscInt global_size, local_size;
6967     /* temporary change of basis */
6968     Mat localChangeOfBasisMatrix;
6969     /* extra space for debugging */
6970     PetscScalar *dbg_work = NULL;
6971 
6972     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
6973     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
6974     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
6975     /* nonzeros for local mat */
6976     PetscCall(PetscMalloc1(pcis->n, &nnz));
6977     if (!pcbddc->benign_change || pcbddc->fake_change) {
6978       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6979     } else {
6980       const PetscInt *ii;
6981       PetscInt        n;
6982       PetscBool       flg_row;
6983       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6984       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6985       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6986     }
6987     for (i = n_vertices; i < total_counts_cc; i++) {
6988       if (PetscBTLookup(change_basis, i)) {
6989         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6990         if (PetscBTLookup(qr_needed_idx, i)) {
6991           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6992         } else {
6993           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6994           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6995         }
6996       }
6997     }
6998     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
6999     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
7000     PetscCall(PetscFree(nnz));
7001     /* Set interior change in the matrix */
7002     if (!pcbddc->benign_change || pcbddc->fake_change) {
7003       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
7004     } else {
7005       const PetscInt *ii, *jj;
7006       PetscScalar    *aa;
7007       PetscInt        n;
7008       PetscBool       flg_row;
7009       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7010       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
7011       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
7012       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
7013       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7014     }
7015 
7016     if (pcbddc->dbg_flag) {
7017       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
7018       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
7019     }
7020 
7021     /* Now we loop on the constraints which need a change of basis */
7022     /*
7023        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7024        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
7025 
7026        Basic blocks of change of basis matrix T computed:
7027 
7028           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
7029 
7030             | 1        0   ...        0         s_1/S |
7031             | 0        1   ...        0         s_2/S |
7032             |              ...                        |
7033             | 0        ...            1     s_{n-1}/S |
7034             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
7035 
7036             with S = \sum_{i=1}^n s_i^2
7037             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7038                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
7039 
7040           - QR decomposition of constraints otherwise
7041     */
7042     if (qr_needed && max_size_of_constraint) {
7043       /* space to store Q */
7044       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7045       /* array to store scaling factors for reflectors */
7046       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7047       /* first we issue queries for optimal work */
7048       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7049       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7050       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7051       lqr_work = -1;
7052       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7053       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7054       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7055       PetscCall(PetscMalloc1(lqr_work, &qr_work));
7056       lgqr_work = -1;
7057       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7058       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7059       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7060       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7061       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7062       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7063       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7064       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7065       PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7066       /* array to store rhs and solution of triangular solver */
7067       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7068       /* allocating workspace for check */
7069       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7070     }
7071     /* array to store whether a node is primal or not */
7072     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7073     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7074     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7075     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7076     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7077     PetscCall(PetscFree(aux_primal_numbering_B));
7078 
7079     /* loop on constraints and see whether or not they need a change of basis and compute it */
7080     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7081       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7082       if (PetscBTLookup(change_basis, total_counts)) {
7083         /* get constraint info */
7084         primal_dofs = constraints_n[total_counts];
7085         dual_dofs   = size_of_constraint - primal_dofs;
7086 
7087         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
7088 
7089         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
7090 
7091           /* copy quadrature constraints for change of basis check */
7092           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7093           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7094           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7095 
7096           /* compute QR decomposition of constraints */
7097           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7098           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7099           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7100           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7101           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7102           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7103           PetscCall(PetscFPTrapPop());
7104 
7105           /* explicitly compute R^-T */
7106           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7107           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7108           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7109           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7110           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7111           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7112           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7113           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7114           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr);
7115           PetscCall(PetscFPTrapPop());
7116 
7117           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7118           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7119           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7120           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7121           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7122           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7123           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7124           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7125           PetscCall(PetscFPTrapPop());
7126 
7127           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7128              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7129              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7130           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7131           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7132           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7133           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7134           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7135           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7136           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7137           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7138           PetscCall(PetscFPTrapPop());
7139           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7140 
7141           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7142           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7143           /* insert cols for primal dofs */
7144           for (j = 0; j < primal_dofs; j++) {
7145             start_vals = &qr_basis[j * size_of_constraint];
7146             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7147             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7148           }
7149           /* insert cols for dual dofs */
7150           for (j = 0, k = 0; j < dual_dofs; k++) {
7151             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7152               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7153               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7154               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7155               j++;
7156             }
7157           }
7158 
7159           /* check change of basis */
7160           if (pcbddc->dbg_flag) {
7161             PetscInt  ii, jj;
7162             PetscBool valid_qr = PETSC_TRUE;
7163             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7164             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7165             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7166             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7167             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7168             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7169             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7170             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7171             PetscCall(PetscFPTrapPop());
7172             for (jj = 0; jj < size_of_constraint; jj++) {
7173               for (ii = 0; ii < primal_dofs; ii++) {
7174                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7175                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7176               }
7177             }
7178             if (!valid_qr) {
7179               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7180               for (jj = 0; jj < size_of_constraint; jj++) {
7181                 for (ii = 0; ii < primal_dofs; ii++) {
7182                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7183                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7184                   }
7185                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7186                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7187                   }
7188                 }
7189               }
7190             } else {
7191               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7192             }
7193           }
7194         } else { /* simple transformation block */
7195           PetscInt    row, col;
7196           PetscScalar val, norm;
7197 
7198           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7199           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7200           for (j = 0; j < size_of_constraint; j++) {
7201             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7202             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7203             if (!PetscBTLookup(is_primal, row_B)) {
7204               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7205               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7206               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7207             } else {
7208               for (k = 0; k < size_of_constraint; k++) {
7209                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7210                 if (row != col) {
7211                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7212                 } else {
7213                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7214                 }
7215                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7216               }
7217             }
7218           }
7219           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7220         }
7221       } else {
7222         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7223       }
7224     }
7225 
7226     /* free workspace */
7227     if (qr_needed) {
7228       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7229       PetscCall(PetscFree(trs_rhs));
7230       PetscCall(PetscFree(qr_tau));
7231       PetscCall(PetscFree(qr_work));
7232       PetscCall(PetscFree(gqr_work));
7233       PetscCall(PetscFree(qr_basis));
7234     }
7235     PetscCall(PetscBTDestroy(&is_primal));
7236     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7237     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7238 
7239     /* assembling of global change of variable */
7240     if (!pcbddc->fake_change) {
7241       Mat tmat;
7242 
7243       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7244       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7245       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7246       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7247       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7248       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7249       PetscCall(MatConvert(tmat, MATAIJ, MAT_INITIAL_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7250       PetscCall(MatDestroy(&tmat));
7251       PetscCall(VecSet(pcis->vec1_global, 0.0));
7252       PetscCall(VecSet(pcis->vec1_N, 1.0));
7253       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7254       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7255       PetscCall(VecReciprocal(pcis->vec1_global));
7256       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
7257 
7258       /* check */
7259       if (pcbddc->dbg_flag) {
7260         PetscReal error;
7261         Vec       x, x_change;
7262 
7263         PetscCall(VecDuplicate(pcis->vec1_global, &x));
7264         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7265         PetscCall(VecSetRandom(x, NULL));
7266         PetscCall(VecCopy(x, pcis->vec1_global));
7267         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7268         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7269         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7270         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7271         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7272         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7273         PetscCall(VecAXPY(x, -1.0, x_change));
7274         PetscCall(VecNorm(x, NORM_INFINITY, &error));
7275         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7276         PetscCall(VecDestroy(&x));
7277         PetscCall(VecDestroy(&x_change));
7278       }
7279       /* adapt sub_schurs computed (if any) */
7280       if (pcbddc->use_deluxe_scaling) {
7281         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
7282 
7283         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7284         if (sub_schurs && sub_schurs->S_Ej_all) {
7285           Mat S_new, tmat;
7286           IS  is_all_N, is_V_Sall = NULL;
7287 
7288           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7289           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7290           if (pcbddc->deluxe_zerorows) {
7291             ISLocalToGlobalMapping NtoSall;
7292             IS                     is_V;
7293             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7294             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7295             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7296             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7297             PetscCall(ISDestroy(&is_V));
7298           }
7299           PetscCall(ISDestroy(&is_all_N));
7300           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7301           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7302           PetscCall(PetscObjectReference((PetscObject)S_new));
7303           if (pcbddc->deluxe_zerorows) {
7304             const PetscScalar *array;
7305             const PetscInt    *idxs_V, *idxs_all;
7306             PetscInt           i, n_V;
7307 
7308             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7309             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7310             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7311             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7312             PetscCall(VecGetArrayRead(pcis->D, &array));
7313             for (i = 0; i < n_V; i++) {
7314               PetscScalar val;
7315               PetscInt    idx;
7316 
7317               idx = idxs_V[i];
7318               val = array[idxs_all[idxs_V[i]]];
7319               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7320             }
7321             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7322             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7323             PetscCall(VecRestoreArrayRead(pcis->D, &array));
7324             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7325             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7326           }
7327           sub_schurs->S_Ej_all = S_new;
7328           PetscCall(MatDestroy(&S_new));
7329           if (sub_schurs->sum_S_Ej_all) {
7330             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7331             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7332             PetscCall(PetscObjectReference((PetscObject)S_new));
7333             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7334             sub_schurs->sum_S_Ej_all = S_new;
7335             PetscCall(MatDestroy(&S_new));
7336           }
7337           PetscCall(ISDestroy(&is_V_Sall));
7338           PetscCall(MatDestroy(&tmat));
7339         }
7340         /* destroy any change of basis context in sub_schurs */
7341         if (sub_schurs && sub_schurs->change) {
7342           PetscInt i;
7343 
7344           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7345           PetscCall(PetscFree(sub_schurs->change));
7346         }
7347       }
7348       if (pcbddc->switch_static) { /* need to save the local change */
7349         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7350       } else {
7351         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7352       }
7353       /* determine if any process has changed the pressures locally */
7354       pcbddc->change_interior = pcbddc->benign_have_null;
7355     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7356       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7357       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7358       pcbddc->use_qr_single    = qr_needed;
7359     }
7360   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7361     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7362       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7363       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7364     } else {
7365       Mat benign_global = NULL;
7366       if (pcbddc->benign_have_null) {
7367         Mat M;
7368 
7369         pcbddc->change_interior = PETSC_TRUE;
7370         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7371         PetscCall(VecReciprocal(pcis->vec1_N));
7372         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7373         if (pcbddc->benign_change) {
7374           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7375           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7376         } else {
7377           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7378           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7379         }
7380         PetscCall(MatISSetLocalMat(benign_global, M));
7381         PetscCall(MatDestroy(&M));
7382         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7383         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7384       }
7385       if (pcbddc->user_ChangeOfBasisMatrix) {
7386         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix));
7387         PetscCall(MatDestroy(&benign_global));
7388       } else if (pcbddc->benign_have_null) {
7389         pcbddc->ChangeOfBasisMatrix = benign_global;
7390       }
7391     }
7392     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7393       IS              is_global;
7394       const PetscInt *gidxs;
7395 
7396       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7397       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7398       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7399       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7400       PetscCall(ISDestroy(&is_global));
7401     }
7402   }
7403   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
7404 
7405   if (!pcbddc->fake_change) {
7406     /* add pressure dofs to set of primal nodes for numbering purposes */
7407     for (i = 0; i < pcbddc->benign_n; i++) {
7408       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
7409       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7410       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
7411       pcbddc->local_primal_size_cc++;
7412       pcbddc->local_primal_size++;
7413     }
7414 
7415     /* check if a new primal space has been introduced (also take into account benign trick) */
7416     pcbddc->new_primal_space_local = PETSC_TRUE;
7417     if (olocal_primal_size == pcbddc->local_primal_size) {
7418       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7419       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7420       if (!pcbddc->new_primal_space_local) {
7421         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7422         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7423       }
7424     }
7425     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7426     PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7427   }
7428   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
7429 
7430   /* flush dbg viewer */
7431   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7432 
7433   /* free workspace */
7434   PetscCall(PetscBTDestroy(&qr_needed_idx));
7435   PetscCall(PetscBTDestroy(&change_basis));
7436   if (!pcbddc->adaptive_selection) {
7437     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7438     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7439   } else {
7440     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7441     PetscCall(PetscFree(constraints_n));
7442     PetscCall(PetscFree(constraints_idxs_B));
7443   }
7444   PetscFunctionReturn(PETSC_SUCCESS);
7445 }
7446 
7447 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7448 {
7449   ISLocalToGlobalMapping map;
7450   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7451   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
7452   PetscInt               i, N;
7453   PetscBool              rcsr = PETSC_FALSE;
7454 
7455   PetscFunctionBegin;
7456   if (pcbddc->recompute_topography) {
7457     pcbddc->graphanalyzed = PETSC_FALSE;
7458     /* Reset previously computed graph */
7459     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7460     /* Init local Graph struct */
7461     PetscCall(MatGetSize(pc->pmat, &N, NULL));
7462     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7463     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
7464 
7465     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7466     /* Check validity of the csr graph passed in by the user */
7467     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7468                pcbddc->mat_graph->nvtxs);
7469 
7470     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7471     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7472       PetscInt *xadj, *adjncy;
7473       PetscInt  nvtxs;
7474       PetscBool flg_row;
7475       Mat       A;
7476 
7477       PetscCall(PetscObjectReference((PetscObject)matis->A));
7478       A = matis->A;
7479       for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7480         Mat AtA;
7481 
7482         PetscCall(MatProductCreate(A, A, NULL, &AtA));
7483         PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7484         PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7485         PetscCall(MatProductSetFromOptions(AtA));
7486         PetscCall(MatProductSymbolic(AtA));
7487         PetscCall(MatProductClear(AtA));
7488         /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7489         AtA->assembled = PETSC_TRUE;
7490         PetscCall(MatDestroy(&A));
7491         A = AtA;
7492       }
7493       PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7494       if (flg_row) {
7495         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7496         pcbddc->computed_rowadj = PETSC_TRUE;
7497         PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7498         rcsr = PETSC_TRUE;
7499       }
7500       PetscCall(MatDestroy(&A));
7501     }
7502     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7503 
7504     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7505       PetscReal   *lcoords;
7506       PetscInt     n;
7507       MPI_Datatype dimrealtype;
7508       PetscMPIInt  cdimi;
7509 
7510       /* TODO: support for blocked */
7511       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7512       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7513       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7514       PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi));
7515       PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype));
7516       PetscCallMPI(MPI_Type_commit(&dimrealtype));
7517       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7518       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7519       PetscCallMPI(MPI_Type_free(&dimrealtype));
7520       PetscCall(PetscFree(pcbddc->mat_graph->coords));
7521 
7522       pcbddc->mat_graph->coords = lcoords;
7523       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7524       pcbddc->mat_graph->cnloc  = n;
7525     }
7526     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7527                pcbddc->mat_graph->nvtxs);
7528     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7529 
7530     /* attach info on disconnected subdomains if present */
7531     if (pcbddc->n_local_subs) {
7532       PetscInt *local_subs, n, totn;
7533 
7534       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7535       PetscCall(PetscMalloc1(n, &local_subs));
7536       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7537       for (i = 0; i < pcbddc->n_local_subs; i++) {
7538         const PetscInt *idxs;
7539         PetscInt        nl, j;
7540 
7541         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7542         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7543         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7544         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7545       }
7546       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7547       pcbddc->mat_graph->n_local_subs = totn + 1;
7548       pcbddc->mat_graph->local_subs   = local_subs;
7549     }
7550 
7551     /* Setup of Graph */
7552     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7553   }
7554 
7555   if (!pcbddc->graphanalyzed) {
7556     /* Graph's connected components analysis */
7557     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7558     pcbddc->graphanalyzed   = PETSC_TRUE;
7559     pcbddc->corner_selected = pcbddc->corner_selection;
7560   }
7561   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7562   PetscFunctionReturn(PETSC_SUCCESS);
7563 }
7564 
7565 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7566 {
7567   PetscInt     i, j, n;
7568   PetscScalar *alphas;
7569   PetscReal    norm, *onorms;
7570 
7571   PetscFunctionBegin;
7572   n = *nio;
7573   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7574   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7575   PetscCall(VecNormalize(vecs[0], &norm));
7576   if (norm < PETSC_SMALL) {
7577     onorms[0] = 0.0;
7578     PetscCall(VecSet(vecs[0], 0.0));
7579   } else {
7580     onorms[0] = norm;
7581   }
7582 
7583   for (i = 1; i < n; i++) {
7584     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7585     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7586     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7587     PetscCall(VecNormalize(vecs[i], &norm));
7588     if (norm < PETSC_SMALL) {
7589       onorms[i] = 0.0;
7590       PetscCall(VecSet(vecs[i], 0.0));
7591     } else {
7592       onorms[i] = norm;
7593     }
7594   }
7595   /* push nonzero vectors at the beginning */
7596   for (i = 0; i < n; i++) {
7597     if (onorms[i] == 0.0) {
7598       for (j = i + 1; j < n; j++) {
7599         if (onorms[j] != 0.0) {
7600           PetscCall(VecCopy(vecs[j], vecs[i]));
7601           onorms[i] = onorms[j];
7602           onorms[j] = 0.0;
7603           break;
7604         }
7605       }
7606     }
7607   }
7608   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7609   PetscCall(PetscFree2(alphas, onorms));
7610   PetscFunctionReturn(PETSC_SUCCESS);
7611 }
7612 
7613 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7614 {
7615   ISLocalToGlobalMapping mapping;
7616   Mat                    A;
7617   PetscInt               n_neighs, *neighs, *n_shared, **shared;
7618   PetscMPIInt            size, rank, color;
7619   PetscInt              *xadj, *adjncy;
7620   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7621   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
7622   PetscInt               void_procs, *procs_candidates = NULL;
7623   PetscInt               xadj_count, *count;
7624   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
7625   PetscSubcomm           psubcomm;
7626   MPI_Comm               subcomm;
7627 
7628   PetscFunctionBegin;
7629   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7630   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7631   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7632   PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2);
7633   PetscValidLogicalCollectiveInt(mat, redprocs, 3);
7634   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7635 
7636   if (have_void) *have_void = PETSC_FALSE;
7637   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7638   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7639   PetscCall(MatISGetLocalMat(mat, &A));
7640   PetscCall(MatGetLocalSize(A, &n, NULL));
7641   im_active = !!n;
7642   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7643   void_procs = size - active_procs;
7644   /* get ranks of non-active processes in mat communicator */
7645   if (void_procs) {
7646     PetscInt ncand;
7647 
7648     if (have_void) *have_void = PETSC_TRUE;
7649     PetscCall(PetscMalloc1(size, &procs_candidates));
7650     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7651     for (i = 0, ncand = 0; i < size; i++) {
7652       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7653     }
7654     /* force n_subdomains to be not greater that the number of non-active processes */
7655     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7656   }
7657 
7658   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7659      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7660   PetscCall(MatGetSize(mat, &N, NULL));
7661   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7662     PetscInt  issize, isidx, dest;
7663     PetscBool default_sub;
7664 
7665     if (*n_subdomains == 1) dest = 0;
7666     else dest = rank;
7667     if (im_active) {
7668       issize = 1;
7669       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7670         isidx = procs_candidates[dest];
7671       } else {
7672         isidx = dest;
7673       }
7674     } else {
7675       issize = 0;
7676       isidx  = rank;
7677     }
7678     if (*n_subdomains != 1) *n_subdomains = active_procs;
7679     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7680     default_sub = (PetscBool)(isidx == rank);
7681     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &default_sub, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
7682     if (default_sub) PetscCall(PetscObjectSetName((PetscObject)*is_sends, "default subassembling"));
7683     PetscCall(PetscFree(procs_candidates));
7684     PetscFunctionReturn(PETSC_SUCCESS);
7685   }
7686   PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7687   PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7688   threshold = PetscMax(threshold, 2);
7689 
7690   /* Get info on mapping */
7691   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7692   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7693 
7694   /* build local CSR graph of subdomains' connectivity */
7695   PetscCall(PetscMalloc1(2, &xadj));
7696   xadj[0] = 0;
7697   xadj[1] = PetscMax(n_neighs - 1, 0);
7698   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7699   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7700   PetscCall(PetscCalloc1(n, &count));
7701   for (i = 1; i < n_neighs; i++)
7702     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7703 
7704   xadj_count = 0;
7705   for (i = 1; i < n_neighs; i++) {
7706     for (j = 0; j < n_shared[i]; j++) {
7707       if (count[shared[i][j]] < threshold) {
7708         adjncy[xadj_count]     = neighs[i];
7709         adjncy_wgt[xadj_count] = n_shared[i];
7710         xadj_count++;
7711         break;
7712       }
7713     }
7714   }
7715   xadj[1] = xadj_count;
7716   PetscCall(PetscFree(count));
7717   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7718   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7719 
7720   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7721 
7722   /* Restrict work on active processes only */
7723   PetscCall(PetscMPIIntCast(im_active, &color));
7724   if (void_procs) {
7725     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7726     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7727     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7728     subcomm = PetscSubcommChild(psubcomm);
7729   } else {
7730     psubcomm = NULL;
7731     subcomm  = PetscObjectComm((PetscObject)mat);
7732   }
7733 
7734   v_wgt = NULL;
7735   if (!color) {
7736     PetscCall(PetscFree(xadj));
7737     PetscCall(PetscFree(adjncy));
7738     PetscCall(PetscFree(adjncy_wgt));
7739   } else {
7740     Mat             subdomain_adj;
7741     IS              new_ranks, new_ranks_contig;
7742     MatPartitioning partitioner;
7743     PetscInt        rstart, rend;
7744     PetscMPIInt     irstart = 0, irend = 0;
7745     PetscInt       *is_indices, *oldranks;
7746     PetscMPIInt     size;
7747     PetscBool       aggregate;
7748 
7749     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7750     if (void_procs) {
7751       PetscInt prank = rank;
7752       PetscCall(PetscMalloc1(size, &oldranks));
7753       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7754       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7755       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7756     } else {
7757       oldranks = NULL;
7758     }
7759     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7760     if (aggregate) { /* TODO: all this part could be made more efficient */
7761       PetscInt     lrows, row, ncols, *cols;
7762       PetscMPIInt  nrank;
7763       PetscScalar *vals;
7764 
7765       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7766       lrows = 0;
7767       if (nrank < redprocs) {
7768         lrows = size / redprocs;
7769         if (nrank < size % redprocs) lrows++;
7770       }
7771       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7772       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7773       PetscCall(PetscMPIIntCast(rstart, &irstart));
7774       PetscCall(PetscMPIIntCast(rend, &irend));
7775       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7776       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7777       row   = nrank;
7778       ncols = xadj[1] - xadj[0];
7779       cols  = adjncy;
7780       PetscCall(PetscMalloc1(ncols, &vals));
7781       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7782       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7783       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7784       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7785       PetscCall(PetscFree(xadj));
7786       PetscCall(PetscFree(adjncy));
7787       PetscCall(PetscFree(adjncy_wgt));
7788       PetscCall(PetscFree(vals));
7789       if (use_vwgt) {
7790         Vec                v;
7791         const PetscScalar *array;
7792         PetscInt           nl;
7793 
7794         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7795         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7796         PetscCall(VecAssemblyBegin(v));
7797         PetscCall(VecAssemblyEnd(v));
7798         PetscCall(VecGetLocalSize(v, &nl));
7799         PetscCall(VecGetArrayRead(v, &array));
7800         PetscCall(PetscMalloc1(nl, &v_wgt));
7801         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7802         PetscCall(VecRestoreArrayRead(v, &array));
7803         PetscCall(VecDestroy(&v));
7804       }
7805     } else {
7806       PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7807       if (use_vwgt) {
7808         PetscCall(PetscMalloc1(1, &v_wgt));
7809         v_wgt[0] = n;
7810       }
7811     }
7812     /* PetscCall(MatView(subdomain_adj,0)); */
7813 
7814     /* Partition */
7815     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7816 #if defined(PETSC_HAVE_PTSCOTCH)
7817     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7818 #elif defined(PETSC_HAVE_PARMETIS)
7819     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7820 #else
7821     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7822 #endif
7823     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7824     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7825     *n_subdomains = PetscMin(size, *n_subdomains);
7826     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7827     PetscCall(MatPartitioningSetFromOptions(partitioner));
7828     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7829     /* PetscCall(MatPartitioningView(partitioner,0)); */
7830 
7831     /* renumber new_ranks to avoid "holes" in new set of processors */
7832     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7833     PetscCall(ISDestroy(&new_ranks));
7834     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7835     if (!aggregate) {
7836       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7837         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7838         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7839       } else if (oldranks) {
7840         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7841       } else {
7842         ranks_send_to_idx[0] = is_indices[0];
7843       }
7844     } else {
7845       PetscInt     idx = 0;
7846       PetscMPIInt  tag;
7847       MPI_Request *reqs;
7848 
7849       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7850       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7851       for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7852       PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7853       PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE));
7854       PetscCall(PetscFree(reqs));
7855       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7856         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7857         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7858       } else if (oldranks) {
7859         ranks_send_to_idx[0] = oldranks[idx];
7860       } else {
7861         ranks_send_to_idx[0] = idx;
7862       }
7863     }
7864     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7865     /* clean up */
7866     PetscCall(PetscFree(oldranks));
7867     PetscCall(ISDestroy(&new_ranks_contig));
7868     PetscCall(MatDestroy(&subdomain_adj));
7869     PetscCall(MatPartitioningDestroy(&partitioner));
7870   }
7871   PetscCall(PetscSubcommDestroy(&psubcomm));
7872   PetscCall(PetscFree(procs_candidates));
7873 
7874   /* assemble parallel IS for sends */
7875   i = 1;
7876   if (!color) i = 0;
7877   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7878   PetscFunctionReturn(PETSC_SUCCESS);
7879 }
7880 
7881 typedef enum {
7882   MATDENSE_PRIVATE = 0,
7883   MATAIJ_PRIVATE,
7884   MATBAIJ_PRIVATE,
7885   MATSBAIJ_PRIVATE
7886 } MatTypePrivate;
7887 
7888 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7889 {
7890   Mat                    local_mat;
7891   IS                     is_sends_internal;
7892   PetscInt               rows, cols, new_local_rows;
7893   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7894   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7895   ISLocalToGlobalMapping l2gmap;
7896   PetscInt              *l2gmap_indices;
7897   const PetscInt        *is_indices;
7898   MatType                new_local_type;
7899   /* buffers */
7900   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7901   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7902   PetscInt          *recv_buffer_idxs_local;
7903   PetscScalar       *ptr_vals, *recv_buffer_vals;
7904   const PetscScalar *send_buffer_vals;
7905   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7906   /* MPI */
7907   MPI_Comm     comm, comm_n;
7908   PetscSubcomm subcomm;
7909   PetscMPIInt  n_sends, n_recvs, size;
7910   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7911   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7912   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7913   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7914   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
7915 
7916   PetscFunctionBegin;
7917   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7918   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7919   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7920   PetscValidLogicalCollectiveInt(mat, n_subdomains, 3);
7921   PetscValidLogicalCollectiveBool(mat, restrict_comm, 4);
7922   PetscValidLogicalCollectiveBool(mat, restrict_full, 5);
7923   PetscValidLogicalCollectiveBool(mat, reuse, 6);
7924   PetscValidLogicalCollectiveInt(mat, nis, 8);
7925   PetscValidLogicalCollectiveInt(mat, nvecs, 10);
7926   if (nvecs) {
7927     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
7928     PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11);
7929   }
7930   /* further checks */
7931   PetscCall(MatISGetLocalMat(mat, &local_mat));
7932   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7933   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7934 
7935   PetscCall(MatGetSize(local_mat, &rows, &cols));
7936   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
7937   if (reuse && *mat_n) {
7938     PetscInt mrows, mcols, mnrows, mncols;
7939     PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7);
7940     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
7941     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
7942     PetscCall(MatGetSize(mat, &mrows, &mcols));
7943     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
7944     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
7945     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
7946   }
7947   PetscCall(MatGetBlockSize(local_mat, &bs));
7948   PetscValidLogicalCollectiveInt(mat, bs, 1);
7949 
7950   /* prepare IS for sending if not provided */
7951   if (!is_sends) {
7952     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
7953     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
7954   } else {
7955     PetscCall(PetscObjectReference((PetscObject)is_sends));
7956     is_sends_internal = is_sends;
7957   }
7958 
7959   /* get comm */
7960   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
7961 
7962   /* compute number of sends */
7963   PetscCall(ISGetLocalSize(is_sends_internal, &i));
7964   PetscCall(PetscMPIIntCast(i, &n_sends));
7965 
7966   /* compute number of receives */
7967   PetscCallMPI(MPI_Comm_size(comm, &size));
7968   PetscCall(PetscMalloc1(size, &iflags));
7969   PetscCall(PetscArrayzero(iflags, size));
7970   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
7971   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7972   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
7973   PetscCall(PetscFree(iflags));
7974 
7975   /* restrict comm if requested */
7976   subcomm     = NULL;
7977   destroy_mat = PETSC_FALSE;
7978   if (restrict_comm) {
7979     PetscMPIInt color, subcommsize;
7980 
7981     color = 0;
7982     if (restrict_full) {
7983       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7984     } else {
7985       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7986     }
7987     PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
7988     subcommsize = size - subcommsize;
7989     /* check if reuse has been requested */
7990     if (reuse) {
7991       if (*mat_n) {
7992         PetscMPIInt subcommsize2;
7993         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
7994         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
7995         comm_n = PetscObjectComm((PetscObject)*mat_n);
7996       } else {
7997         comm_n = PETSC_COMM_SELF;
7998       }
7999     } else { /* MAT_INITIAL_MATRIX */
8000       PetscMPIInt rank;
8001 
8002       PetscCallMPI(MPI_Comm_rank(comm, &rank));
8003       PetscCall(PetscSubcommCreate(comm, &subcomm));
8004       PetscCall(PetscSubcommSetNumber(subcomm, 2));
8005       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
8006       comm_n = PetscSubcommChild(subcomm);
8007     }
8008     /* flag to destroy *mat_n if not significative */
8009     if (color) destroy_mat = PETSC_TRUE;
8010   } else {
8011     comm_n = comm;
8012   }
8013 
8014   /* prepare send/receive buffers */
8015   PetscCall(PetscMalloc1(size, &ilengths_idxs));
8016   PetscCall(PetscArrayzero(ilengths_idxs, size));
8017   PetscCall(PetscMalloc1(size, &ilengths_vals));
8018   PetscCall(PetscArrayzero(ilengths_vals, size));
8019   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
8020 
8021   /* Get data from local matrices */
8022   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8023   /* TODO: See below some guidelines on how to prepare the local buffers */
8024   /*
8025        send_buffer_vals should contain the raw values of the local matrix
8026        send_buffer_idxs should contain:
8027        - MatType_PRIVATE type
8028        - PetscInt        size_of_l2gmap
8029        - PetscInt        global_row_indices[size_of_l2gmap]
8030        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8031     */
8032   {
8033     ISLocalToGlobalMapping mapping;
8034 
8035     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8036     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8037     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8038     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8039     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8040     send_buffer_idxs[1] = i;
8041     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8042     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8043     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8044     PetscCall(PetscMPIIntCast(i, &len));
8045     for (i = 0; i < n_sends; i++) {
8046       ilengths_vals[is_indices[i]] = len * len;
8047       ilengths_idxs[is_indices[i]] = len + 2;
8048     }
8049   }
8050   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8051   /* additional is (if any) */
8052   if (nis) {
8053     PetscMPIInt psum;
8054     PetscInt    j;
8055     for (j = 0, psum = 0; j < nis; j++) {
8056       PetscInt plen;
8057       PetscCall(ISGetLocalSize(isarray[j], &plen));
8058       PetscCall(PetscMPIIntCast(plen, &len));
8059       psum += len + 1; /* indices + length */
8060     }
8061     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8062     for (j = 0, psum = 0; j < nis; j++) {
8063       PetscInt        plen;
8064       const PetscInt *is_array_idxs;
8065       PetscCall(ISGetLocalSize(isarray[j], &plen));
8066       send_buffer_idxs_is[psum] = plen;
8067       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8068       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8069       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8070       psum += plen + 1; /* indices + length */
8071     }
8072     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8073     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8074   }
8075   PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8076 
8077   buf_size_idxs    = 0;
8078   buf_size_vals    = 0;
8079   buf_size_idxs_is = 0;
8080   buf_size_vecs    = 0;
8081   for (i = 0; i < n_recvs; i++) {
8082     buf_size_idxs += olengths_idxs[i];
8083     buf_size_vals += olengths_vals[i];
8084     if (nis) buf_size_idxs_is += olengths_idxs_is[i];
8085     if (nvecs) buf_size_vecs += olengths_idxs[i];
8086   }
8087   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8088   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8089   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8090   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
8091 
8092   /* get new tags for clean communications */
8093   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8094   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8095   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8096   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
8097 
8098   /* allocate for requests */
8099   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8100   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8101   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8102   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8103   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8104   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8105   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8106   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
8107 
8108   /* communications */
8109   ptr_idxs    = recv_buffer_idxs;
8110   ptr_vals    = recv_buffer_vals;
8111   ptr_idxs_is = recv_buffer_idxs_is;
8112   ptr_vecs    = recv_buffer_vecs;
8113   for (i = 0; i < n_recvs; i++) {
8114     PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i]));
8115     PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i]));
8116     ptr_idxs += olengths_idxs[i];
8117     ptr_vals += olengths_vals[i];
8118     if (nis) {
8119       PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i]));
8120       ptr_idxs_is += olengths_idxs_is[i];
8121     }
8122     if (nvecs) {
8123       PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i]));
8124       ptr_vecs += olengths_idxs[i] - 2;
8125     }
8126   }
8127   for (i = 0; i < n_sends; i++) {
8128     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8129     PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8130     PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8131     if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8132     if (nvecs) {
8133       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8134       PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8135     }
8136   }
8137   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8138   PetscCall(ISDestroy(&is_sends_internal));
8139 
8140   /* assemble new l2g map */
8141   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8142   ptr_idxs       = recv_buffer_idxs;
8143   new_local_rows = 0;
8144   for (i = 0; i < n_recvs; i++) {
8145     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8146     ptr_idxs += olengths_idxs[i];
8147   }
8148   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8149   ptr_idxs       = recv_buffer_idxs;
8150   new_local_rows = 0;
8151   for (i = 0; i < n_recvs; i++) {
8152     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8153     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8154     ptr_idxs += olengths_idxs[i];
8155   }
8156   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8157   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8158   PetscCall(PetscFree(l2gmap_indices));
8159 
8160   /* infer new local matrix type from received local matrices type */
8161   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8162   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
8163   if (n_recvs) {
8164     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8165     ptr_idxs                              = recv_buffer_idxs;
8166     for (i = 0; i < n_recvs; i++) {
8167       if ((PetscInt)new_local_type_private != *ptr_idxs) {
8168         new_local_type_private = MATAIJ_PRIVATE;
8169         break;
8170       }
8171       ptr_idxs += olengths_idxs[i];
8172     }
8173     switch (new_local_type_private) {
8174     case MATDENSE_PRIVATE:
8175       new_local_type = MATSEQAIJ;
8176       bs             = 1;
8177       break;
8178     case MATAIJ_PRIVATE:
8179       new_local_type = MATSEQAIJ;
8180       bs             = 1;
8181       break;
8182     case MATBAIJ_PRIVATE:
8183       new_local_type = MATSEQBAIJ;
8184       break;
8185     case MATSBAIJ_PRIVATE:
8186       new_local_type = MATSEQSBAIJ;
8187       break;
8188     default:
8189       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8190     }
8191   } else { /* by default, new_local_type is seqaij */
8192     new_local_type = MATSEQAIJ;
8193     bs             = 1;
8194   }
8195 
8196   /* create MATIS object if needed */
8197   if (!reuse) {
8198     PetscCall(MatGetSize(mat, &rows, &cols));
8199     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8200   } else {
8201     /* it also destroys the local matrices */
8202     if (*mat_n) {
8203       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8204     } else { /* this is a fake object */
8205       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8206     }
8207   }
8208   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8209   PetscCall(MatSetType(local_mat, new_local_type));
8210 
8211   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
8212 
8213   /* Global to local map of received indices */
8214   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8215   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8216   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
8217 
8218   /* restore attributes -> type of incoming data and its size */
8219   buf_size_idxs = 0;
8220   for (i = 0; i < n_recvs; i++) {
8221     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
8222     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8223     buf_size_idxs += olengths_idxs[i];
8224   }
8225   PetscCall(PetscFree(recv_buffer_idxs));
8226 
8227   /* set preallocation */
8228   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8229   if (!newisdense) {
8230     PetscInt *new_local_nnz = NULL;
8231 
8232     ptr_idxs = recv_buffer_idxs_local;
8233     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8234     for (i = 0; i < n_recvs; i++) {
8235       PetscInt j;
8236       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8237         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8238       } else {
8239         /* TODO */
8240       }
8241       ptr_idxs += olengths_idxs[i];
8242     }
8243     if (new_local_nnz) {
8244       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8245       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8246       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8247       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8248       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8249       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8250     } else {
8251       PetscCall(MatSetUp(local_mat));
8252     }
8253     PetscCall(PetscFree(new_local_nnz));
8254   } else {
8255     PetscCall(MatSetUp(local_mat));
8256   }
8257 
8258   /* set values */
8259   ptr_vals = recv_buffer_vals;
8260   ptr_idxs = recv_buffer_idxs_local;
8261   for (i = 0; i < n_recvs; i++) {
8262     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8263       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8264       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8265       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8266       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8267       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8268     } else {
8269       /* TODO */
8270     }
8271     ptr_idxs += olengths_idxs[i];
8272     ptr_vals += olengths_vals[i];
8273   }
8274   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8275   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8276   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8277   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8278   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8279   PetscCall(PetscFree(recv_buffer_vals));
8280 
8281 #if 0
8282   if (!restrict_comm) { /* check */
8283     Vec       lvec,rvec;
8284     PetscReal infty_error;
8285 
8286     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8287     PetscCall(VecSetRandom(rvec,NULL));
8288     PetscCall(MatMult(mat,rvec,lvec));
8289     PetscCall(VecScale(lvec,-1.0));
8290     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8291     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8292     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8293     PetscCall(VecDestroy(&rvec));
8294     PetscCall(VecDestroy(&lvec));
8295   }
8296 #endif
8297 
8298   /* assemble new additional is (if any) */
8299   if (nis) {
8300     PetscInt **temp_idxs, *count_is, j, psum;
8301 
8302     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8303     PetscCall(PetscCalloc1(nis, &count_is));
8304     ptr_idxs = recv_buffer_idxs_is;
8305     psum     = 0;
8306     for (i = 0; i < n_recvs; i++) {
8307       for (j = 0; j < nis; j++) {
8308         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8309         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
8310         psum += plen;
8311         ptr_idxs += plen + 1; /* shift pointer to received data */
8312       }
8313     }
8314     PetscCall(PetscMalloc1(nis, &temp_idxs));
8315     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8316     for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8317     PetscCall(PetscArrayzero(count_is, nis));
8318     ptr_idxs = recv_buffer_idxs_is;
8319     for (i = 0; i < n_recvs; i++) {
8320       for (j = 0; j < nis; j++) {
8321         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8322         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8323         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
8324         ptr_idxs += plen + 1; /* shift pointer to received data */
8325       }
8326     }
8327     for (i = 0; i < nis; i++) {
8328       PetscCall(ISDestroy(&isarray[i]));
8329       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8330       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8331     }
8332     PetscCall(PetscFree(count_is));
8333     PetscCall(PetscFree(temp_idxs[0]));
8334     PetscCall(PetscFree(temp_idxs));
8335   }
8336   /* free workspace */
8337   PetscCall(PetscFree(recv_buffer_idxs_is));
8338   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8339   PetscCall(PetscFree(send_buffer_idxs));
8340   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8341   if (isdense) {
8342     PetscCall(MatISGetLocalMat(mat, &local_mat));
8343     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8344     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8345   } else {
8346     /* PetscCall(PetscFree(send_buffer_vals)); */
8347   }
8348   if (nis) {
8349     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8350     PetscCall(PetscFree(send_buffer_idxs_is));
8351   }
8352 
8353   if (nvecs) {
8354     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8355     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8356     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8357     PetscCall(VecDestroy(&nnsp_vec[0]));
8358     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8359     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8360     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8361     /* set values */
8362     ptr_vals = recv_buffer_vecs;
8363     ptr_idxs = recv_buffer_idxs_local;
8364     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8365     for (i = 0; i < n_recvs; i++) {
8366       PetscInt j;
8367       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8368       ptr_idxs += olengths_idxs[i];
8369       ptr_vals += olengths_idxs[i] - 2;
8370     }
8371     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8372     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8373     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8374   }
8375 
8376   PetscCall(PetscFree(recv_buffer_vecs));
8377   PetscCall(PetscFree(recv_buffer_idxs_local));
8378   PetscCall(PetscFree(recv_req_idxs));
8379   PetscCall(PetscFree(recv_req_vals));
8380   PetscCall(PetscFree(recv_req_vecs));
8381   PetscCall(PetscFree(recv_req_idxs_is));
8382   PetscCall(PetscFree(send_req_idxs));
8383   PetscCall(PetscFree(send_req_vals));
8384   PetscCall(PetscFree(send_req_vecs));
8385   PetscCall(PetscFree(send_req_idxs_is));
8386   PetscCall(PetscFree(ilengths_vals));
8387   PetscCall(PetscFree(ilengths_idxs));
8388   PetscCall(PetscFree(olengths_vals));
8389   PetscCall(PetscFree(olengths_idxs));
8390   PetscCall(PetscFree(onodes));
8391   if (nis) {
8392     PetscCall(PetscFree(ilengths_idxs_is));
8393     PetscCall(PetscFree(olengths_idxs_is));
8394     PetscCall(PetscFree(onodes_is));
8395   }
8396   PetscCall(PetscSubcommDestroy(&subcomm));
8397   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8398     PetscCall(MatDestroy(mat_n));
8399     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8400     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8401       PetscCall(VecDestroy(&nnsp_vec[0]));
8402     }
8403     *mat_n = NULL;
8404   }
8405   PetscFunctionReturn(PETSC_SUCCESS);
8406 }
8407 
8408 /* temporary hack into ksp private data structure */
8409 #include <petsc/private/kspimpl.h>
8410 
8411 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8412 {
8413   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
8414   PC_IS                 *pcis   = (PC_IS *)pc->data;
8415   PCBDDCGraph            graph  = pcbddc->mat_graph;
8416   Mat                    coarse_mat, coarse_mat_is;
8417   Mat                    coarsedivudotp = NULL;
8418   Mat                    coarseG, t_coarse_mat_is;
8419   MatNullSpace           CoarseNullSpace = NULL;
8420   ISLocalToGlobalMapping coarse_islg;
8421   IS                     coarse_is, *isarray, corners;
8422   PetscInt               i, im_active = -1, active_procs = -1;
8423   PetscInt               nis, nisdofs, nisneu, nisvert;
8424   PetscInt               coarse_eqs_per_proc, coarsening_ratio;
8425   PC                     pc_temp;
8426   PCType                 coarse_pc_type;
8427   KSPType                coarse_ksp_type;
8428   PetscBool              multilevel_requested, multilevel_allowed;
8429   PetscBool              coarse_reuse, multi_element = graph->multi_element;
8430   PetscInt               ncoarse, nedcfield;
8431   PetscBool              compute_vecs = PETSC_FALSE;
8432   PetscScalar           *array;
8433   MatReuse               coarse_mat_reuse;
8434   PetscBool              restr, full_restr, have_void;
8435   PetscMPIInt            size;
8436 
8437   PetscFunctionBegin;
8438   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8439   /* Assign global numbering to coarse dofs */
8440   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8441     PetscInt ocoarse_size;
8442     compute_vecs = PETSC_TRUE;
8443 
8444     pcbddc->new_primal_space = PETSC_TRUE;
8445     ocoarse_size             = pcbddc->coarse_size;
8446     PetscCall(PetscFree(pcbddc->global_primal_indices));
8447     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8448     /* see if we can avoid some work */
8449     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8450       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8451       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8452         PetscCall(KSPReset(pcbddc->coarse_ksp));
8453         coarse_reuse = PETSC_FALSE;
8454       } else { /* we can safely reuse already computed coarse matrix */
8455         coarse_reuse = PETSC_TRUE;
8456       }
8457     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8458       coarse_reuse = PETSC_FALSE;
8459     }
8460     /* reset any subassembling information */
8461     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8462   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8463     coarse_reuse = PETSC_TRUE;
8464   }
8465   if (coarse_reuse && pcbddc->coarse_ksp) {
8466     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8467     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8468     coarse_mat_reuse = MAT_REUSE_MATRIX;
8469   } else {
8470     coarse_mat       = NULL;
8471     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8472   }
8473 
8474   /* creates temporary l2gmap and IS for coarse indexes */
8475   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8476   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
8477 
8478   /* creates temporary MATIS object for coarse matrix */
8479   PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8480   PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8481   PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8482   PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, multi_element));
8483   PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8484   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8485   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8486   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8487   PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));
8488 
8489   /* count "active" (i.e. with positive local size) and "void" processes */
8490   im_active = !!pcis->n;
8491   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8492 
8493   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8494   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8495   /* full_restr : just use the receivers from the subassembling pattern */
8496   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8497   coarse_mat_is        = NULL;
8498   multilevel_allowed   = PETSC_FALSE;
8499   multilevel_requested = PETSC_FALSE;
8500   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8501   if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8502   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8503   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8504   coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8505   if (multilevel_requested) {
8506     ncoarse    = active_procs / coarsening_ratio;
8507     restr      = PETSC_FALSE;
8508     full_restr = PETSC_FALSE;
8509   } else {
8510     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8511     restr      = PETSC_TRUE;
8512     full_restr = PETSC_TRUE;
8513   }
8514   if (!pcbddc->coarse_size || (size == 1 && !multi_element)) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8515   ncoarse = PetscMax(1, ncoarse);
8516   if (!pcbddc->coarse_subassembling) {
8517     if (coarsening_ratio > 1) {
8518       if (multilevel_requested) {
8519         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8520       } else {
8521         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8522       }
8523     } else {
8524       PetscMPIInt rank;
8525 
8526       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8527       have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE;
8528       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8529       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_subassembling, "default subassembling"));
8530     }
8531   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8532     PetscInt psum;
8533     if (pcbddc->coarse_ksp) psum = 1;
8534     else psum = 0;
8535     PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8536     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8537   }
8538   /* determine if we can go multilevel */
8539   if (multilevel_requested) {
8540     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8541     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
8542   }
8543   if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8544 
8545   /* dump subassembling pattern */
8546   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8547   /* compute dofs splitting and neumann boundaries for coarse dofs */
8548   nedcfield = -1;
8549   corners   = NULL;
8550   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8551     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
8552     const PetscInt        *idxs;
8553     ISLocalToGlobalMapping tmap;
8554 
8555     /* create map between primal indices (in local representative ordering) and local primal numbering */
8556     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8557     /* allocate space for temporary storage */
8558     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8559     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8560     /* allocate for IS array */
8561     nisdofs = pcbddc->n_ISForDofsLocal;
8562     if (pcbddc->nedclocal) {
8563       if (pcbddc->nedfield > -1) {
8564         nedcfield = pcbddc->nedfield;
8565       } else {
8566         nedcfield = 0;
8567         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8568         nisdofs = 1;
8569       }
8570     }
8571     nisneu  = !!pcbddc->NeumannBoundariesLocal;
8572     nisvert = 0; /* nisvert is not used */
8573     nis     = nisdofs + nisneu + nisvert;
8574     PetscCall(PetscMalloc1(nis, &isarray));
8575     /* dofs splitting */
8576     for (i = 0; i < nisdofs; i++) {
8577       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8578       if (nedcfield != i) {
8579         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8580         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8581         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8582         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8583       } else {
8584         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8585         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8586         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8587         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8588         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8589       }
8590       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8591       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8592       /* PetscCall(ISView(isarray[i],0)); */
8593     }
8594     /* neumann boundaries */
8595     if (pcbddc->NeumannBoundariesLocal) {
8596       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8597       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8598       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8599       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8600       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8601       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8602       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8603       /* PetscCall(ISView(isarray[nisdofs],0)); */
8604     }
8605     /* coordinates */
8606     if (pcbddc->corner_selected) {
8607       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8608       PetscCall(ISGetLocalSize(corners, &tsize));
8609       PetscCall(ISGetIndices(corners, &idxs));
8610       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8611       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8612       PetscCall(ISRestoreIndices(corners, &idxs));
8613       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8614       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8615       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8616     }
8617     PetscCall(PetscFree(tidxs));
8618     PetscCall(PetscFree(tidxs2));
8619     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8620   } else {
8621     nis     = 0;
8622     nisdofs = 0;
8623     nisneu  = 0;
8624     nisvert = 0;
8625     isarray = NULL;
8626   }
8627   /* destroy no longer needed map */
8628   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
8629 
8630   /* subassemble */
8631   if (multilevel_allowed) {
8632     Vec       vp[1];
8633     PetscInt  nvecs = 0;
8634     PetscBool reuse;
8635 
8636     vp[0] = NULL;
8637     /* XXX HDIV also */
8638     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8639       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8640       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8641       PetscCall(VecSetType(vp[0], VECSTANDARD));
8642       nvecs = 1;
8643 
8644       if (pcbddc->divudotp) {
8645         Mat      B, loc_divudotp;
8646         Vec      v, p;
8647         IS       dummy;
8648         PetscInt np;
8649 
8650         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8651         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8652         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8653         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8654         PetscCall(MatCreateVecs(B, &v, &p));
8655         PetscCall(VecSet(p, 1.));
8656         PetscCall(MatMultTranspose(B, p, v));
8657         PetscCall(VecDestroy(&p));
8658         PetscCall(MatDestroy(&B));
8659         PetscCall(VecGetArray(vp[0], &array));
8660         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8661         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8662         PetscCall(VecResetArray(pcbddc->vec1_P));
8663         PetscCall(VecRestoreArray(vp[0], &array));
8664         PetscCall(ISDestroy(&dummy));
8665         PetscCall(VecDestroy(&v));
8666       }
8667     }
8668     if (coarse_mat) reuse = PETSC_TRUE;
8669     else reuse = PETSC_FALSE;
8670     if (multi_element) {
8671       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8672       coarse_mat_is = t_coarse_mat_is;
8673     } else {
8674       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8675       if (reuse) {
8676         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8677       } else {
8678         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8679       }
8680       if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8681         PetscScalar       *arraym;
8682         const PetscScalar *arrayv;
8683         PetscInt           nl;
8684         PetscCall(VecGetLocalSize(vp[0], &nl));
8685         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8686         PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8687         PetscCall(VecGetArrayRead(vp[0], &arrayv));
8688         PetscCall(PetscArraycpy(arraym, arrayv, nl));
8689         PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8690         PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8691         PetscCall(VecDestroy(&vp[0]));
8692       } else {
8693         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8694       }
8695     }
8696   } else {
8697     PetscBool default_sub;
8698 
8699     PetscCall(PetscStrcmp(((PetscObject)pcbddc->coarse_subassembling)->name, "default subassembling", &default_sub));
8700     if (!default_sub) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8701     else {
8702       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8703       coarse_mat_is = t_coarse_mat_is;
8704     }
8705   }
8706   if (coarse_mat_is || coarse_mat) {
8707     if (!multilevel_allowed) {
8708       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8709     } else {
8710       /* if this matrix is present, it means we are not reusing the coarse matrix */
8711       if (coarse_mat_is) {
8712         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8713         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8714         coarse_mat = coarse_mat_is;
8715       }
8716     }
8717   }
8718   PetscCall(MatDestroy(&t_coarse_mat_is));
8719   PetscCall(MatDestroy(&coarse_mat_is));
8720 
8721   /* create local to global scatters for coarse problem */
8722   if (compute_vecs) {
8723     PetscInt lrows;
8724     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8725     if (coarse_mat) {
8726       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8727     } else {
8728       lrows = 0;
8729     }
8730     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8731     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8732     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8733     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8734     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8735   }
8736   PetscCall(ISDestroy(&coarse_is));
8737 
8738   /* set defaults for coarse KSP and PC */
8739   if (multilevel_allowed) {
8740     coarse_ksp_type = KSPRICHARDSON;
8741     coarse_pc_type  = PCBDDC;
8742   } else {
8743     coarse_ksp_type = KSPPREONLY;
8744     coarse_pc_type  = PCREDUNDANT;
8745   }
8746 
8747   /* print some info if requested */
8748   if (pcbddc->dbg_flag) {
8749     if (!multilevel_allowed) {
8750       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8751       if (multilevel_requested) {
8752         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8753       } else if (pcbddc->max_levels) {
8754         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8755       }
8756       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8757     }
8758   }
8759 
8760   /* communicate coarse discrete gradient */
8761   coarseG = NULL;
8762   if (pcbddc->nedcG && multilevel_allowed) {
8763     MPI_Comm ccomm;
8764     if (coarse_mat) {
8765       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8766     } else {
8767       ccomm = MPI_COMM_NULL;
8768     }
8769     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8770   }
8771 
8772   /* create the coarse KSP object only once with defaults */
8773   if (coarse_mat) {
8774     PetscBool   isredundant, isbddc, force, valid;
8775     PetscViewer dbg_viewer = NULL;
8776     PetscBool   isset, issym, isher, isspd;
8777 
8778     if (pcbddc->dbg_flag) {
8779       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8780       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8781     }
8782     if (!pcbddc->coarse_ksp) {
8783       char   prefix[256], str_level[16];
8784       size_t len;
8785 
8786       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8787       PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8788       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8789       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8790       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1));
8791       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8792       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8793       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8794       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8795       /* TODO is this logic correct? should check for coarse_mat type */
8796       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8797       /* prefix */
8798       PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8799       PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8800       if (!pcbddc->current_level) {
8801         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8802         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8803       } else {
8804         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8805         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8806         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8807         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8808         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8809         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
8810         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8811       }
8812       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8813       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8814       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8815       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8816       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8817       /* allow user customization */
8818       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8819       /* get some info after set from options */
8820       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8821       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8822       force = PETSC_FALSE;
8823       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8824       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8825       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8826       if (multilevel_allowed && !force && !valid) {
8827         isbddc = PETSC_TRUE;
8828         PetscCall(PCSetType(pc_temp, PCBDDC));
8829         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8830         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8831         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8832         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8833           PetscObjectOptionsBegin((PetscObject)pc_temp);
8834           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8835           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8836           PetscOptionsEnd();
8837           pc_temp->setfromoptionscalled++;
8838         }
8839       }
8840     }
8841     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8842     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8843     if (nisdofs) {
8844       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8845       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8846     }
8847     if (nisneu) {
8848       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8849       PetscCall(ISDestroy(&isarray[nisdofs]));
8850     }
8851     if (nisvert) {
8852       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8853       PetscCall(ISDestroy(&isarray[nis - 1]));
8854     }
8855     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8856 
8857     /* get some info after set from options */
8858     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8859 
8860     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8861     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8862     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8863     force = PETSC_FALSE;
8864     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8865     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8866     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8867     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8868     if (isredundant) {
8869       KSP inner_ksp;
8870       PC  inner_pc;
8871 
8872       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8873       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8874     }
8875 
8876     /* parameters which miss an API */
8877     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8878     if (isbddc) {
8879       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8880 
8881       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8882       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8883       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8884       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8885       if (pcbddc_coarse->benign_saddle_point) {
8886         Mat                    coarsedivudotp_is;
8887         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8888         IS                     row, col;
8889         const PetscInt        *gidxs;
8890         PetscInt               n, st, M, N;
8891 
8892         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8893         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8894         st = st - n;
8895         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8896         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8897         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8898         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
8899         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
8900         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
8901         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
8902         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
8903         PetscCall(ISGetSize(row, &M));
8904         PetscCall(MatGetSize(coarse_mat, &N, NULL));
8905         PetscCall(ISDestroy(&row));
8906         PetscCall(ISDestroy(&col));
8907         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
8908         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
8909         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
8910         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
8911         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
8912         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
8913         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
8914         PetscCall(MatDestroy(&coarsedivudotp));
8915         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
8916         PetscCall(MatDestroy(&coarsedivudotp_is));
8917         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8918         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8919       }
8920     }
8921 
8922     /* propagate symmetry info of coarse matrix */
8923     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
8924     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
8925     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
8926     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
8927     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
8928     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
8929     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
8930 
8931     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
8932     /* set operators */
8933     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
8934     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
8935     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8936     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
8937   }
8938   PetscCall(MatDestroy(&coarseG));
8939   PetscCall(PetscFree(isarray));
8940 #if 0
8941   {
8942     PetscViewer viewer;
8943     char filename[256];
8944     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
8945     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8946     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8947     PetscCall(MatView(coarse_mat,viewer));
8948     PetscCall(PetscViewerPopFormat(viewer));
8949     PetscCall(PetscViewerDestroy(&viewer));
8950   }
8951 #endif
8952 
8953   if (corners) {
8954     Vec             gv;
8955     IS              is;
8956     const PetscInt *idxs;
8957     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8958     PetscScalar    *coords;
8959 
8960     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
8961     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
8962     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
8963     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
8964     PetscCall(VecSetBlockSize(gv, cdim));
8965     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
8966     PetscCall(VecSetType(gv, VECSTANDARD));
8967     PetscCall(VecSetFromOptions(gv));
8968     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
8969 
8970     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8971     PetscCall(ISGetLocalSize(is, &n));
8972     PetscCall(ISGetIndices(is, &idxs));
8973     PetscCall(PetscMalloc1(n * cdim, &coords));
8974     for (i = 0; i < n; i++) {
8975       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8976     }
8977     PetscCall(ISRestoreIndices(is, &idxs));
8978     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8979 
8980     PetscCall(ISGetLocalSize(corners, &n));
8981     PetscCall(ISGetIndices(corners, &idxs));
8982     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
8983     PetscCall(ISRestoreIndices(corners, &idxs));
8984     PetscCall(PetscFree(coords));
8985     PetscCall(VecAssemblyBegin(gv));
8986     PetscCall(VecAssemblyEnd(gv));
8987     PetscCall(VecGetArray(gv, &coords));
8988     if (pcbddc->coarse_ksp) {
8989       PC        coarse_pc;
8990       PetscBool isbddc;
8991 
8992       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
8993       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
8994       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8995         PetscReal *realcoords;
8996 
8997         PetscCall(VecGetLocalSize(gv, &n));
8998 #if defined(PETSC_USE_COMPLEX)
8999         PetscCall(PetscMalloc1(n, &realcoords));
9000         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
9001 #else
9002         realcoords = coords;
9003 #endif
9004         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
9005 #if defined(PETSC_USE_COMPLEX)
9006         PetscCall(PetscFree(realcoords));
9007 #endif
9008       }
9009     }
9010     PetscCall(VecRestoreArray(gv, &coords));
9011     PetscCall(VecDestroy(&gv));
9012   }
9013   PetscCall(ISDestroy(&corners));
9014 
9015   if (pcbddc->coarse_ksp) {
9016     Vec crhs, csol;
9017 
9018     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9019     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9020     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9021     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9022   }
9023   PetscCall(MatDestroy(&coarsedivudotp));
9024 
9025   /* compute null space for coarse solver if the benign trick has been requested */
9026   if (pcbddc->benign_null) {
9027     PetscCall(VecSet(pcbddc->vec1_P, 0.));
9028     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9029     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9030     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9031     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9032     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9033     if (coarse_mat) {
9034       Vec          nullv;
9035       PetscScalar *array, *array2;
9036       PetscInt     nl;
9037 
9038       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9039       PetscCall(VecGetLocalSize(nullv, &nl));
9040       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9041       PetscCall(VecGetArray(nullv, &array2));
9042       PetscCall(PetscArraycpy(array2, array, nl));
9043       PetscCall(VecRestoreArray(nullv, &array2));
9044       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9045       PetscCall(VecNormalize(nullv, NULL));
9046       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9047       PetscCall(VecDestroy(&nullv));
9048     }
9049   }
9050   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
9051 
9052   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9053   if (pcbddc->coarse_ksp) {
9054     PetscBool ispreonly;
9055 
9056     if (CoarseNullSpace) {
9057       PetscBool isnull;
9058 
9059       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9060       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9061       /* TODO: add local nullspaces (if any) */
9062     }
9063     /* setup coarse ksp */
9064     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9065     /* Check coarse problem if in debug mode or if solving with an iterative method */
9066     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9067     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9068       KSP         check_ksp;
9069       KSPType     check_ksp_type;
9070       PC          check_pc;
9071       Vec         check_vec, coarse_vec;
9072       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9073       PetscInt    its;
9074       PetscBool   compute_eigs;
9075       PetscReal  *eigs_r, *eigs_c;
9076       PetscInt    neigs;
9077       const char *prefix;
9078 
9079       /* Create ksp object suitable for estimation of extreme eigenvalues */
9080       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9081       PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9082       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9083       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9084       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9085       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size));
9086       /* prevent from setup unneeded object */
9087       PetscCall(KSPGetPC(check_ksp, &check_pc));
9088       PetscCall(PCSetType(check_pc, PCNONE));
9089       if (ispreonly) {
9090         check_ksp_type = KSPPREONLY;
9091         compute_eigs   = PETSC_FALSE;
9092       } else {
9093         check_ksp_type = KSPGMRES;
9094         compute_eigs   = PETSC_TRUE;
9095       }
9096       PetscCall(KSPSetType(check_ksp, check_ksp_type));
9097       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9098       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9099       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9100       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9101       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9102       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9103       PetscCall(KSPSetFromOptions(check_ksp));
9104       PetscCall(KSPSetUp(check_ksp));
9105       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9106       PetscCall(KSPSetPC(check_ksp, check_pc));
9107       /* create random vec */
9108       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9109       PetscCall(VecSetRandom(check_vec, NULL));
9110       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9111       /* solve coarse problem */
9112       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9113       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9114       /* set eigenvalue estimation if preonly has not been requested */
9115       if (compute_eigs) {
9116         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9117         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9118         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9119         if (neigs) {
9120           lambda_max = eigs_r[neigs - 1];
9121           lambda_min = eigs_r[0];
9122           if (pcbddc->use_coarse_estimates) {
9123             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9124               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9125               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9126             }
9127           }
9128         }
9129       }
9130 
9131       /* check coarse problem residual error */
9132       if (pcbddc->dbg_flag) {
9133         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9134         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9135         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9136         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9137         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9138         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9139         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9140         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9141         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer));
9142         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
9143         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9144         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9145         if (compute_eigs) {
9146           PetscReal          lambda_max_s, lambda_min_s;
9147           KSPConvergedReason reason;
9148           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9149           PetscCall(KSPGetIterationNumber(check_ksp, &its));
9150           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9151           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9152           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9153           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9154         }
9155         PetscCall(PetscViewerFlush(dbg_viewer));
9156         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9157       }
9158       PetscCall(VecDestroy(&check_vec));
9159       PetscCall(VecDestroy(&coarse_vec));
9160       PetscCall(KSPDestroy(&check_ksp));
9161       if (compute_eigs) {
9162         PetscCall(PetscFree(eigs_r));
9163         PetscCall(PetscFree(eigs_c));
9164       }
9165     }
9166   }
9167   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9168   /* print additional info */
9169   if (pcbddc->dbg_flag) {
9170     /* waits until all processes reaches this point */
9171     PetscCall(PetscBarrier((PetscObject)pc));
9172     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9173     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9174   }
9175 
9176   /* free memory */
9177   PetscCall(MatDestroy(&coarse_mat));
9178   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9179   PetscFunctionReturn(PETSC_SUCCESS);
9180 }
9181 
9182 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9183 {
9184   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
9185   PC_IS          *pcis   = (PC_IS *)pc->data;
9186   IS              subset, subset_mult, subset_n;
9187   PetscInt        local_size, coarse_size = 0;
9188   PetscInt       *local_primal_indices = NULL;
9189   const PetscInt *t_local_primal_indices;
9190 
9191   PetscFunctionBegin;
9192   /* Compute global number of coarse dofs */
9193   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9194   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9195   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9196   PetscCall(ISDestroy(&subset_n));
9197   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9198   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9199   PetscCall(ISDestroy(&subset));
9200   PetscCall(ISDestroy(&subset_mult));
9201   PetscCall(ISGetLocalSize(subset_n, &local_size));
9202   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9203   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9204   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9205   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9206   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9207   PetscCall(ISDestroy(&subset_n));
9208 
9209   if (pcbddc->dbg_flag) {
9210     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9211     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9212     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9213     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9214   }
9215 
9216   /* get back data */
9217   *coarse_size_n          = coarse_size;
9218   *local_primal_indices_n = local_primal_indices;
9219   PetscFunctionReturn(PETSC_SUCCESS);
9220 }
9221 
9222 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9223 {
9224   IS           localis_t;
9225   PetscInt     i, lsize, *idxs, n;
9226   PetscScalar *vals;
9227 
9228   PetscFunctionBegin;
9229   /* get indices in local ordering exploiting local to global map */
9230   PetscCall(ISGetLocalSize(globalis, &lsize));
9231   PetscCall(PetscMalloc1(lsize, &vals));
9232   for (i = 0; i < lsize; i++) vals[i] = 1.0;
9233   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9234   PetscCall(VecSet(gwork, 0.0));
9235   PetscCall(VecSet(lwork, 0.0));
9236   if (idxs) { /* multilevel guard */
9237     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9238     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9239   }
9240   PetscCall(VecAssemblyBegin(gwork));
9241   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9242   PetscCall(PetscFree(vals));
9243   PetscCall(VecAssemblyEnd(gwork));
9244   /* now compute set in local ordering */
9245   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9246   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9247   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9248   PetscCall(VecGetSize(lwork, &n));
9249   for (i = 0, lsize = 0; i < n; i++) {
9250     if (PetscRealPart(vals[i]) > 0.5) lsize++;
9251   }
9252   PetscCall(PetscMalloc1(lsize, &idxs));
9253   for (i = 0, lsize = 0; i < n; i++) {
9254     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9255   }
9256   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9257   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9258   *localis = localis_t;
9259   PetscFunctionReturn(PETSC_SUCCESS);
9260 }
9261 
9262 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9263 {
9264   PC_IS   *pcis   = (PC_IS *)pc->data;
9265   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9266   PC_IS   *pcisf;
9267   PC_BDDC *pcbddcf;
9268   PC       pcf;
9269 
9270   PetscFunctionBegin;
9271   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9272   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9273   PetscCall(PCSetType(pcf, PCBDDC));
9274 
9275   pcisf   = (PC_IS *)pcf->data;
9276   pcbddcf = (PC_BDDC *)pcf->data;
9277 
9278   pcisf->is_B_local = pcis->is_B_local;
9279   pcisf->vec1_N     = pcis->vec1_N;
9280   pcisf->BtoNmap    = pcis->BtoNmap;
9281   pcisf->n          = pcis->n;
9282   pcisf->n_B        = pcis->n_B;
9283 
9284   PetscCall(PetscFree(pcbddcf->mat_graph));
9285   PetscCall(PetscFree(pcbddcf->sub_schurs));
9286   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
9287   pcbddcf->sub_schurs            = schurs;
9288   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
9289   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9290   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9291   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
9292   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
9293   pcbddcf->use_faces             = PETSC_TRUE;
9294   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
9295   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
9296   pcbddcf->use_qr_single         = (PetscBool)!constraints;
9297   pcbddcf->fake_change           = PETSC_TRUE;
9298   pcbddcf->dbg_flag              = pcbddc->dbg_flag;
9299 
9300   PetscCall(PCBDDCAdaptiveSelection(pcf));
9301   PetscCall(PCBDDCConstraintsSetUp(pcf));
9302 
9303   *change = pcbddcf->ConstraintMatrix;
9304   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9305   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9306   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
9307 
9308   if (schurs) pcbddcf->sub_schurs = NULL;
9309   pcbddcf->ConstraintMatrix = NULL;
9310   pcbddcf->mat_graph        = NULL;
9311   pcisf->is_B_local         = NULL;
9312   pcisf->vec1_N             = NULL;
9313   pcisf->BtoNmap            = NULL;
9314   PetscCall(PCDestroy(&pcf));
9315   PetscFunctionReturn(PETSC_SUCCESS);
9316 }
9317 
9318 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9319 {
9320   PC_IS          *pcis       = (PC_IS *)pc->data;
9321   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
9322   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9323   Mat             S_j;
9324   PetscInt       *used_xadj, *used_adjncy;
9325   PetscBool       free_used_adj;
9326 
9327   PetscFunctionBegin;
9328   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9329   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9330   free_used_adj = PETSC_FALSE;
9331   if (pcbddc->sub_schurs_layers == -1) {
9332     used_xadj   = NULL;
9333     used_adjncy = NULL;
9334   } else {
9335     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9336       used_xadj   = pcbddc->mat_graph->xadj;
9337       used_adjncy = pcbddc->mat_graph->adjncy;
9338     } else if (pcbddc->computed_rowadj) {
9339       used_xadj   = pcbddc->mat_graph->xadj;
9340       used_adjncy = pcbddc->mat_graph->adjncy;
9341     } else {
9342       PetscBool       flg_row = PETSC_FALSE;
9343       const PetscInt *xadj, *adjncy;
9344       PetscInt        nvtxs;
9345 
9346       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9347       if (flg_row) {
9348         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9349         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9350         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9351         free_used_adj = PETSC_TRUE;
9352       } else {
9353         pcbddc->sub_schurs_layers = -1;
9354         used_xadj                 = NULL;
9355         used_adjncy               = NULL;
9356       }
9357       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9358     }
9359   }
9360 
9361   /* setup sub_schurs data */
9362   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9363   if (!sub_schurs->schur_explicit) {
9364     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9365     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9366     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9367   } else {
9368     Mat       change        = NULL;
9369     Vec       scaling       = NULL;
9370     IS        change_primal = NULL, iP;
9371     PetscInt  benign_n;
9372     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
9373     PetscBool need_change       = PETSC_FALSE;
9374     PetscBool discrete_harmonic = PETSC_FALSE;
9375 
9376     if (!pcbddc->use_vertices && reuse_solvers) {
9377       PetscInt n_vertices;
9378 
9379       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9380       reuse_solvers = (PetscBool)!n_vertices;
9381     }
9382     if (!pcbddc->benign_change_explicit) {
9383       benign_n = pcbddc->benign_n;
9384     } else {
9385       benign_n = 0;
9386     }
9387     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9388        We need a global reduction to avoid possible deadlocks.
9389        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9390     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9391       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9392       PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9393       need_change = (PetscBool)(!need_change);
9394     }
9395     /* If the user defines additional constraints, we import them here */
9396     if (need_change) {
9397       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9398       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9399     }
9400     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9401 
9402     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9403     if (iP) {
9404       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9405       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9406       PetscOptionsEnd();
9407     }
9408     if (discrete_harmonic) {
9409       Mat A;
9410       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9411       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9412       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9413       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9414                                      pcbddc->benign_zerodiag_subs, change, change_primal));
9415       PetscCall(MatDestroy(&A));
9416     } else {
9417       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9418                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9419     }
9420     PetscCall(MatDestroy(&change));
9421     PetscCall(ISDestroy(&change_primal));
9422   }
9423   PetscCall(MatDestroy(&S_j));
9424 
9425   /* free adjacency */
9426   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9427   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9428   PetscFunctionReturn(PETSC_SUCCESS);
9429 }
9430 
9431 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9432 {
9433   PC_IS      *pcis   = (PC_IS *)pc->data;
9434   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9435   PCBDDCGraph graph;
9436 
9437   PetscFunctionBegin;
9438   /* attach interface graph for determining subsets */
9439   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9440     IS       verticesIS, verticescomm;
9441     PetscInt vsize, *idxs;
9442 
9443     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9444     PetscCall(ISGetSize(verticesIS, &vsize));
9445     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9446     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9447     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9448     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9449     PetscCall(PCBDDCGraphCreate(&graph));
9450     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9451     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9452     PetscCall(ISDestroy(&verticescomm));
9453     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9454   } else {
9455     graph = pcbddc->mat_graph;
9456   }
9457   /* print some info */
9458   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9459     IS       vertices;
9460     PetscInt nv, nedges, nfaces;
9461     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9462     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9463     PetscCall(ISGetSize(vertices, &nv));
9464     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9465     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9466     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9467     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9468     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9469     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9470     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9471     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9472   }
9473 
9474   /* sub_schurs init */
9475   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9476   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
9477 
9478   /* free graph struct */
9479   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9480   PetscFunctionReturn(PETSC_SUCCESS);
9481 }
9482 
9483 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9484 {
9485   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
9486   PetscInt        n     = pc->pmat->rmap->n, ln, ni, st;
9487   const PetscInt *idxs;
9488   IS              gis;
9489 
9490   PetscFunctionBegin;
9491   if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9492   PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9493   PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9494   PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9495   PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9496   PetscCall(ISGetLocalSize(is, &ni));
9497   PetscCall(ISGetIndices(is, &idxs));
9498   for (PetscInt i = 0; i < ni; i++) {
9499     if (idxs[i] < 0 || idxs[i] >= ln) continue;
9500     matis->sf_leafdata[idxs[i]] = 1;
9501   }
9502   PetscCall(ISRestoreIndices(is, &idxs));
9503   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9504   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9505   ln = 0;
9506   for (PetscInt i = 0; i < n; i++) {
9507     if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9508   }
9509   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9510   PetscCall(ISView(gis, viewer));
9511   PetscCall(ISDestroy(&gis));
9512   PetscFunctionReturn(PETSC_SUCCESS);
9513 }
9514 
9515 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9516 {
9517   PetscInt    header[11];
9518   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9519   PetscViewer viewer;
9520   MPI_Comm    comm = PetscObjectComm((PetscObject)pc);
9521 
9522   PetscFunctionBegin;
9523   PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9524   if (load) {
9525     IS  is;
9526     Mat A;
9527 
9528     PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9529     PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9530     PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9531     PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9532     PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9533     PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9534     PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9535     PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9536     PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9537     PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9538     PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9539     if (header[0]) {
9540       PetscCall(ISCreate(comm, &is));
9541       PetscCall(ISLoad(is, viewer));
9542       PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9543       PetscCall(ISDestroy(&is));
9544     }
9545     if (header[1]) {
9546       PetscCall(ISCreate(comm, &is));
9547       PetscCall(ISLoad(is, viewer));
9548       PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9549       PetscCall(ISDestroy(&is));
9550     }
9551     if (header[2]) {
9552       IS *isarray;
9553 
9554       PetscCall(PetscMalloc1(header[2], &isarray));
9555       for (PetscInt i = 0; i < header[2]; i++) {
9556         PetscCall(ISCreate(comm, &isarray[i]));
9557         PetscCall(ISLoad(isarray[i], viewer));
9558       }
9559       PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9560       for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9561       PetscCall(PetscFree(isarray));
9562     }
9563     if (header[3]) {
9564       PetscCall(ISCreate(comm, &is));
9565       PetscCall(ISLoad(is, viewer));
9566       PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9567       PetscCall(ISDestroy(&is));
9568     }
9569     if (header[4]) {
9570       PetscCall(MatCreate(comm, &A));
9571       PetscCall(MatSetType(A, MATAIJ));
9572       PetscCall(MatLoad(A, viewer));
9573       PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9574       PetscCall(MatDestroy(&A));
9575     }
9576     if (header[9]) {
9577       PetscCall(MatCreate(comm, &A));
9578       PetscCall(MatSetType(A, MATIS));
9579       PetscCall(MatLoad(A, viewer));
9580       PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9581       PetscCall(MatDestroy(&A));
9582     }
9583   } else {
9584     header[0]  = (PetscInt)!!pcbddc->DirichletBoundariesLocal;
9585     header[1]  = (PetscInt)!!pcbddc->NeumannBoundariesLocal;
9586     header[2]  = pcbddc->n_ISForDofsLocal;
9587     header[3]  = (PetscInt)!!pcbddc->user_primal_vertices_local;
9588     header[4]  = (PetscInt)!!pcbddc->discretegradient;
9589     header[5]  = pcbddc->nedorder;
9590     header[6]  = pcbddc->nedfield;
9591     header[7]  = (PetscInt)pcbddc->nedglobal;
9592     header[8]  = (PetscInt)pcbddc->conforming;
9593     header[9]  = (PetscInt)!!pcbddc->divudotp;
9594     header[10] = (PetscInt)pcbddc->divudotp_trans;
9595     if (header[4]) header[3] = 0;
9596 
9597     PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9598     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9599     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9600     for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9601     if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9602     if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9603     if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9604   }
9605   PetscCall(PetscViewerDestroy(&viewer));
9606   PetscFunctionReturn(PETSC_SUCCESS);
9607 }
9608 
9609 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9610 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9611 {
9612   Mat         At;
9613   IS          rows;
9614   PetscInt    rst, ren;
9615   PetscLayout rmap;
9616 
9617   PetscFunctionBegin;
9618   rst = ren = 0;
9619   if (ccomm != MPI_COMM_NULL) {
9620     PetscCall(PetscLayoutCreate(ccomm, &rmap));
9621     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9622     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9623     PetscCall(PetscLayoutSetUp(rmap));
9624     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9625   }
9626   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9627   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9628   PetscCall(ISDestroy(&rows));
9629 
9630   if (ccomm != MPI_COMM_NULL) {
9631     Mat_MPIAIJ *a, *b;
9632     IS          from, to;
9633     Vec         gvec;
9634     PetscInt    lsize;
9635 
9636     PetscCall(MatCreate(ccomm, B));
9637     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9638     PetscCall(MatSetType(*B, MATAIJ));
9639     PetscCall(PetscLayoutDestroy(&(*B)->rmap));
9640     PetscCall(PetscLayoutSetUp((*B)->cmap));
9641     a = (Mat_MPIAIJ *)At->data;
9642     b = (Mat_MPIAIJ *)(*B)->data;
9643     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9644     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9645     PetscCall(PetscObjectReference((PetscObject)a->A));
9646     PetscCall(PetscObjectReference((PetscObject)a->B));
9647     b->A = a->A;
9648     b->B = a->B;
9649 
9650     b->donotstash   = a->donotstash;
9651     b->roworiented  = a->roworiented;
9652     b->rowindices   = NULL;
9653     b->rowvalues    = NULL;
9654     b->getrowactive = PETSC_FALSE;
9655 
9656     (*B)->rmap         = rmap;
9657     (*B)->factortype   = A->factortype;
9658     (*B)->assembled    = PETSC_TRUE;
9659     (*B)->insertmode   = NOT_SET_VALUES;
9660     (*B)->preallocated = PETSC_TRUE;
9661 
9662     if (a->colmap) {
9663 #if defined(PETSC_USE_CTABLE)
9664       PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9665 #else
9666       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9667       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9668 #endif
9669     } else b->colmap = NULL;
9670     if (a->garray) {
9671       PetscInt len;
9672       len = a->B->cmap->n;
9673       PetscCall(PetscMalloc1(len + 1, &b->garray));
9674       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9675     } else b->garray = NULL;
9676 
9677     PetscCall(PetscObjectReference((PetscObject)a->lvec));
9678     b->lvec = a->lvec;
9679 
9680     /* cannot use VecScatterCopy */
9681     PetscCall(VecGetLocalSize(b->lvec, &lsize));
9682     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9683     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9684     PetscCall(MatCreateVecs(*B, &gvec, NULL));
9685     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9686     PetscCall(ISDestroy(&from));
9687     PetscCall(ISDestroy(&to));
9688     PetscCall(VecDestroy(&gvec));
9689   }
9690   PetscCall(MatDestroy(&At));
9691   PetscFunctionReturn(PETSC_SUCCESS);
9692 }
9693 
9694 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
9695 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9696 {
9697   PetscBool isaij;
9698   MPI_Comm  comm;
9699 
9700   PetscFunctionBegin;
9701   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9702   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9703   PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9704   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9705   if (isaij) { /* SeqAIJ supports repeated rows */
9706     PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9707   } else {
9708     Mat                A_loc;
9709     Mat_SeqAIJ        *da;
9710     PetscSF            sf;
9711     PetscInt           ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9712     PetscScalar       *daa;
9713     const PetscInt    *idxs;
9714     const PetscSFNode *iremotes;
9715     PetscSFNode       *remotes;
9716 
9717     /* SF for incoming rows */
9718     PetscCall(PetscSFCreate(comm, &sf));
9719     PetscCall(ISGetLocalSize(rows, &ni));
9720     PetscCall(ISGetIndices(rows, &idxs));
9721     PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9722     PetscCall(ISRestoreIndices(rows, &idxs));
9723 
9724     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9725     da = (Mat_SeqAIJ *)A_loc->data;
9726     PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9727     for (PetscInt i = 0; i < m; i++) {
9728       rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9729       rdata[2 * i + 1] = da->i[i];
9730     }
9731     PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9732     PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9733     PetscCall(PetscMalloc1(ni + 1, &di));
9734     di[0] = 0;
9735     for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9736     PetscCall(PetscMalloc1(di[ni], &dj));
9737     PetscCall(PetscMalloc1(di[ni], &daa));
9738     PetscCall(PetscMalloc1(di[ni], &remotes));
9739 
9740     PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));
9741 
9742     /* SF graph for nonzeros */
9743     c = 0;
9744     for (PetscInt i = 0; i < ni; i++) {
9745       const PetscInt rank  = iremotes[i].rank;
9746       const PetscInt rsize = ldata[2 * i];
9747       for (PetscInt j = 0; j < rsize; j++) {
9748         remotes[c].rank  = rank;
9749         remotes[c].index = ldata[2 * i + 1] + j;
9750         c++;
9751       }
9752     }
9753     PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9754     PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9755     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9756     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9757     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9758     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9759 
9760     PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9761     PetscCall(MatDestroy(&A_loc));
9762     PetscCall(PetscSFDestroy(&sf));
9763     PetscCall(PetscFree(di));
9764     PetscCall(PetscFree(dj));
9765     PetscCall(PetscFree(daa));
9766     PetscCall(PetscFree(remotes));
9767     PetscCall(PetscFree2(ldata, rdata));
9768   }
9769   PetscFunctionReturn(PETSC_SUCCESS);
9770 }
9771