xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision ad49c6e3020e95528bc71effe5cd79a1b737ce7c)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <petsc/private/pcbddcimpl.h>
3 #include <petsc/private/pcbddcprivateimpl.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <../src/mat/impls/dense/seq/dense.h>
6 #include <petscdmplex.h>
7 #include <petscblaslapack.h>
8 #include <petsc/private/sfimpl.h>
9 #include <petsc/private/dmpleximpl.h>
10 #include <petscdmda.h>
11 
12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13 
14 /* if range is true,  it returns B s.t. span{B} = range(A)
15    if range is false, it returns B s.t. range(B) _|_ range(A) */
16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
17 {
18   PetscScalar *uwork, *data, *U, ds = 0.;
19   PetscReal   *sing;
20   PetscBLASInt bM, bN, lwork, lierr, di = 1;
21   PetscInt     ulw, i, nr, nc, n;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal *rwork2;
24 #endif
25 
26   PetscFunctionBegin;
27   PetscCall(MatGetSize(A, &nr, &nc));
28   if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
29 
30   /* workspace */
31   if (!work) {
32     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
33     PetscCall(PetscMalloc1(ulw, &uwork));
34   } else {
35     ulw   = lw;
36     uwork = work;
37   }
38   n = PetscMin(nr, nc);
39   if (!rwork) {
40     PetscCall(PetscMalloc1(n, &sing));
41   } else {
42     sing = rwork;
43   }
44 
45   /* SVD */
46   PetscCall(PetscMalloc1(nr * nr, &U));
47   PetscCall(PetscBLASIntCast(nr, &bM));
48   PetscCall(PetscBLASIntCast(nc, &bN));
49   PetscCall(PetscBLASIntCast(ulw, &lwork));
50   PetscCall(MatDenseGetArray(A, &data));
51   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
52 #if !defined(PETSC_USE_COMPLEX)
53   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
54 #else
55   PetscCall(PetscMalloc1(5 * n, &rwork2));
56   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
57   PetscCall(PetscFree(rwork2));
58 #endif
59   PetscCall(PetscFPTrapPop());
60   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
61   PetscCall(MatDenseRestoreArray(A, &data));
62   for (i = 0; i < n; i++)
63     if (sing[i] < PETSC_SMALL) break;
64   if (!rwork) PetscCall(PetscFree(sing));
65   if (!work) PetscCall(PetscFree(uwork));
66   /* create B */
67   if (!range) {
68     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
69     PetscCall(MatDenseGetArray(*B, &data));
70     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
71   } else {
72     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
73     PetscCall(MatDenseGetArray(*B, &data));
74     PetscCall(PetscArraycpy(data, U, i * nr));
75   }
76   PetscCall(MatDenseRestoreArray(*B, &data));
77   PetscCall(PetscFree(U));
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 /* TODO REMOVE */
82 #if defined(PRINT_GDET)
83 static int inc = 0;
84 static int lev = 0;
85 #endif
86 
87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
88 {
89   Mat          GE, GEd;
90   PetscInt     rsize, csize, esize;
91   PetscScalar *ptr;
92 
93   PetscFunctionBegin;
94   PetscCall(ISGetSize(edge, &esize));
95   if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
96   PetscCall(ISGetSize(extrow, &rsize));
97   PetscCall(ISGetSize(extcol, &csize));
98 
99   /* gradients */
100   ptr = work + 5 * esize;
101   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
103   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104   PetscCall(MatDestroy(&GE));
105 
106   /* constants */
107   ptr += rsize * csize;
108   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111   PetscCall(MatDestroy(&GE));
112   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113   PetscCall(MatDestroy(&GEd));
114 
115   if (corners) {
116     Mat                GEc;
117     const PetscScalar *vals;
118     PetscScalar        v;
119 
120     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
121     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122     PetscCall(MatDenseGetArrayRead(GEd, &vals));
123     /* v       = PetscAbsScalar(vals[0]); */
124     v        = 1.;
125     cvals[0] = vals[0] / v;
126     cvals[1] = vals[1] / v;
127     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128     PetscCall(MatScale(*GKins, 1. / v));
129 #if defined(PRINT_GDET)
130     {
131       PetscViewer viewer;
132       char        filename[256];
133       PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137       PetscCall(MatView(GEc, viewer));
138       PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139       PetscCall(MatView(*GKins, viewer));
140       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141       PetscCall(MatView(GEd, viewer));
142       PetscCall(PetscViewerDestroy(&viewer));
143     }
144 #endif
145     PetscCall(MatDestroy(&GEd));
146     PetscCall(MatDestroy(&GEc));
147   }
148   PetscFunctionReturn(PETSC_SUCCESS);
149 }
150 
151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *);
152 
153 PetscErrorCode PCBDDCNedelecSupport(PC pc)
154 {
155   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
156   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
157   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158   PetscSF                sfv;
159   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160   MPI_Comm               comm;
161   IS                     lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162   IS                    *eedges, *extrows, *extcols, *alleedges;
163   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164   PetscScalar           *vals, *work;
165   PetscReal             *rwork;
166   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
167   PetscInt               ne, nv, Lv, order, n, field;
168   PetscInt               i, j, extmem, cum, maxsize, nee;
169   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170   PetscInt              *sfvleaves, *sfvroots;
171   PetscInt              *corners, *cedges;
172   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173   PetscInt              *emarks;
174   PetscBool              print, eerr, done, lrc[2], conforming, global, setprimal;
175 
176   PetscFunctionBegin;
177   /* If the discrete gradient is defined for a subset of dofs and global is true,
178      it assumes G is given in global ordering for all the dofs.
179      Otherwise, the ordering is global for the Nedelec field */
180   order      = pcbddc->nedorder;
181   conforming = pcbddc->conforming;
182   field      = pcbddc->nedfield;
183   global     = pcbddc->nedglobal;
184   setprimal  = PETSC_FALSE;
185   print      = PETSC_FALSE;
186 
187   /* Command line customization */
188   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190   /* print debug info and adaptive order TODO: to be removed */
191   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193   PetscOptionsEnd();
194 
195   /* Return if there are no edges in the decomposition */
196   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199   PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200   lrc[0] = PETSC_FALSE;
201   for (i = 0; i < n; i++) {
202     if (PetscRealPart(vals[i]) > 2.) {
203       lrc[0] = PETSC_TRUE;
204       break;
205     }
206   }
207   PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208   PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm));
209   if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
210 
211   /* Get Nedelec field */
212   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213   if (pcbddc->n_ISForDofsLocal && field >= 0) {
214     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215     nedfieldlocal = pcbddc->ISForDofsLocal[field];
216     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218     ne            = n;
219     nedfieldlocal = NULL;
220     global        = PETSC_TRUE;
221   } else if (field == PETSC_DECIDE) {
222     PetscInt rst, ren, *idx;
223 
224     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227     for (i = rst; i < ren; i++) {
228       PetscInt nc;
229 
230       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233     }
234     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236     PetscCall(PetscMalloc1(n, &idx));
237     for (i = 0, ne = 0; i < n; i++)
238       if (matis->sf_leafdata[i]) idx[ne++] = i;
239     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240   } else {
241     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242   }
243 
244   /* Sanity checks */
245   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
248 
249   /* Just set primal dofs and return */
250   if (setprimal) {
251     IS        enedfieldlocal;
252     PetscInt *eidxs;
253 
254     PetscCall(PetscMalloc1(ne, &eidxs));
255     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256     if (nedfieldlocal) {
257       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258       for (i = 0, cum = 0; i < ne; i++) {
259         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260       }
261       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262     } else {
263       for (i = 0, cum = 0; i < ne; i++) {
264         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265       }
266     }
267     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270     PetscCall(PetscFree(eidxs));
271     PetscCall(ISDestroy(&nedfieldlocal));
272     PetscCall(ISDestroy(&enedfieldlocal));
273     PetscFunctionReturn(PETSC_SUCCESS);
274   }
275 
276   /* Compute some l2g maps */
277   if (nedfieldlocal) {
278     IS is;
279 
280     /* need to map from the local Nedelec field to local numbering */
281     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286     if (global) {
287       PetscCall(PetscObjectReference((PetscObject)al2g));
288       el2g = al2g;
289     } else {
290       IS gis;
291 
292       PetscCall(ISRenumber(is, NULL, NULL, &gis));
293       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294       PetscCall(ISDestroy(&gis));
295     }
296     PetscCall(ISDestroy(&is));
297   } else {
298     /* one ref for the destruction of al2g, one for el2g */
299     PetscCall(PetscObjectReference((PetscObject)al2g));
300     PetscCall(PetscObjectReference((PetscObject)al2g));
301     el2g = al2g;
302     fl2g = NULL;
303   }
304 
305   /* Start communication to drop connections for interior edges (for cc analysis only) */
306   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308   if (nedfieldlocal) {
309     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312   } else {
313     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314   }
315   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
317 
318   /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319      Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320   if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));
321 
322   /* drop connections with interior edges to avoid unneeded communications and memory movements */
323   PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324   PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325   PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326   if (global) {
327     PetscInt rst;
328 
329     PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331       if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332     }
333     PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334     PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335   } else {
336     PetscInt *tbz;
337 
338     PetscCall(PetscMalloc1(ne, &tbz));
339     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342     for (i = 0, cum = 0; i < ne; i++)
343       if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345     PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346     PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347     PetscCall(PetscFree(tbz));
348   }
349 
350   /* Extract subdomain relevant rows of G  */
351   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353   PetscCall(MatAIJExtractRows(G, lned, &lGall));
354   /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356   PetscCall(ISDestroy(&lned));
357   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358   PetscCall(MatDestroy(&lGall));
359   PetscCall(MatISGetLocalMat(lGis, &lG));
360   if (matis->allow_repeated) { /* multi-element support */
361     Mat                   *lGn, B;
362     IS                    *is_rows, *tcols, tmap, nmap;
363     PetscInt               subnv;
364     const PetscInt        *subvidxs;
365     ISLocalToGlobalMapping mapn;
366 
367     PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371       if (fl2g) {
372         PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373       } else {
374         PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375         is_rows[i] = pcbddc->local_subs[i];
376       }
377       PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379       PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380       PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382       PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383       PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384     }
385 
386     /* Create new MATIS with repeated vertices */
387     PetscCall(MatCreate(comm, &B));
388     PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389     PetscCall(MatSetType(B, MATIS));
390     PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391     PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392     PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393     PetscCall(ISDestroy(&tmap));
394     PetscCall(ISGetLocalSize(nmap, &subnv));
395     PetscCall(ISGetIndices(nmap, &subvidxs));
396     PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397     PetscCall(ISRestoreIndices(nmap, &subvidxs));
398     PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399     PetscCall(ISDestroy(&tmap));
400     PetscCall(ISDestroy(&nmap));
401     PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402     PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403     PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405       PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406       PetscCall(ISDestroy(&is_rows[i]));
407       PetscCall(ISDestroy(&tcols[i]));
408     }
409     PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410     PetscCall(PetscFree(lGn));
411     PetscCall(PetscFree(is_rows));
412     PetscCall(PetscFree(tcols));
413     PetscCall(MatISSetLocalMat(B, lG));
414     PetscCall(MatDestroy(&lG));
415 
416     PetscCall(MatDestroy(&lGis));
417     lGis = B;
418 
419     lGis->assembled = PETSC_TRUE;
420   }
421   PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));
422 
423   /* SF for nodal dofs communications */
424   PetscCall(MatGetLocalSize(G, NULL, &Lv));
425   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426   PetscCall(PetscObjectReference((PetscObject)vl2g));
427   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428   PetscCall(PetscSFCreate(comm, &sfv));
429   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
432 
433   if (elements_corners) {
434     IS      tmp;
435     Vec     global, local;
436     Mat_IS *tGis = (Mat_IS *)lGis->data;
437 
438     PetscCall(MatCreateVecs(lGis, &global, NULL));
439     PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440     PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441     PetscCall(VecDestroy(&global));
442     PetscCall(VecDestroy(&local));
443     elements_corners = tmp;
444   }
445 
446   /* Destroy temporary G */
447   PetscCall(MatISGetLocalMat(lGis, &lG));
448   PetscCall(PetscObjectReference((PetscObject)lG));
449   PetscCall(MatDestroy(&G));
450   PetscCall(MatDestroy(&lGis));
451 
452   if (print) {
453     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454     PetscCall(MatView(lG, NULL));
455   }
456 
457   /* Save lG for values insertion in change of basis */
458   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
459 
460   /* Analyze the edge-nodes connections (duplicate lG) */
461   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463   PetscCall(PetscBTCreate(nv, &btv));
464   PetscCall(PetscBTCreate(ne, &bte));
465   PetscCall(PetscBTCreate(ne, &btb));
466   PetscCall(PetscBTCreate(ne, &btbd));
467   /* need to import the boundary specification to ensure the
468      proper detection of coarse edges' endpoints */
469   if (pcbddc->DirichletBoundariesLocal) {
470     IS is;
471 
472     if (fl2g) {
473       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474     } else {
475       is = pcbddc->DirichletBoundariesLocal;
476     }
477     PetscCall(ISGetLocalSize(is, &cum));
478     PetscCall(ISGetIndices(is, &idxs));
479     for (i = 0; i < cum; i++) {
480       if (idxs[i] >= 0 && idxs[i] < ne) {
481         PetscCall(PetscBTSet(btb, idxs[i]));
482         PetscCall(PetscBTSet(btbd, idxs[i]));
483       }
484     }
485     PetscCall(ISRestoreIndices(is, &idxs));
486     if (fl2g) PetscCall(ISDestroy(&is));
487   }
488   if (pcbddc->NeumannBoundariesLocal) {
489     IS is;
490 
491     if (fl2g) {
492       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493     } else {
494       is = pcbddc->NeumannBoundariesLocal;
495     }
496     PetscCall(ISGetLocalSize(is, &cum));
497     PetscCall(ISGetIndices(is, &idxs));
498     for (i = 0; i < cum; i++) {
499       if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500     }
501     PetscCall(ISRestoreIndices(is, &idxs));
502     if (fl2g) PetscCall(ISDestroy(&is));
503   }
504 
505   /* Count neighs per dof */
506   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));
508 
509   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510      for proper detection of coarse edges' endpoints */
511   PetscCall(PetscBTCreate(ne, &btee));
512   for (i = 0; i < ne; i++) {
513     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514   }
515   PetscCall(PetscMalloc1(ne, &marks));
516   if (!conforming) {
517     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519   }
520   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521   PetscCall(MatSeqAIJGetArray(lGe, &vals));
522   cum = 0;
523   for (i = 0; i < ne; i++) {
524     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525     if (!PetscBTLookup(btee, i)) {
526       marks[cum++] = i;
527       continue;
528     }
529     /* set badly connected edge dofs as primal */
530     if (!conforming) {
531       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532         marks[cum++] = i;
533         PetscCall(PetscBTSet(bte, i));
534         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535       } else {
536         /* every edge dofs should be connected through a certain number of nodal dofs
537            to other edge dofs belonging to coarse edges
538            - at most 2 endpoints
539            - order-1 interior nodal dofs
540            - no undefined nodal dofs (nconn < order)
541         */
542         PetscInt ends = 0, ints = 0, undef = 0;
543         for (j = ii[i]; j < ii[i + 1]; j++) {
544           PetscInt v     = jj[j], k;
545           PetscInt nconn = iit[v + 1] - iit[v];
546           for (k = iit[v]; k < iit[v + 1]; k++)
547             if (!PetscBTLookup(btee, jjt[k])) nconn--;
548           if (nconn > order) ends++;
549           else if (nconn == order) ints++;
550           else undef++;
551         }
552         if (undef || ends > 2 || ints != order - 1) {
553           marks[cum++] = i;
554           PetscCall(PetscBTSet(bte, i));
555           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556         }
557       }
558     }
559     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560     if (!order && ii[i + 1] != ii[i]) {
561       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563     }
564   }
565   PetscCall(PetscBTDestroy(&btee));
566   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568   if (!conforming) {
569     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570     PetscCall(MatDestroy(&lGt));
571   }
572   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
573 
574   /* identify splitpoints and corner candidates */
575   PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576   PetscCall(PetscBTCreate(nv, &btvcand));
577   if (elements_corners) {
578     PetscCall(ISGetLocalSize(elements_corners, &cum));
579     PetscCall(ISGetIndices(elements_corners, &idxs));
580     for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581     PetscCall(ISRestoreIndices(elements_corners, &idxs));
582   }
583 
584   if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585     PetscSF   emlsf, vmlsf;
586     PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587     PetscInt  cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;
588 
589     PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591     PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
593 
594     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));
596 
597     PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598     for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599     PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600     PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);
601 
602     PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603     for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604     PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605     PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);
606 
607     PetscCall(PetscMalloc1(ne, &eleaves));
608     PetscCall(PetscMalloc1(nv, &vleaves));
609     for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX;
610     for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX;
611     PetscCall(PetscMalloc1(emnl, &meleaves));
612     PetscCall(PetscMalloc1(vmnl, &mvleaves));
613 
614     PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615     PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616     for (i = 0; i < n_subs; i++) {
617       const PetscInt *idxs;
618       const PetscInt  subid = cum_subs + i;
619       PetscInt        ns;
620 
621       PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622       PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623       for (j = 0; j < ns; j++) {
624         const PetscInt e = idxs[j];
625 
626         eleaves[e] = subid;
627         for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628       }
629       PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630     }
631     PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632     PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633     PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634     PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635     PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636     PetscCall(PetscFree(eleaves));
637     PetscCall(PetscFree(vleaves));
638 
639     PetscCall(PetscMalloc1(ne + 1, &eneighs));
640     eneighs[0] = meleaves;
641     for (i = 0; i < ne; i++) {
642       PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643       eneighs[i + 1] = eneighs[i] + ecount[i];
644     }
645     PetscCall(PetscMalloc1(nv + 1, &vneighs));
646     vneighs[0] = mvleaves;
647     for (i = 0; i < nv; i++) {
648       PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649       vneighs[i + 1] = vneighs[i] + vcount[i];
650     }
651   } else {
652     PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653     PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654   }
655 
656   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657   if (print) {
658     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659     PetscCall(MatView(lGe, NULL));
660     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661     PetscCall(MatView(lGt, NULL));
662   }
663   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664   PetscCall(MatSeqAIJGetArray(lGt, &vals));
665   for (i = 0; i < nv; i++) {
666     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668     if (!order) { /* variable order */
669       PetscReal vorder = 0.;
670 
671       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674       ord = 1;
675     }
676     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677       const PetscInt e = jj[j];
678 
679       if (PetscBTLookup(btbd, e)) {
680         bdir = PETSC_TRUE;
681         break;
682       }
683       if (vc != ecount[e]) {
684         sneighs = PETSC_FALSE;
685       } else {
686         const PetscInt *vn = vneighs[i], *en = eneighs[e];
687 
688         for (PetscInt k = 0; k < vc; k++) {
689           if (vn[k] != en[k]) {
690             sneighs = PETSC_FALSE;
691             break;
692           }
693         }
694       }
695     }
696     if (elements_corners) test = 0;
697     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699       PetscCall(PetscBTSet(btv, i));
700     } else if (test == ord) {
701       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703         PetscCall(PetscBTSet(btv, i));
704       } else if (!elements_corners) {
705         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706         PetscCall(PetscBTSet(btvcand, i));
707       }
708     }
709   }
710   PetscCall(PetscBTDestroy(&btbd));
711 
712   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713   if (order != 1) {
714     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716     for (i = 0; i < nv; i++) {
717       if (PetscBTLookup(btvcand, i)) {
718         PetscBool found = PETSC_FALSE;
719         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720           PetscInt k, e = jj[j];
721           if (PetscBTLookup(bte, e)) continue;
722           for (k = iit[e]; k < iit[e + 1]; k++) {
723             PetscInt v = jjt[k];
724             if (v != i && PetscBTLookup(btvcand, v)) {
725               found = PETSC_TRUE;
726               break;
727             }
728           }
729         }
730         if (!found) {
731           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732           PetscCall(PetscBTClear(btvcand, i));
733         } else {
734           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735         }
736       }
737     }
738     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739   }
740   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742   PetscCall(MatDestroy(&lGe));
743 
744   /* Get the local G^T explicitly */
745   PetscCall(MatDestroy(&lGt));
746   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
748 
749   /* Mark shared nodal dofs */
750   PetscCall(PetscBTCreate(nv, &btvi));
751   for (i = 0; i < nv; i++) {
752     if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753   }
754 
755   if (matis->allow_repeated) {
756     PetscCall(PetscFree(eneighs[0]));
757     PetscCall(PetscFree(vneighs[0]));
758     PetscCall(PetscFree(eneighs));
759     PetscCall(PetscFree(vneighs));
760   }
761   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
763 
764   /* communicate corners and splitpoints */
765   PetscCall(PetscMalloc1(nv, &vmarks));
766   PetscCall(PetscArrayzero(sfvleaves, nv));
767   PetscCall(PetscArrayzero(sfvroots, Lv));
768   for (i = 0; i < nv; i++)
769     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
770 
771   if (print) {
772     IS tbz;
773 
774     cum = 0;
775     for (i = 0; i < nv; i++)
776       if (sfvleaves[i]) vmarks[cum++] = i;
777 
778     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780     PetscCall(ISView(tbz, NULL));
781     PetscCall(ISDestroy(&tbz));
782   }
783 
784   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
788 
789   /* Zero rows of lGt corresponding to identified corners
790      and interior nodal dofs */
791   cum = 0;
792   for (i = 0; i < nv; i++) {
793     if (sfvleaves[i]) {
794       vmarks[cum++] = i;
795       PetscCall(PetscBTSet(btv, i));
796     } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797   }
798   PetscCall(PetscBTDestroy(&btvi));
799   if (print) {
800     IS tbz;
801 
802     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804     PetscCall(ISView(tbz, NULL));
805     PetscCall(ISDestroy(&tbz));
806   }
807   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808   PetscCall(PetscFree(vmarks));
809   PetscCall(PetscSFDestroy(&sfv));
810   PetscCall(PetscFree2(sfvleaves, sfvroots));
811 
812   /* Recompute G */
813   PetscCall(MatDestroy(&lG));
814   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815   if (print) {
816     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817     PetscCall(MatView(lG, NULL));
818     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819     PetscCall(MatView(lGt, NULL));
820   }
821 
822   /* Get primal dofs (if any) */
823   cum = 0;
824   for (i = 0; i < ne; i++) {
825     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826   }
827   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829   if (print) {
830     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831     PetscCall(ISView(primals, NULL));
832   }
833   PetscCall(PetscBTDestroy(&bte));
834   /* TODO: what if the user passed in some of them ?  */
835   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836   PetscCall(ISDestroy(&primals));
837 
838   /* Compute edge connectivity */
839   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
840 
841   /* Symbolic conn = lG*lGt */
842   if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843     PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844     PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845     PetscCall(MatProductSetAlgorithm(conn, "default"));
846     PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848     PetscCall(MatProductSetFromOptions(conn));
849     PetscCall(MatProductSymbolic(conn));
850     PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851     if (fl2g) {
852       PetscBT   btf;
853       PetscInt *iia, *jja, *iiu, *jju;
854       PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
855 
856       /* create CSR for all local dofs */
857       PetscCall(PetscMalloc1(n + 1, &iia));
858       if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859         PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860         iiu = pcbddc->mat_graph->xadj;
861         jju = pcbddc->mat_graph->adjncy;
862       } else if (pcbddc->use_local_adj) {
863         rest = PETSC_TRUE;
864         PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865       } else {
866         free = PETSC_TRUE;
867         PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868         iiu[0] = 0;
869         for (i = 0; i < n; i++) {
870           iiu[i + 1] = i + 1;
871           jju[i]     = -1;
872         }
873       }
874 
875       /* import sizes of CSR */
876       iia[0] = 0;
877       for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
878 
879       /* overwrite entries corresponding to the Nedelec field */
880       PetscCall(PetscBTCreate(n, &btf));
881       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882       for (i = 0; i < ne; i++) {
883         PetscCall(PetscBTSet(btf, idxs[i]));
884         iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885       }
886 
887       /* iia in CSR */
888       for (i = 0; i < n; i++) iia[i + 1] += iia[i];
889 
890       /* jja in CSR */
891       PetscCall(PetscMalloc1(iia[n], &jja));
892       for (i = 0; i < n; i++)
893         if (!PetscBTLookup(btf, i))
894           for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
895 
896       /* map edge dofs connectivity */
897       if (jj) {
898         PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899         for (i = 0; i < ne; i++) {
900           PetscInt e = idxs[i];
901           for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902         }
903       }
904       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906       if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907       if (free) PetscCall(PetscFree2(iiu, jju));
908       PetscCall(PetscBTDestroy(&btf));
909     } else {
910       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911     }
912     PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913     PetscCall(MatDestroy(&conn));
914   }
915 
916   /* Analyze interface for edge dofs */
917   PetscCall(PCBDDCAnalyzeInterface(pc));
918   pcbddc->mat_graph->twodim = PETSC_FALSE;
919 
920   /* Get coarse edges in the edge space */
921   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
922 
923   if (fl2g) {
924     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925     PetscCall(PetscMalloc1(nee, &eedges));
926     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927   } else {
928     eedges  = alleedges;
929     primals = allprimals;
930   }
931 
932   /* Mark fine edge dofs with their coarse edge id */
933   PetscCall(PetscArrayzero(marks, ne));
934   PetscCall(ISGetLocalSize(primals, &cum));
935   PetscCall(ISGetIndices(primals, &idxs));
936   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937   PetscCall(ISRestoreIndices(primals, &idxs));
938   if (print) {
939     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940     PetscCall(ISView(primals, NULL));
941   }
942 
943   maxsize = 0;
944   for (i = 0; i < nee; i++) {
945     PetscInt size, mark = i + 1;
946 
947     PetscCall(ISGetLocalSize(eedges[i], &size));
948     PetscCall(ISGetIndices(eedges[i], &idxs));
949     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950     PetscCall(ISRestoreIndices(eedges[i], &idxs));
951     maxsize = PetscMax(maxsize, size);
952   }
953 
954   /* Find coarse edge endpoints */
955   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957   for (i = 0; i < nee; i++) {
958     PetscInt mark = i + 1, size;
959 
960     PetscCall(ISGetLocalSize(eedges[i], &size));
961     if (!size && nedfieldlocal) continue;
962     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963     PetscCall(ISGetIndices(eedges[i], &idxs));
964     if (print) {
965       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966       PetscCall(ISView(eedges[i], NULL));
967     }
968     for (j = 0; j < size; j++) {
969       PetscInt k, ee = idxs[j];
970       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee));
971       for (k = ii[ee]; k < ii[ee + 1]; k++) {
972         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]));
973         if (PetscBTLookup(btv, jj[k])) {
974           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976           PetscInt  k2;
977           PetscBool corner = PETSC_FALSE;
978           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980             /* it's a corner if either is connected with an edge dof belonging to a different cc or
981                if the edge dof lie on the natural part of the boundary */
982             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983               corner = PETSC_TRUE;
984               break;
985             }
986           }
987           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]));
989             PetscCall(PetscBTSet(btv, jj[k]));
990           } else {
991             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        no corners found\n"));
992           }
993         }
994       }
995     }
996     PetscCall(ISRestoreIndices(eedges[i], &idxs));
997   }
998   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000   PetscCall(PetscBTDestroy(&btb));
1001 
1002   /* Reset marked primal dofs */
1003   PetscCall(ISGetLocalSize(primals, &cum));
1004   PetscCall(ISGetIndices(primals, &idxs));
1005   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006   PetscCall(ISRestoreIndices(primals, &idxs));
1007 
1008   /* Now use the initial lG */
1009   PetscCall(MatDestroy(&lG));
1010   PetscCall(MatDestroy(&lGt));
1011   lG = lGinit;
1012   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
1013 
1014   /* Compute extended cols indices */
1015   PetscCall(PetscBTCreate(nv, &btvc));
1016   PetscCall(PetscBTCreate(nee, &bter));
1017   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019   i *= maxsize;
1020   PetscCall(PetscCalloc1(nee, &extcols));
1021   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022   eerr = PETSC_FALSE;
1023   for (i = 0; i < nee; i++) {
1024     PetscInt size, found = 0;
1025 
1026     cum = 0;
1027     PetscCall(ISGetLocalSize(eedges[i], &size));
1028     if (!size && nedfieldlocal) continue;
1029     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030     PetscCall(ISGetIndices(eedges[i], &idxs));
1031     PetscCall(PetscBTMemzero(nv, btvc));
1032     for (j = 0; j < size; j++) {
1033       PetscInt k, ee = idxs[j];
1034       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035         PetscInt vv = jj[k];
1036         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037         else if (!PetscBTLookupSet(btvc, vv)) found++;
1038       }
1039     }
1040     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045     /* it may happen that endpoints are not defined at this point
1046        if it is the case, mark this edge for a second pass */
1047     if (cum != size - 1 || found != 2) {
1048       PetscCall(PetscBTSet(bter, i));
1049       if (print) {
1050         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051         PetscCall(ISView(eedges[i], NULL));
1052         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053         PetscCall(ISView(extcols[i], NULL));
1054       }
1055       eerr = PETSC_TRUE;
1056     }
1057   }
1058   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059   PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm));
1060   if (done) {
1061     PetscInt *newprimals;
1062 
1063     PetscCall(PetscMalloc1(ne, &newprimals));
1064     PetscCall(ISGetLocalSize(primals, &cum));
1065     PetscCall(ISGetIndices(primals, &idxs));
1066     PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067     PetscCall(ISRestoreIndices(primals, &idxs));
1068     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070     for (i = 0; i < nee; i++) {
1071       PetscBool has_candidates = PETSC_FALSE;
1072       if (PetscBTLookup(bter, i)) {
1073         PetscInt size, mark = i + 1;
1074 
1075         PetscCall(ISGetLocalSize(eedges[i], &size));
1076         PetscCall(ISGetIndices(eedges[i], &idxs));
1077         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078         for (j = 0; j < size; j++) {
1079           PetscInt k, ee = idxs[j];
1080           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082             /* set all candidates located on the edge as corners */
1083             if (PetscBTLookup(btvcand, jj[k])) {
1084               PetscInt k2, vv = jj[k];
1085               has_candidates = PETSC_TRUE;
1086               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087               PetscCall(PetscBTSet(btv, vv));
1088               /* set all edge dofs connected to candidate as primals */
1089               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090                 if (marks[jjt[k2]] == mark) {
1091                   PetscInt k3, ee2 = jjt[k2];
1092                   if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093                   newprimals[cum++] = ee2;
1094                   /* finally set the new corners */
1095                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096                     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097                     PetscCall(PetscBTSet(btv, jj[k3]));
1098                   }
1099                 }
1100               }
1101             } else {
1102               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103             }
1104           }
1105         }
1106         if (!has_candidates) { /* circular edge */
1107           PetscInt k, ee = idxs[0], *tmarks;
1108 
1109           PetscCall(PetscCalloc1(ne, &tmarks));
1110           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i));
1111           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112             PetscInt k2;
1113             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]));
1114             PetscCall(PetscBTSet(btv, jj[k]));
1115             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116           }
1117           for (j = 0; j < size; j++) {
1118             if (tmarks[idxs[j]] > 1) {
1119               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120               newprimals[cum++] = idxs[j];
1121             }
1122           }
1123           PetscCall(PetscFree(tmarks));
1124         }
1125         PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126       }
1127       PetscCall(ISDestroy(&extcols[i]));
1128     }
1129     PetscCall(PetscFree(extcols));
1130     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132     if (fl2g) {
1133       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134       PetscCall(ISDestroy(&primals));
1135       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136       PetscCall(PetscFree(eedges));
1137     }
1138     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140     PetscCall(PetscFree(newprimals));
1141     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142     PetscCall(ISDestroy(&primals));
1143     PetscCall(PCBDDCAnalyzeInterface(pc));
1144     pcbddc->mat_graph->twodim = PETSC_FALSE;
1145     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146     if (fl2g) {
1147       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148       PetscCall(PetscMalloc1(nee, &eedges));
1149       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150     } else {
1151       eedges  = alleedges;
1152       primals = allprimals;
1153     }
1154     PetscCall(PetscCalloc1(nee, &extcols));
1155 
1156     /* Mark again */
1157     PetscCall(PetscArrayzero(marks, ne));
1158     for (i = 0; i < nee; i++) {
1159       PetscInt size, mark = i + 1;
1160 
1161       PetscCall(ISGetLocalSize(eedges[i], &size));
1162       PetscCall(ISGetIndices(eedges[i], &idxs));
1163       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165     }
1166     if (print) {
1167       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168       PetscCall(ISView(primals, NULL));
1169     }
1170 
1171     /* Recompute extended cols */
1172     eerr = PETSC_FALSE;
1173     for (i = 0; i < nee; i++) {
1174       PetscInt size;
1175 
1176       cum = 0;
1177       PetscCall(ISGetLocalSize(eedges[i], &size));
1178       if (!size && nedfieldlocal) continue;
1179       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180       PetscCall(ISGetIndices(eedges[i], &idxs));
1181       for (j = 0; j < size; j++) {
1182         PetscInt k, ee = idxs[j];
1183         for (k = ii[ee]; k < ii[ee + 1]; k++)
1184           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185       }
1186       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191       if (cum != size - 1) {
1192         if (print) {
1193           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194           PetscCall(ISView(eedges[i], NULL));
1195           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196           PetscCall(ISView(extcols[i], NULL));
1197         }
1198         eerr = PETSC_TRUE;
1199       }
1200     }
1201   }
1202   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203   PetscCall(PetscFree2(extrow, gidxs));
1204   PetscCall(PetscBTDestroy(&bter));
1205   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206   /* an error should not occur at this point */
1207   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1208 
1209   /* Check the number of endpoints */
1210   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211   PetscCall(PetscMalloc1(2 * nee, &corners));
1212   PetscCall(PetscMalloc1(nee, &cedges));
1213   for (i = 0; i < nee; i++) {
1214     PetscInt size, found = 0, gc[2];
1215 
1216     /* init with defaults */
1217     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218     PetscCall(ISGetLocalSize(eedges[i], &size));
1219     if (!size && nedfieldlocal) continue;
1220     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221     PetscCall(ISGetIndices(eedges[i], &idxs));
1222     PetscCall(PetscBTMemzero(nv, btvc));
1223     for (j = 0; j < size; j++) {
1224       PetscInt k, ee = idxs[j];
1225       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226         PetscInt vv = jj[k];
1227         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229           corners[i * 2 + found++] = vv;
1230         }
1231       }
1232     }
1233     if (found != 2) {
1234       PetscInt e;
1235       if (fl2g) {
1236         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237       } else {
1238         e = idxs[0];
1239       }
1240       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241     }
1242 
1243     /* get primal dof index on this coarse edge */
1244     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245     if (gc[0] > gc[1]) {
1246       PetscInt swap      = corners[2 * i];
1247       corners[2 * i]     = corners[2 * i + 1];
1248       corners[2 * i + 1] = swap;
1249     }
1250     cedges[i] = idxs[size - 1];
1251     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253   }
1254   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255   PetscCall(PetscBTDestroy(&btvc));
1256 
1257   if (PetscDefined(USE_DEBUG)) {
1258     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259      not interfere with neighbouring coarse edges */
1260     PetscCall(PetscMalloc1(nee + 1, &emarks));
1261     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262     for (i = 0; i < nv; i++) {
1263       PetscInt emax = 0, eemax = 0;
1264 
1265       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266       PetscCall(PetscArrayzero(emarks, nee + 1));
1267       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268       for (j = 1; j < nee + 1; j++) {
1269         if (emax < emarks[j]) {
1270           emax  = emarks[j];
1271           eemax = j;
1272         }
1273       }
1274       /* not relevant for edges */
1275       if (!eemax) continue;
1276 
1277       for (j = ii[i]; j < ii[i + 1]; j++) {
1278         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279       }
1280     }
1281     PetscCall(PetscFree(emarks));
1282     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283   }
1284 
1285   /* Compute extended rows indices for edge blocks of the change of basis */
1286   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288   extmem *= maxsize;
1289   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290   PetscCall(PetscMalloc1(nee, &extrows));
1291   PetscCall(PetscCalloc1(nee, &extrowcum));
1292   for (i = 0; i < nv; i++) {
1293     PetscInt mark = 0, size, start;
1294 
1295     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296     for (j = ii[i]; j < ii[i + 1]; j++)
1297       if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1298 
1299     /* not relevant */
1300     if (!mark) continue;
1301 
1302     /* import extended row */
1303     mark--;
1304     start = mark * extmem + extrowcum[mark];
1305     size  = ii[i + 1] - ii[i];
1306     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308     extrowcum[mark] += size;
1309   }
1310   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311   PetscCall(MatDestroy(&lGt));
1312   PetscCall(PetscFree(marks));
1313 
1314   /* Compress extrows */
1315   cum = 0;
1316   for (i = 0; i < nee; i++) {
1317     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318     PetscCall(PetscSortRemoveDupsInt(&size, start));
1319     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320     cum = PetscMax(cum, size);
1321   }
1322   PetscCall(PetscFree(extrowcum));
1323   PetscCall(PetscBTDestroy(&btv));
1324   PetscCall(PetscBTDestroy(&btvcand));
1325 
1326   /* Workspace for lapack inner calls and VecSetValues */
1327   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1328 
1329   /* Create change of basis matrix (preallocation can be improved) */
1330   PetscCall(MatCreate(comm, &T));
1331   PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332   PetscCall(MatSetType(T, MATAIJ));
1333   PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1334   PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1335   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1336   PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1337   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1338   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1339 
1340   /* Defaults to identity */
1341   for (i = pc->mat->rmap->rstart; i < pc->mat->rmap->rend; i++) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1342 
1343   /* Create discrete gradient for the coarser level if needed */
1344   PetscCall(MatDestroy(&pcbddc->nedcG));
1345   PetscCall(ISDestroy(&pcbddc->nedclocal));
1346   if (pcbddc->current_level < pcbddc->max_levels) {
1347     ISLocalToGlobalMapping cel2g, cvl2g;
1348     IS                     wis, gwis;
1349     PetscInt               cnv, cne;
1350 
1351     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1352     if (fl2g) {
1353       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1354     } else {
1355       PetscCall(PetscObjectReference((PetscObject)wis));
1356       pcbddc->nedclocal = wis;
1357     }
1358     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1359     PetscCall(ISDestroy(&wis));
1360     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1361     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1362     PetscCall(ISDestroy(&wis));
1363     PetscCall(ISDestroy(&gwis));
1364 
1365     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1366     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1367     PetscCall(ISDestroy(&wis));
1368     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1369     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1370     PetscCall(ISDestroy(&wis));
1371     PetscCall(ISDestroy(&gwis));
1372 
1373     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1374     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1375     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1376     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1377     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1378     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1379     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1380     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1381   }
1382   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1383 
1384 #if defined(PRINT_GDET)
1385   inc = 0;
1386   lev = pcbddc->current_level;
1387 #endif
1388 
1389   /* Insert values in the change of basis matrix */
1390   for (i = 0; i < nee; i++) {
1391     Mat         Gins = NULL, GKins = NULL;
1392     IS          cornersis = NULL;
1393     PetscScalar cvals[2];
1394 
1395     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1396     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1397     if (Gins && GKins) {
1398       const PetscScalar *data;
1399       const PetscInt    *rows, *cols;
1400       PetscInt           nrh, nch, nrc, ncc;
1401 
1402       PetscCall(ISGetIndices(eedges[i], &cols));
1403       /* H1 */
1404       PetscCall(ISGetIndices(extrows[i], &rows));
1405       PetscCall(MatGetSize(Gins, &nrh, &nch));
1406       PetscCall(MatDenseGetArrayRead(Gins, &data));
1407       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1408       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1409       PetscCall(ISRestoreIndices(extrows[i], &rows));
1410       /* complement */
1411       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1412       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1413       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1414       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1415       PetscCall(MatDenseGetArrayRead(GKins, &data));
1416       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1417       PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1418 
1419       /* coarse discrete gradient */
1420       if (pcbddc->nedcG) {
1421         PetscInt cols[2];
1422 
1423         cols[0] = 2 * i;
1424         cols[1] = 2 * i + 1;
1425         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1426       }
1427       PetscCall(ISRestoreIndices(eedges[i], &cols));
1428     }
1429     PetscCall(ISDestroy(&extrows[i]));
1430     PetscCall(ISDestroy(&extcols[i]));
1431     PetscCall(ISDestroy(&cornersis));
1432     PetscCall(MatDestroy(&Gins));
1433     PetscCall(MatDestroy(&GKins));
1434   }
1435 
1436   /* for FDM element-by-element: first dof on the edge only constraint. Why? */
1437   if (elements_corners && pcbddc->mat_graph->multi_element) {
1438     ISLocalToGlobalMapping map;
1439     MatNullSpace           nnsp;
1440     Vec                    quad_vec;
1441 
1442     PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL));
1443     PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp));
1444     PetscCall(VecLockReadPop(quad_vec));
1445     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
1446     PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1447     for (i = 0; i < nee; i++) {
1448       const PetscInt *idxs;
1449       PetscScalar     one = 1.0;
1450 
1451       PetscCall(ISGetLocalSize(alleedges[i], &cum));
1452       if (!cum) continue;
1453       PetscCall(ISGetIndices(alleedges[i], &idxs));
1454       PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES));
1455       PetscCall(ISRestoreIndices(alleedges[i], &idxs));
1456     }
1457     PetscCall(VecLockReadPush(quad_vec));
1458     PetscCall(VecDestroy(&quad_vec));
1459     PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1460     PetscCall(MatNullSpaceDestroy(&nnsp));
1461   }
1462   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1463 
1464   /* Start assembling */
1465   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1466   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1467 
1468   /* Free */
1469   if (fl2g) {
1470     PetscCall(ISDestroy(&primals));
1471     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1472     PetscCall(PetscFree(eedges));
1473   }
1474 
1475   /* hack mat_graph with primal dofs on the coarse edges */
1476   {
1477     PCBDDCGraph graph  = pcbddc->mat_graph;
1478     PetscInt   *oqueue = graph->queue;
1479     PetscInt   *ocptr  = graph->cptr;
1480     PetscInt    ncc, *idxs;
1481 
1482     /* find first primal edge */
1483     if (pcbddc->nedclocal) {
1484       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1485     } else {
1486       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1487       idxs = cedges;
1488     }
1489     cum = 0;
1490     while (cum < nee && cedges[cum] < 0) cum++;
1491 
1492     /* adapt connected components */
1493     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1494     graph->cptr[0] = 0;
1495     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1496       PetscInt lc = ocptr[i + 1] - ocptr[i];
1497       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1498         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1499         graph->queue[graph->cptr[ncc]] = cedges[cum];
1500         ncc++;
1501         lc--;
1502         cum++;
1503         while (cum < nee && cedges[cum] < 0) cum++;
1504       }
1505       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1506       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1507       ncc++;
1508     }
1509     graph->ncc = ncc;
1510     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1511     PetscCall(PetscFree2(ocptr, oqueue));
1512   }
1513   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1514   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1515   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1516 
1517   PetscCall(ISDestroy(&nedfieldlocal));
1518   PetscCall(PetscFree(extrow));
1519   PetscCall(PetscFree2(work, rwork));
1520   PetscCall(PetscFree(corners));
1521   PetscCall(PetscFree(cedges));
1522   PetscCall(PetscFree(extrows));
1523   PetscCall(PetscFree(extcols));
1524   PetscCall(MatDestroy(&lG));
1525 
1526   /* Complete assembling */
1527   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1528   PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1529   if (pcbddc->nedcG) {
1530     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1531     PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1532   }
1533 
1534   PetscCall(ISDestroy(&elements_corners));
1535 
1536   /* set change of basis */
1537   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1538   PetscCall(MatDestroy(&T));
1539   PetscFunctionReturn(PETSC_SUCCESS);
1540 }
1541 
1542 /* the near-null space of BDDC carries information on quadrature weights,
1543    and these can be collinear -> so cheat with MatNullSpaceCreate
1544    and create a suitable set of basis vectors first */
1545 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1546 {
1547   PetscInt i;
1548 
1549   PetscFunctionBegin;
1550   for (i = 0; i < nvecs; i++) {
1551     PetscInt first, last;
1552 
1553     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1554     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1555     if (i >= first && i < last) {
1556       PetscScalar *data;
1557       PetscCall(VecGetArray(quad_vecs[i], &data));
1558       if (!has_const) {
1559         data[i - first] = 1.;
1560       } else {
1561         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1562         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1563       }
1564       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1565     }
1566     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1567   }
1568   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1569   for (i = 0; i < nvecs; i++) { /* reset vectors */
1570     PetscInt first, last;
1571     PetscCall(VecLockReadPop(quad_vecs[i]));
1572     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1573     if (i >= first && i < last) {
1574       PetscScalar *data;
1575       PetscCall(VecGetArray(quad_vecs[i], &data));
1576       if (!has_const) {
1577         data[i - first] = 0.;
1578       } else {
1579         data[2 * i - first]     = 0.;
1580         data[2 * i - first + 1] = 0.;
1581       }
1582       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1583     }
1584     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1585     PetscCall(VecLockReadPush(quad_vecs[i]));
1586   }
1587   PetscFunctionReturn(PETSC_SUCCESS);
1588 }
1589 
1590 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1591 {
1592   Mat                    loc_divudotp;
1593   Vec                    p, v, quad_vec;
1594   ISLocalToGlobalMapping map;
1595   PetscScalar           *array;
1596 
1597   PetscFunctionBegin;
1598   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1599   if (!transpose) {
1600     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1601   } else {
1602     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1603   }
1604   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1605   PetscCall(VecLockReadPop(quad_vec));
1606   PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1607 
1608   /* compute local quad vec */
1609   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1610   if (!transpose) {
1611     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1612   } else {
1613     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1614   }
1615   /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1616   PetscCall(VecSet(p, 1.));
1617   if (!transpose) {
1618     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1619   } else {
1620     PetscCall(MatMult(loc_divudotp, p, v));
1621   }
1622   PetscCall(VecDestroy(&p));
1623   if (vl2l) {
1624     Mat        lA;
1625     VecScatter sc;
1626     Vec        vins;
1627 
1628     PetscCall(MatISGetLocalMat(A, &lA));
1629     PetscCall(MatCreateVecs(lA, &vins, NULL));
1630     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1631     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1632     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1633     PetscCall(VecScatterDestroy(&sc));
1634     PetscCall(VecDestroy(&v));
1635     v = vins;
1636   }
1637 
1638   /* mask summation of interface values */
1639   PetscInt        n, *mmask, *mask, *idxs, nmr, nr;
1640   const PetscInt *degree;
1641   PetscSF         msf;
1642 
1643   PetscCall(VecGetLocalSize(v, &n));
1644   PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1645   PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1646   PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1647   PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1648   PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, &degree));
1649   PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, &degree));
1650   for (PetscInt i = 0, c = 0; i < nr; i++) {
1651     mmask[c] = 1;
1652     c += degree[i];
1653   }
1654   PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1655   PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1656   PetscCall(VecGetArray(v, &array));
1657   for (PetscInt i = 0; i < n; i++) {
1658     array[i] *= mask[i];
1659     idxs[i] = i;
1660   }
1661   PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1662   PetscCall(VecRestoreArray(v, &array));
1663   PetscCall(PetscFree3(mmask, mask, idxs));
1664   PetscCall(VecDestroy(&v));
1665   PetscCall(VecAssemblyBegin(quad_vec));
1666   PetscCall(VecAssemblyEnd(quad_vec));
1667   PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1668   PetscCall(VecLockReadPush(quad_vec));
1669   PetscCall(VecDestroy(&quad_vec));
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1674 {
1675   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1676 
1677   PetscFunctionBegin;
1678   if (primalv) {
1679     if (pcbddc->user_primal_vertices_local) {
1680       IS list[2], newp;
1681 
1682       list[0] = primalv;
1683       list[1] = pcbddc->user_primal_vertices_local;
1684       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1685       PetscCall(ISSortRemoveDups(newp));
1686       PetscCall(ISDestroy(&list[1]));
1687       pcbddc->user_primal_vertices_local = newp;
1688     } else {
1689       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1690     }
1691   }
1692   PetscFunctionReturn(PETSC_SUCCESS);
1693 }
1694 
1695 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1696 {
1697   PetscInt f, *comp = (PetscInt *)ctx;
1698 
1699   PetscFunctionBegin;
1700   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1701   PetscFunctionReturn(PETSC_SUCCESS);
1702 }
1703 
1704 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1705 {
1706   Vec       local, global;
1707   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1708   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1709   PetscBool monolithic = PETSC_FALSE;
1710 
1711   PetscFunctionBegin;
1712   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1713   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1714   PetscOptionsEnd();
1715   /* need to convert from global to local topology information and remove references to information in global ordering */
1716   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1717   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1718   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1719   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1720   if (monolithic) { /* just get block size to properly compute vertices */
1721     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1722     goto boundary;
1723   }
1724 
1725   if (pcbddc->user_provided_isfordofs) {
1726     if (pcbddc->n_ISForDofs) {
1727       PetscInt i;
1728 
1729       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1730       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1731         PetscInt bs;
1732 
1733         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1734         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1735         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1736         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1737       }
1738       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1739       pcbddc->n_ISForDofs      = 0;
1740       PetscCall(PetscFree(pcbddc->ISForDofs));
1741     }
1742   } else {
1743     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1744       DM dm;
1745 
1746       PetscCall(MatGetDM(pc->pmat, &dm));
1747       if (!dm) PetscCall(PCGetDM(pc, &dm));
1748       if (dm) {
1749         IS      *fields;
1750         PetscInt nf, i;
1751 
1752         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1753         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1754         for (i = 0; i < nf; i++) {
1755           PetscInt bs;
1756 
1757           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1758           PetscCall(ISGetBlockSize(fields[i], &bs));
1759           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1760           PetscCall(ISDestroy(&fields[i]));
1761         }
1762         PetscCall(PetscFree(fields));
1763         pcbddc->n_ISForDofsLocal = nf;
1764       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1765         PetscContainer c;
1766 
1767         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1768         if (c) {
1769           MatISLocalFields lf;
1770           PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1771           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1772         } else { /* fallback, create the default fields if bs > 1 */
1773           PetscInt i, n = matis->A->rmap->n;
1774           PetscCall(MatGetBlockSize(pc->pmat, &i));
1775           if (i > 1) {
1776             pcbddc->n_ISForDofsLocal = i;
1777             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1778             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1779           }
1780         }
1781       }
1782     } else {
1783       PetscInt i;
1784       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1785     }
1786   }
1787 
1788 boundary:
1789   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1790     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1791   } else if (pcbddc->DirichletBoundariesLocal) {
1792     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1793   }
1794   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1795     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1796   } else if (pcbddc->NeumannBoundariesLocal) {
1797     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1798   }
1799   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1800   PetscCall(VecDestroy(&global));
1801   PetscCall(VecDestroy(&local));
1802   /* detect local disconnected subdomains if requested or needed */
1803   if (pcbddc->detect_disconnected || matis->allow_repeated) {
1804     IS        primalv = NULL;
1805     PetscInt  nel;
1806     PetscBool filter = pcbddc->detect_disconnected_filter;
1807 
1808     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1809     PetscCall(PetscFree(pcbddc->local_subs));
1810     PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1811     if (matis->allow_repeated && nel) {
1812       const PetscInt *elsizes;
1813 
1814       pcbddc->n_local_subs = nel;
1815       PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1816       PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1817       for (PetscInt i = 0, c = 0; i < nel; i++) {
1818         PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1819         c += elsizes[i];
1820       }
1821     } else {
1822       PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1823     }
1824     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1825     PetscCall(ISDestroy(&primalv));
1826   }
1827   /* early stage corner detection */
1828   {
1829     DM dm;
1830 
1831     PetscCall(MatGetDM(pc->pmat, &dm));
1832     if (!dm) PetscCall(PCGetDM(pc, &dm));
1833     if (dm) {
1834       PetscBool isda;
1835 
1836       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1837       if (isda) {
1838         ISLocalToGlobalMapping l2l;
1839         IS                     corners;
1840         Mat                    lA;
1841         PetscBool              gl, lo;
1842 
1843         {
1844           Vec                cvec;
1845           const PetscScalar *coords;
1846           PetscInt           dof, n, cdim;
1847           PetscBool          memc = PETSC_TRUE;
1848 
1849           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1850           PetscCall(DMGetCoordinates(dm, &cvec));
1851           PetscCall(VecGetLocalSize(cvec, &n));
1852           PetscCall(VecGetBlockSize(cvec, &cdim));
1853           n /= cdim;
1854           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1855           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1856           PetscCall(VecGetArrayRead(cvec, &coords));
1857 #if defined(PETSC_USE_COMPLEX)
1858           memc = PETSC_FALSE;
1859 #endif
1860           if (dof != 1) memc = PETSC_FALSE;
1861           if (memc) {
1862             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1863           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1864             PetscReal *bcoords = pcbddc->mat_graph->coords;
1865             PetscInt   i, b, d;
1866 
1867             for (i = 0; i < n; i++) {
1868               for (b = 0; b < dof; b++) {
1869                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1870               }
1871             }
1872           }
1873           PetscCall(VecRestoreArrayRead(cvec, &coords));
1874           pcbddc->mat_graph->cdim  = cdim;
1875           pcbddc->mat_graph->cnloc = dof * n;
1876           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1877         }
1878         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1879         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1880         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1881         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
1882         lo = (PetscBool)(l2l && corners);
1883         PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1884         if (gl) { /* From PETSc's DMDA */
1885           const PetscInt *idx;
1886           PetscInt        dof, bs, *idxout, n;
1887 
1888           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1889           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
1890           PetscCall(ISGetLocalSize(corners, &n));
1891           PetscCall(ISGetIndices(corners, &idx));
1892           if (bs == dof) {
1893             PetscCall(PetscMalloc1(n, &idxout));
1894             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
1895           } else { /* the original DMDA local-to-local map have been modified */
1896             PetscInt i, d;
1897 
1898             PetscCall(PetscMalloc1(dof * n, &idxout));
1899             for (i = 0; i < n; i++)
1900               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1901             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
1902 
1903             bs = 1;
1904             n *= dof;
1905           }
1906           PetscCall(ISRestoreIndices(corners, &idx));
1907           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1908           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
1909           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
1910           PetscCall(ISDestroy(&corners));
1911           pcbddc->corner_selected  = PETSC_TRUE;
1912           pcbddc->corner_selection = PETSC_TRUE;
1913         }
1914         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1915       }
1916     }
1917   }
1918   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1919     DM dm;
1920 
1921     PetscCall(MatGetDM(pc->pmat, &dm));
1922     if (!dm) PetscCall(PCGetDM(pc, &dm));
1923     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1924       Vec          vcoords;
1925       PetscSection section;
1926       PetscReal   *coords;
1927       PetscInt     d, cdim, nl, nf, **ctxs;
1928       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1929       /* debug coordinates */
1930       PetscViewer       viewer;
1931       PetscBool         flg;
1932       PetscViewerFormat format;
1933       const char       *prefix;
1934 
1935       PetscCall(DMGetCoordinateDim(dm, &cdim));
1936       PetscCall(DMGetLocalSection(dm, &section));
1937       PetscCall(PetscSectionGetNumFields(section, &nf));
1938       PetscCall(DMCreateGlobalVector(dm, &vcoords));
1939       PetscCall(VecGetLocalSize(vcoords, &nl));
1940       PetscCall(PetscMalloc1(nl * cdim, &coords));
1941       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
1942       PetscCall(PetscMalloc1(nf, &ctxs[0]));
1943       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1944       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
1945 
1946       /* debug coordinates */
1947       PetscCall(PCGetOptionsPrefix(pc, &prefix));
1948       PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
1949       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
1950       for (d = 0; d < cdim; d++) {
1951         PetscInt           i;
1952         const PetscScalar *v;
1953         char               name[16];
1954 
1955         for (i = 0; i < nf; i++) ctxs[i][0] = d;
1956         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d));
1957         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
1958         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
1959         if (flg) PetscCall(VecView(vcoords, viewer));
1960         PetscCall(VecGetArrayRead(vcoords, &v));
1961         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1962         PetscCall(VecRestoreArrayRead(vcoords, &v));
1963       }
1964       PetscCall(VecDestroy(&vcoords));
1965       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
1966       PetscCall(PetscFree(coords));
1967       PetscCall(PetscFree(ctxs[0]));
1968       PetscCall(PetscFree2(funcs, ctxs));
1969       if (flg) {
1970         PetscCall(PetscViewerPopFormat(viewer));
1971         PetscCall(PetscViewerDestroy(&viewer));
1972       }
1973     }
1974   }
1975   PetscFunctionReturn(PETSC_SUCCESS);
1976 }
1977 
1978 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1979 {
1980   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
1981   IS              nis;
1982   const PetscInt *idxs;
1983   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;
1984 
1985   PetscFunctionBegin;
1986   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
1987   if (mop == MPI_LAND) {
1988     /* init rootdata with true */
1989     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
1990   } else {
1991     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
1992   }
1993   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
1994   PetscCall(ISGetLocalSize(*is, &nd));
1995   PetscCall(ISGetIndices(*is, &idxs));
1996   for (i = 0; i < nd; i++)
1997     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
1998   PetscCall(ISRestoreIndices(*is, &idxs));
1999   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2000   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2001   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2002   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2003   if (mop == MPI_LAND) {
2004     PetscCall(PetscMalloc1(nd, &nidxs));
2005   } else {
2006     PetscCall(PetscMalloc1(n, &nidxs));
2007   }
2008   for (i = 0, nnd = 0; i < n; i++)
2009     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2010   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2011   PetscCall(ISDestroy(is));
2012   *is = nis;
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2017 {
2018   PC_IS   *pcis   = (PC_IS *)pc->data;
2019   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2020 
2021   PetscFunctionBegin;
2022   if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2023   if (pcbddc->ChangeOfBasisMatrix) {
2024     Vec swap;
2025 
2026     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2027     swap                = pcbddc->work_change;
2028     pcbddc->work_change = r;
2029     r                   = swap;
2030   }
2031   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2032   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2033   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2034   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2035   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2036   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2037   PetscCall(VecSet(z, 0.));
2038   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2039   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2040   if (pcbddc->ChangeOfBasisMatrix) {
2041     pcbddc->work_change = r;
2042     PetscCall(VecCopy(z, pcbddc->work_change));
2043     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2044   }
2045   PetscFunctionReturn(PETSC_SUCCESS);
2046 }
2047 
2048 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2049 {
2050   PCBDDCBenignMatMult_ctx ctx;
2051   PetscBool               apply_right, apply_left, reset_x;
2052 
2053   PetscFunctionBegin;
2054   PetscCall(MatShellGetContext(A, &ctx));
2055   if (transpose) {
2056     apply_right = ctx->apply_left;
2057     apply_left  = ctx->apply_right;
2058   } else {
2059     apply_right = ctx->apply_right;
2060     apply_left  = ctx->apply_left;
2061   }
2062   reset_x = PETSC_FALSE;
2063   if (apply_right) {
2064     const PetscScalar *ax;
2065     PetscInt           nl, i;
2066 
2067     PetscCall(VecGetLocalSize(x, &nl));
2068     PetscCall(VecGetArrayRead(x, &ax));
2069     PetscCall(PetscArraycpy(ctx->work, ax, nl));
2070     PetscCall(VecRestoreArrayRead(x, &ax));
2071     for (i = 0; i < ctx->benign_n; i++) {
2072       PetscScalar     sum, val;
2073       const PetscInt *idxs;
2074       PetscInt        nz, j;
2075       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2076       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2077       sum = 0.;
2078       if (ctx->apply_p0) {
2079         val = ctx->work[idxs[nz - 1]];
2080         for (j = 0; j < nz - 1; j++) {
2081           sum += ctx->work[idxs[j]];
2082           ctx->work[idxs[j]] += val;
2083         }
2084       } else {
2085         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2086       }
2087       ctx->work[idxs[nz - 1]] -= sum;
2088       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2089     }
2090     PetscCall(VecPlaceArray(x, ctx->work));
2091     reset_x = PETSC_TRUE;
2092   }
2093   if (transpose) {
2094     PetscCall(MatMultTranspose(ctx->A, x, y));
2095   } else {
2096     PetscCall(MatMult(ctx->A, x, y));
2097   }
2098   if (reset_x) PetscCall(VecResetArray(x));
2099   if (apply_left) {
2100     PetscScalar *ay;
2101     PetscInt     i;
2102 
2103     PetscCall(VecGetArray(y, &ay));
2104     for (i = 0; i < ctx->benign_n; i++) {
2105       PetscScalar     sum, val;
2106       const PetscInt *idxs;
2107       PetscInt        nz, j;
2108       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2109       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2110       val = -ay[idxs[nz - 1]];
2111       if (ctx->apply_p0) {
2112         sum = 0.;
2113         for (j = 0; j < nz - 1; j++) {
2114           sum += ay[idxs[j]];
2115           ay[idxs[j]] += val;
2116         }
2117         ay[idxs[nz - 1]] += sum;
2118       } else {
2119         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2120         ay[idxs[nz - 1]] = 0.;
2121       }
2122       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2123     }
2124     PetscCall(VecRestoreArray(y, &ay));
2125   }
2126   PetscFunctionReturn(PETSC_SUCCESS);
2127 }
2128 
2129 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2130 {
2131   PetscFunctionBegin;
2132   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2133   PetscFunctionReturn(PETSC_SUCCESS);
2134 }
2135 
2136 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2137 {
2138   PetscFunctionBegin;
2139   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2140   PetscFunctionReturn(PETSC_SUCCESS);
2141 }
2142 
2143 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2144 {
2145   PC_IS                  *pcis   = (PC_IS *)pc->data;
2146   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
2147   PCBDDCBenignMatMult_ctx ctx;
2148 
2149   PetscFunctionBegin;
2150   if (!restore) {
2151     Mat                A_IB, A_BI;
2152     PetscScalar       *work;
2153     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2154 
2155     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2156     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2157     PetscCall(PetscMalloc1(pcis->n, &work));
2158     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2159     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2160     PetscCall(MatSetType(A_IB, MATSHELL));
2161     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private));
2162     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private));
2163     PetscCall(PetscNew(&ctx));
2164     PetscCall(MatShellSetContext(A_IB, ctx));
2165     ctx->apply_left  = PETSC_TRUE;
2166     ctx->apply_right = PETSC_FALSE;
2167     ctx->apply_p0    = PETSC_FALSE;
2168     ctx->benign_n    = pcbddc->benign_n;
2169     if (reuse) {
2170       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2171       ctx->free                 = PETSC_FALSE;
2172     } else { /* TODO: could be optimized for successive solves */
2173       ISLocalToGlobalMapping N_to_D;
2174       PetscInt               i;
2175 
2176       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2177       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2178       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2179       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2180       ctx->free = PETSC_TRUE;
2181     }
2182     ctx->A    = pcis->A_IB;
2183     ctx->work = work;
2184     PetscCall(MatSetUp(A_IB));
2185     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2186     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2187     pcis->A_IB = A_IB;
2188 
2189     /* A_BI as A_IB^T */
2190     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2191     pcbddc->benign_original_mat = pcis->A_BI;
2192     pcis->A_BI                  = A_BI;
2193   } else {
2194     if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2195     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2196     PetscCall(MatDestroy(&pcis->A_IB));
2197     pcis->A_IB = ctx->A;
2198     ctx->A     = NULL;
2199     PetscCall(MatDestroy(&pcis->A_BI));
2200     pcis->A_BI                  = pcbddc->benign_original_mat;
2201     pcbddc->benign_original_mat = NULL;
2202     if (ctx->free) {
2203       PetscInt i;
2204       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2205       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2206     }
2207     PetscCall(PetscFree(ctx->work));
2208     PetscCall(PetscFree(ctx));
2209   }
2210   PetscFunctionReturn(PETSC_SUCCESS);
2211 }
2212 
2213 /* used just in bddc debug mode */
2214 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2215 {
2216   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2217   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2218   Mat      An;
2219 
2220   PetscFunctionBegin;
2221   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2222   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2223   if (is1) {
2224     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2225     PetscCall(MatDestroy(&An));
2226   } else {
2227     *B = An;
2228   }
2229   PetscFunctionReturn(PETSC_SUCCESS);
2230 }
2231 
2232 /* TODO: add reuse flag */
2233 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2234 {
2235   Mat             Bt;
2236   PetscScalar    *a, *bdata;
2237   const PetscInt *ii, *ij;
2238   PetscInt        m, n, i, nnz, *bii, *bij;
2239   PetscBool       flg_row;
2240 
2241   PetscFunctionBegin;
2242   PetscCall(MatGetSize(A, &n, &m));
2243   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2244   PetscCall(MatSeqAIJGetArray(A, &a));
2245   nnz = n;
2246   for (i = 0; i < ii[n]; i++) {
2247     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2248   }
2249   PetscCall(PetscMalloc1(n + 1, &bii));
2250   PetscCall(PetscMalloc1(nnz, &bij));
2251   PetscCall(PetscMalloc1(nnz, &bdata));
2252   nnz    = 0;
2253   bii[0] = 0;
2254   for (i = 0; i < n; i++) {
2255     PetscInt j;
2256     for (j = ii[i]; j < ii[i + 1]; j++) {
2257       PetscScalar entry = a[j];
2258       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2259         bij[nnz]   = ij[j];
2260         bdata[nnz] = entry;
2261         nnz++;
2262       }
2263     }
2264     bii[i + 1] = nnz;
2265   }
2266   PetscCall(MatSeqAIJRestoreArray(A, &a));
2267   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2268   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2269   {
2270     Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2271     b->free_a     = PETSC_TRUE;
2272     b->free_ij    = PETSC_TRUE;
2273   }
2274   if (*B == A) PetscCall(MatDestroy(&A));
2275   *B = Bt;
2276   PetscFunctionReturn(PETSC_SUCCESS);
2277 }
2278 
2279 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2280 {
2281   Mat                    B = NULL;
2282   DM                     dm;
2283   IS                     is_dummy, *cc_n;
2284   ISLocalToGlobalMapping l2gmap_dummy;
2285   PCBDDCGraph            graph;
2286   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2287   PetscInt               i, n;
2288   PetscInt              *xadj, *adjncy;
2289   PetscBool              isplex = PETSC_FALSE;
2290 
2291   PetscFunctionBegin;
2292   if (ncc) *ncc = 0;
2293   if (cc) *cc = NULL;
2294   if (primalv) *primalv = NULL;
2295   PetscCall(PCBDDCGraphCreate(&graph));
2296   PetscCall(MatGetDM(pc->pmat, &dm));
2297   if (!dm) PetscCall(PCGetDM(pc, &dm));
2298   if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2299   if (filter) isplex = PETSC_FALSE;
2300 
2301   if (isplex) { /* this code has been modified from plexpartition.c */
2302     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2303     PetscInt       *adj = NULL;
2304     IS              cellNumbering;
2305     const PetscInt *cellNum;
2306     PetscBool       useCone, useClosure;
2307     PetscSection    section;
2308     PetscSegBuffer  adjBuffer;
2309     PetscSF         sfPoint;
2310 
2311     PetscCall(DMConvert(dm, DMPLEX, &dm));
2312     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2313     PetscCall(DMGetPointSF(dm, &sfPoint));
2314     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2315     /* Build adjacency graph via a section/segbuffer */
2316     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2317     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2318     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2319     /* Always use FVM adjacency to create partitioner graph */
2320     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2321     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2322     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2323     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2324     for (n = 0, p = pStart; p < pEnd; p++) {
2325       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2326       if (nroots > 0) {
2327         if (cellNum[p] < 0) continue;
2328       }
2329       adjSize = PETSC_DETERMINE;
2330       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2331       for (a = 0; a < adjSize; ++a) {
2332         const PetscInt point = adj[a];
2333         if (pStart <= point && point < pEnd) {
2334           PetscInt *PETSC_RESTRICT pBuf;
2335           PetscCall(PetscSectionAddDof(section, p, 1));
2336           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2337           *pBuf = point;
2338         }
2339       }
2340       n++;
2341     }
2342     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2343     /* Derive CSR graph from section/segbuffer */
2344     PetscCall(PetscSectionSetUp(section));
2345     PetscCall(PetscSectionGetStorageSize(section, &size));
2346     PetscCall(PetscMalloc1(n + 1, &xadj));
2347     for (idx = 0, p = pStart; p < pEnd; p++) {
2348       if (nroots > 0) {
2349         if (cellNum[p] < 0) continue;
2350       }
2351       PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2352     }
2353     xadj[n] = size;
2354     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2355     /* Clean up */
2356     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2357     PetscCall(PetscSectionDestroy(&section));
2358     PetscCall(PetscFree(adj));
2359     graph->xadj   = xadj;
2360     graph->adjncy = adjncy;
2361   } else {
2362     Mat       A;
2363     PetscBool isseqaij, flg_row;
2364 
2365     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2366     if (!A->rmap->N || !A->cmap->N) {
2367       PetscCall(PCBDDCGraphDestroy(&graph));
2368       PetscFunctionReturn(PETSC_SUCCESS);
2369     }
2370     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2371     if (!isseqaij && filter) {
2372       PetscBool isseqdense;
2373 
2374       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2375       if (!isseqdense) {
2376         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2377       } else { /* TODO: rectangular case and LDA */
2378         PetscScalar *array;
2379         PetscReal    chop = 1.e-6;
2380 
2381         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2382         PetscCall(MatDenseGetArray(B, &array));
2383         PetscCall(MatGetSize(B, &n, NULL));
2384         for (i = 0; i < n; i++) {
2385           PetscInt j;
2386           for (j = i + 1; j < n; j++) {
2387             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2388             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2389             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2390           }
2391         }
2392         PetscCall(MatDenseRestoreArray(B, &array));
2393         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2394       }
2395     } else {
2396       PetscCall(PetscObjectReference((PetscObject)A));
2397       B = A;
2398     }
2399     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2400 
2401     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2402     if (filter) {
2403       PetscScalar *data;
2404       PetscInt     j, cum;
2405 
2406       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2407       PetscCall(MatSeqAIJGetArray(B, &data));
2408       cum = 0;
2409       for (i = 0; i < n; i++) {
2410         PetscInt t;
2411 
2412         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2413           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2414           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2415         }
2416         t                = xadj_filtered[i];
2417         xadj_filtered[i] = cum;
2418         cum += t;
2419       }
2420       PetscCall(MatSeqAIJRestoreArray(B, &data));
2421       graph->xadj   = xadj_filtered;
2422       graph->adjncy = adjncy_filtered;
2423     } else {
2424       graph->xadj   = xadj;
2425       graph->adjncy = adjncy;
2426     }
2427   }
2428   /* compute local connected components using PCBDDCGraph */
2429   graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2430   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2431   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2432   PetscCall(ISDestroy(&is_dummy));
2433   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX));
2434   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2435   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2436   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2437 
2438   /* partial clean up */
2439   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2440   if (B) {
2441     PetscBool flg_row;
2442     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2443     PetscCall(MatDestroy(&B));
2444   }
2445   if (isplex) {
2446     PetscCall(PetscFree(xadj));
2447     PetscCall(PetscFree(adjncy));
2448   }
2449 
2450   /* get back data */
2451   if (isplex) {
2452     if (ncc) *ncc = graph->ncc;
2453     if (cc || primalv) {
2454       Mat          A;
2455       PetscBT      btv, btvt, btvc;
2456       PetscSection subSection;
2457       PetscInt    *ids, cum, cump, *cids, *pids;
2458       PetscInt     dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;
2459 
2460       PetscCall(DMGetDimension(dm, &dim));
2461       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2462       PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2463       PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2464       PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2465       PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2466       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2467       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2468       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2469       PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2470       PetscCall(PetscBTCreate(pEnd - pStart, &btvc));
2471 
2472       /* First see if we find corners for the subdomains, i.e. a vertex
2473          shared by at least dim subdomain boundary faces. This does not
2474          cover all the possible cases with simplices but it is enough
2475          for tensor cells */
2476       if (vStart != fStart && dim <= 3) {
2477         for (PetscInt c = cStart; c < cEnd; c++) {
2478           PetscInt        nf, cnt = 0, mcnt = dim, *cfaces;
2479           const PetscInt *faces;
2480 
2481           PetscCall(DMPlexGetConeSize(dm, c, &nf));
2482           PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2483           PetscCall(DMPlexGetCone(dm, c, &faces));
2484           for (PetscInt f = 0; f < nf; f++) {
2485             PetscInt nc, ff;
2486 
2487             PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2488             PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2489             if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2490           }
2491           if (cnt >= mcnt) {
2492             PetscInt size, *closure = NULL;
2493 
2494             PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2495             for (PetscInt k = 0; k < 2 * size; k += 2) {
2496               PetscInt v = closure[k];
2497               if (v >= vStart && v < vEnd) {
2498                 PetscInt vsize, *vclosure = NULL;
2499 
2500                 cnt = 0;
2501                 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2502                 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2503                   PetscInt f = vclosure[vk];
2504                   if (f >= fStart && f < fEnd) {
2505                     PetscInt  nc, ff;
2506                     PetscBool valid = PETSC_FALSE;
2507 
2508                     for (PetscInt fk = 0; fk < nf; fk++)
2509                       if (f == cfaces[fk]) valid = PETSC_TRUE;
2510                     if (!valid) continue;
2511                     PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2512                     PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2513                     if (nc == 1 && f == ff) cnt++;
2514                   }
2515                 }
2516                 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2517                 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2518               }
2519             }
2520             PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2521           }
2522           PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2523         }
2524       }
2525 
2526       cids[0] = 0;
2527       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2528         PetscInt j;
2529 
2530         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2531         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2532           PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2533 
2534           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2535           for (k = 0; k < 2 * size; k += 2) {
2536             PetscInt s, pp, p = closure[k], off, dof, cdof;
2537 
2538             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2539             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2540             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2541             for (s = 0; s < dof - cdof; s++) {
2542               if (PetscBTLookupSet(btvt, off + s)) continue;
2543               if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2544               else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2545               else pids[cump++] = off + s; /* cross-vertex */
2546             }
2547             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2548             if (pp != p) {
2549               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2550               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2551               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2552               for (s = 0; s < dof - cdof; s++) {
2553                 if (PetscBTLookupSet(btvt, off + s)) continue;
2554                 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2555                 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2556                 else pids[cump++] = off + s; /* cross-vertex */
2557               }
2558             }
2559           }
2560           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2561         }
2562         cids[i + 1] = cum;
2563         /* mark dofs as already assigned */
2564         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2565       }
2566       if (cc) {
2567         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2568         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2569         *cc = cc_n;
2570       }
2571       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2572       PetscCall(PetscFree3(ids, cids, pids));
2573       PetscCall(PetscBTDestroy(&btv));
2574       PetscCall(PetscBTDestroy(&btvt));
2575       PetscCall(PetscBTDestroy(&btvc));
2576       PetscCall(DMDestroy(&dm));
2577     }
2578   } else {
2579     if (ncc) *ncc = graph->ncc;
2580     if (cc) {
2581       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2582       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2583       *cc = cc_n;
2584     }
2585   }
2586   /* clean up graph */
2587   graph->xadj   = NULL;
2588   graph->adjncy = NULL;
2589   PetscCall(PCBDDCGraphDestroy(&graph));
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2594 {
2595   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2596   PC_IS   *pcis   = (PC_IS *)pc->data;
2597   IS       dirIS  = NULL;
2598   PetscInt i;
2599 
2600   PetscFunctionBegin;
2601   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2602   if (zerodiag) {
2603     Mat             A;
2604     Vec             vec3_N;
2605     PetscScalar    *vals;
2606     const PetscInt *idxs;
2607     PetscInt        nz, *count;
2608 
2609     /* p0 */
2610     PetscCall(VecSet(pcis->vec1_N, 0.));
2611     PetscCall(PetscMalloc1(pcis->n, &vals));
2612     PetscCall(ISGetLocalSize(zerodiag, &nz));
2613     PetscCall(ISGetIndices(zerodiag, &idxs));
2614     for (i = 0; i < nz; i++) vals[i] = 1.;
2615     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2616     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2617     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2618     /* v_I */
2619     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2620     for (i = 0; i < nz; i++) vals[i] = 0.;
2621     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2622     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2623     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2624     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2625     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2626     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2627     if (dirIS) {
2628       PetscInt n;
2629 
2630       PetscCall(ISGetLocalSize(dirIS, &n));
2631       PetscCall(ISGetIndices(dirIS, &idxs));
2632       for (i = 0; i < n; i++) vals[i] = 0.;
2633       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2634       PetscCall(ISRestoreIndices(dirIS, &idxs));
2635     }
2636     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2637     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2638     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2639     PetscCall(VecSet(vec3_N, 0.));
2640     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2641     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2642     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2643     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2644     PetscCall(PetscFree(vals));
2645     PetscCall(VecDestroy(&vec3_N));
2646 
2647     /* there should not be any pressure dofs lying on the interface */
2648     PetscCall(PetscCalloc1(pcis->n, &count));
2649     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2650     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2651     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2652     PetscCall(ISGetIndices(zerodiag, &idxs));
2653     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2654     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2655     PetscCall(PetscFree(count));
2656   }
2657   PetscCall(ISDestroy(&dirIS));
2658 
2659   /* check PCBDDCBenignGetOrSetP0 */
2660   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2661   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2662   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2663   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2664   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2665   for (i = 0; i < pcbddc->benign_n; i++) {
2666     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2667     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2668   }
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2673 {
2674   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2675   Mat_IS   *matis     = (Mat_IS *)pc->pmat->data;
2676   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2677   PetscInt  nz, n, benign_n, bsp = 1;
2678   PetscInt *interior_dofs, n_interior_dofs, nneu;
2679   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2680 
2681   PetscFunctionBegin;
2682   if (reuse) goto project_b0;
2683   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2684   PetscCall(MatDestroy(&pcbddc->benign_B0));
2685   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2686   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2687   has_null_pressures = PETSC_TRUE;
2688   have_null          = PETSC_TRUE;
2689   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2690      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2691      Checks if all the pressure dofs in each subdomain have a zero diagonal
2692      If not, a change of basis on pressures is not needed
2693      since the local Schur complements are already SPD
2694   */
2695   if (pcbddc->n_ISForDofsLocal) {
2696     IS        iP = NULL;
2697     PetscInt  p, *pp;
2698     PetscBool flg, blocked = PETSC_FALSE;
2699 
2700     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2701     n = pcbddc->n_ISForDofsLocal;
2702     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2703     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2704     PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL));
2705     PetscOptionsEnd();
2706     if (!flg) {
2707       n     = 1;
2708       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2709     }
2710 
2711     bsp = 0;
2712     for (p = 0; p < n; p++) {
2713       PetscInt bs = 1;
2714 
2715       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2716       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2717       bsp += bs;
2718     }
2719     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2720     bsp = 0;
2721     for (p = 0; p < n; p++) {
2722       const PetscInt *idxs;
2723       PetscInt        b, bs = 1, npl, *bidxs;
2724 
2725       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2726       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2727       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2728       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2729       for (b = 0; b < bs; b++) {
2730         PetscInt i;
2731 
2732         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2733         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2734         bsp++;
2735       }
2736       PetscCall(PetscFree(bidxs));
2737       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2738     }
2739     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2740 
2741     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2742     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2743     if (iP) {
2744       IS newpressures;
2745 
2746       PetscCall(ISDifference(pressures, iP, &newpressures));
2747       PetscCall(ISDestroy(&pressures));
2748       pressures = newpressures;
2749     }
2750     PetscCall(ISSorted(pressures, &sorted));
2751     if (!sorted) PetscCall(ISSort(pressures));
2752     PetscCall(PetscFree(pp));
2753   }
2754 
2755   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2756   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2757   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2758   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2759   PetscCall(ISSorted(zerodiag, &sorted));
2760   if (!sorted) PetscCall(ISSort(zerodiag));
2761   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2762   zerodiag_save = zerodiag;
2763   PetscCall(ISGetLocalSize(zerodiag, &nz));
2764   if (!nz) {
2765     if (n) have_null = PETSC_FALSE;
2766     has_null_pressures = PETSC_FALSE;
2767     PetscCall(ISDestroy(&zerodiag));
2768   }
2769   recompute_zerodiag = PETSC_FALSE;
2770 
2771   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2772   zerodiag_subs   = NULL;
2773   benign_n        = 0;
2774   n_interior_dofs = 0;
2775   interior_dofs   = NULL;
2776   nneu            = 0;
2777   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2778   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2779   if (checkb) { /* need to compute interior nodes */
2780     PetscInt               n, i;
2781     PetscInt              *count;
2782     ISLocalToGlobalMapping mapping;
2783 
2784     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2785     PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2786     PetscCall(PetscMalloc1(n, &interior_dofs));
2787     for (i = 0; i < n; i++)
2788       if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2789     PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2790   }
2791   if (has_null_pressures) {
2792     IS             *subs;
2793     PetscInt        nsubs, i, j, nl;
2794     const PetscInt *idxs;
2795     PetscScalar    *array;
2796     Vec            *work;
2797 
2798     subs  = pcbddc->local_subs;
2799     nsubs = pcbddc->n_local_subs;
2800     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2801     if (checkb) {
2802       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2803       PetscCall(ISGetLocalSize(zerodiag, &nl));
2804       PetscCall(ISGetIndices(zerodiag, &idxs));
2805       /* work[0] = 1_p */
2806       PetscCall(VecSet(work[0], 0.));
2807       PetscCall(VecGetArray(work[0], &array));
2808       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2809       PetscCall(VecRestoreArray(work[0], &array));
2810       /* work[0] = 1_v */
2811       PetscCall(VecSet(work[1], 1.));
2812       PetscCall(VecGetArray(work[1], &array));
2813       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2814       PetscCall(VecRestoreArray(work[1], &array));
2815       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2816     }
2817 
2818     if (nsubs > 1 || bsp > 1) {
2819       IS      *is;
2820       PetscInt b, totb;
2821 
2822       totb  = bsp;
2823       is    = bsp > 1 ? bzerodiag : &zerodiag;
2824       nsubs = PetscMax(nsubs, 1);
2825       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2826       for (b = 0; b < totb; b++) {
2827         for (i = 0; i < nsubs; i++) {
2828           ISLocalToGlobalMapping l2g;
2829           IS                     t_zerodiag_subs;
2830           PetscInt               nl;
2831 
2832           if (subs) {
2833             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2834           } else {
2835             IS tis;
2836 
2837             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2838             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2839             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2840             PetscCall(ISDestroy(&tis));
2841           }
2842           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2843           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2844           if (nl) {
2845             PetscBool valid = PETSC_TRUE;
2846 
2847             if (checkb) {
2848               PetscCall(VecSet(matis->x, 0));
2849               PetscCall(ISGetLocalSize(subs[i], &nl));
2850               PetscCall(ISGetIndices(subs[i], &idxs));
2851               PetscCall(VecGetArray(matis->x, &array));
2852               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2853               PetscCall(VecRestoreArray(matis->x, &array));
2854               PetscCall(ISRestoreIndices(subs[i], &idxs));
2855               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2856               PetscCall(MatMult(matis->A, matis->x, matis->y));
2857               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2858               PetscCall(VecGetArray(matis->y, &array));
2859               for (j = 0; j < n_interior_dofs; j++) {
2860                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2861                   valid = PETSC_FALSE;
2862                   break;
2863                 }
2864               }
2865               PetscCall(VecRestoreArray(matis->y, &array));
2866             }
2867             if (valid && nneu) {
2868               const PetscInt *idxs;
2869               PetscInt        nzb;
2870 
2871               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2872               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2873               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2874               if (nzb) valid = PETSC_FALSE;
2875             }
2876             if (valid && pressures) {
2877               IS       t_pressure_subs, tmp;
2878               PetscInt i1, i2;
2879 
2880               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2881               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
2882               PetscCall(ISGetLocalSize(tmp, &i1));
2883               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
2884               if (i2 != i1) valid = PETSC_FALSE;
2885               PetscCall(ISDestroy(&t_pressure_subs));
2886               PetscCall(ISDestroy(&tmp));
2887             }
2888             if (valid) {
2889               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
2890               benign_n++;
2891             } else recompute_zerodiag = PETSC_TRUE;
2892           }
2893           PetscCall(ISDestroy(&t_zerodiag_subs));
2894           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
2895         }
2896       }
2897     } else { /* there's just one subdomain (or zero if they have not been detected */
2898       PetscBool valid = PETSC_TRUE;
2899 
2900       if (nneu) valid = PETSC_FALSE;
2901       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
2902       if (valid && checkb) {
2903         PetscCall(MatMult(matis->A, work[0], matis->x));
2904         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
2905         PetscCall(VecGetArray(matis->x, &array));
2906         for (j = 0; j < n_interior_dofs; j++) {
2907           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2908             valid = PETSC_FALSE;
2909             break;
2910           }
2911         }
2912         PetscCall(VecRestoreArray(matis->x, &array));
2913       }
2914       if (valid) {
2915         benign_n = 1;
2916         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
2917         PetscCall(PetscObjectReference((PetscObject)zerodiag));
2918         zerodiag_subs[0] = zerodiag;
2919       }
2920     }
2921     if (checkb) PetscCall(VecDestroyVecs(2, &work));
2922   }
2923   PetscCall(PetscFree(interior_dofs));
2924 
2925   if (!benign_n) {
2926     PetscInt n;
2927 
2928     PetscCall(ISDestroy(&zerodiag));
2929     recompute_zerodiag = PETSC_FALSE;
2930     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2931     if (n) have_null = PETSC_FALSE;
2932   }
2933 
2934   /* final check for null pressures */
2935   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
2936 
2937   if (recompute_zerodiag) {
2938     PetscCall(ISDestroy(&zerodiag));
2939     if (benign_n == 1) {
2940       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2941       zerodiag = zerodiag_subs[0];
2942     } else {
2943       PetscInt i, nzn, *new_idxs;
2944 
2945       nzn = 0;
2946       for (i = 0; i < benign_n; i++) {
2947         PetscInt ns;
2948         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2949         nzn += ns;
2950       }
2951       PetscCall(PetscMalloc1(nzn, &new_idxs));
2952       nzn = 0;
2953       for (i = 0; i < benign_n; i++) {
2954         PetscInt ns, *idxs;
2955         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2956         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2957         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
2958         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2959         nzn += ns;
2960       }
2961       PetscCall(PetscSortInt(nzn, new_idxs));
2962       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
2963     }
2964     have_null = PETSC_FALSE;
2965   }
2966 
2967   /* determines if the coarse solver will be singular or not */
2968   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2969 
2970   /* Prepare matrix to compute no-net-flux */
2971   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2972     Mat                    A, loc_divudotp;
2973     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2974     IS                     row, col, isused = NULL;
2975     PetscInt               M, N, n, st, n_isused;
2976 
2977     if (pressures) {
2978       isused = pressures;
2979     } else {
2980       isused = zerodiag_save;
2981     }
2982     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
2983     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2984     PetscCall(MatGetLocalSize(A, &n, NULL));
2985     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
2986     n_isused = 0;
2987     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
2988     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
2989     st = st - n_isused;
2990     if (n) {
2991       const PetscInt *gidxs;
2992 
2993       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
2994       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
2995       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2996       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2997       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
2998       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
2999     } else {
3000       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3001       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3002       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3003     }
3004     PetscCall(MatGetSize(pc->pmat, NULL, &N));
3005     PetscCall(ISGetSize(row, &M));
3006     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3007     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3008     PetscCall(ISDestroy(&row));
3009     PetscCall(ISDestroy(&col));
3010     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3011     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3012     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3013     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3014     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3015     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3016     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3017     PetscCall(MatDestroy(&loc_divudotp));
3018     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3019     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3020   }
3021   PetscCall(ISDestroy(&zerodiag_save));
3022   PetscCall(ISDestroy(&pressures));
3023   if (bzerodiag) {
3024     PetscInt i;
3025 
3026     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3027     PetscCall(PetscFree(bzerodiag));
3028   }
3029   pcbddc->benign_n             = benign_n;
3030   pcbddc->benign_zerodiag_subs = zerodiag_subs;
3031 
3032   /* determines if the problem has subdomains with 0 pressure block */
3033   have_null = (PetscBool)(!!pcbddc->benign_n);
3034   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
3035 
3036 project_b0:
3037   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3038   /* change of basis and p0 dofs */
3039   if (pcbddc->benign_n) {
3040     PetscInt i, s, *nnz;
3041 
3042     /* local change of basis for pressures */
3043     PetscCall(MatDestroy(&pcbddc->benign_change));
3044     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3045     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3046     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3047     PetscCall(PetscMalloc1(n, &nnz));
3048     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3049     for (i = 0; i < pcbddc->benign_n; i++) {
3050       const PetscInt *idxs;
3051       PetscInt        nzs, j;
3052 
3053       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3054       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3055       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3056       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
3057       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3058     }
3059     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3060     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3061     PetscCall(PetscFree(nnz));
3062     /* set identity by default */
3063     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3064     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3065     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3066     /* set change on pressures */
3067     for (s = 0; s < pcbddc->benign_n; s++) {
3068       PetscScalar    *array;
3069       const PetscInt *idxs;
3070       PetscInt        nzs;
3071 
3072       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3073       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3074       for (i = 0; i < nzs - 1; i++) {
3075         PetscScalar vals[2];
3076         PetscInt    cols[2];
3077 
3078         cols[0] = idxs[i];
3079         cols[1] = idxs[nzs - 1];
3080         vals[0] = 1.;
3081         vals[1] = 1.;
3082         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3083       }
3084       PetscCall(PetscMalloc1(nzs, &array));
3085       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3086       array[nzs - 1] = 1.;
3087       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3088       /* store local idxs for p0 */
3089       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3090       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3091       PetscCall(PetscFree(array));
3092     }
3093     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3094     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3095 
3096     /* project if needed */
3097     if (pcbddc->benign_change_explicit) {
3098       Mat M;
3099 
3100       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3101       PetscCall(MatDestroy(&pcbddc->local_mat));
3102       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3103       PetscCall(MatDestroy(&M));
3104     }
3105     /* store global idxs for p0 */
3106     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3107   }
3108   *zerodiaglocal = zerodiag;
3109   PetscFunctionReturn(PETSC_SUCCESS);
3110 }
3111 
3112 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3113 {
3114   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
3115   PetscScalar *array;
3116 
3117   PetscFunctionBegin;
3118   if (!pcbddc->benign_sf) {
3119     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3120     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3121   }
3122   if (get) {
3123     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3124     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3125     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3126     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3127   } else {
3128     PetscCall(VecGetArray(v, &array));
3129     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3130     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3131     PetscCall(VecRestoreArray(v, &array));
3132   }
3133   PetscFunctionReturn(PETSC_SUCCESS);
3134 }
3135 
3136 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3137 {
3138   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3139 
3140   PetscFunctionBegin;
3141   /* TODO: add error checking
3142     - avoid nested pop (or push) calls.
3143     - cannot push before pop.
3144     - cannot call this if pcbddc->local_mat is NULL
3145   */
3146   if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3147   if (pop) {
3148     if (pcbddc->benign_change_explicit) {
3149       IS       is_p0;
3150       MatReuse reuse;
3151 
3152       /* extract B_0 */
3153       reuse = MAT_INITIAL_MATRIX;
3154       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3155       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3156       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3157       /* remove rows and cols from local problem */
3158       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3159       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3160       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3161       PetscCall(ISDestroy(&is_p0));
3162     } else {
3163       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
3164       PetscScalar *vals;
3165       PetscInt     i, n, *idxs_ins;
3166 
3167       PetscCall(VecGetLocalSize(matis->y, &n));
3168       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3169       if (!pcbddc->benign_B0) {
3170         PetscInt *nnz;
3171         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3172         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3173         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3174         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3175         for (i = 0; i < pcbddc->benign_n; i++) {
3176           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3177           nnz[i] = n - nnz[i];
3178         }
3179         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3180         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3181         PetscCall(PetscFree(nnz));
3182       }
3183 
3184       for (i = 0; i < pcbddc->benign_n; i++) {
3185         PetscScalar *array;
3186         PetscInt    *idxs, j, nz, cum;
3187 
3188         PetscCall(VecSet(matis->x, 0.));
3189         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3190         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3191         for (j = 0; j < nz; j++) vals[j] = 1.;
3192         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3193         PetscCall(VecAssemblyBegin(matis->x));
3194         PetscCall(VecAssemblyEnd(matis->x));
3195         PetscCall(VecSet(matis->y, 0.));
3196         PetscCall(MatMult(matis->A, matis->x, matis->y));
3197         PetscCall(VecGetArray(matis->y, &array));
3198         cum = 0;
3199         for (j = 0; j < n; j++) {
3200           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3201             vals[cum]     = array[j];
3202             idxs_ins[cum] = j;
3203             cum++;
3204           }
3205         }
3206         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3207         PetscCall(VecRestoreArray(matis->y, &array));
3208         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3209       }
3210       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3211       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3212       PetscCall(PetscFree2(idxs_ins, vals));
3213     }
3214   } else { /* push */
3215 
3216     PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3217     for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3218       PetscScalar *B0_vals;
3219       PetscInt    *B0_cols, B0_ncol;
3220 
3221       PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3222       PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3223       PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3224       PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3225       PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3226     }
3227     PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3228     PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3229   }
3230   PetscFunctionReturn(PETSC_SUCCESS);
3231 }
3232 
3233 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3234 {
3235   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3236   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3237   PetscBLASInt    B_neigs, B_ierr, B_lwork;
3238   PetscBLASInt   *B_iwork, *B_ifail;
3239   PetscScalar    *work, lwork;
3240   PetscScalar    *St, *S, *eigv;
3241   PetscScalar    *Sarray, *Starray;
3242   PetscReal      *eigs, thresh, lthresh, uthresh;
3243   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3244   PetscBool       allocated_S_St, upart;
3245 #if defined(PETSC_USE_COMPLEX)
3246   PetscReal *rwork;
3247 #endif
3248 
3249   PetscFunctionBegin;
3250   if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3251   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3252   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3253   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3254              sub_schurs->is_posdef);
3255   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3256 
3257   if (pcbddc->dbg_flag) {
3258     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3259     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3260     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3261     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3262     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3263   }
3264 
3265   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3266 
3267   /* max size of subsets */
3268   mss = 0;
3269   for (i = 0; i < sub_schurs->n_subs; i++) {
3270     PetscInt subset_size;
3271 
3272     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3273     mss = PetscMax(mss, subset_size);
3274   }
3275 
3276   /* min/max and threshold */
3277   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3278   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3279   nmax           = PetscMax(nmin, nmax);
3280   allocated_S_St = PETSC_FALSE;
3281   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3282     allocated_S_St = PETSC_TRUE;
3283   }
3284 
3285   /* allocate lapack workspace */
3286   cum = cum2 = 0;
3287   maxneigs   = 0;
3288   for (i = 0; i < sub_schurs->n_subs; i++) {
3289     PetscInt n, subset_size;
3290 
3291     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3292     n = PetscMin(subset_size, nmax);
3293     cum += subset_size;
3294     cum2 += subset_size * n;
3295     maxneigs = PetscMax(maxneigs, n);
3296   }
3297   lwork = 0;
3298   if (mss) {
3299     PetscScalar  sdummy  = 0.;
3300     PetscBLASInt B_itype = 1;
3301     PetscBLASInt B_N, idummy = 0;
3302     PetscReal    rdummy = 0., zero = 0.0;
3303     PetscReal    eps = 0.0; /* dlamch? */
3304 
3305     PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3306     PetscCall(PetscBLASIntCast(mss, &B_N));
3307     B_lwork = -1;
3308     /* some implementations may complain about NULL pointers, even if we are querying */
3309     S       = &sdummy;
3310     St      = &sdummy;
3311     eigs    = &rdummy;
3312     eigv    = &sdummy;
3313     B_iwork = &idummy;
3314     B_ifail = &idummy;
3315 #if defined(PETSC_USE_COMPLEX)
3316     rwork = &rdummy;
3317 #endif
3318     thresh = 1.0;
3319     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3320 #if defined(PETSC_USE_COMPLEX)
3321     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3322 #else
3323     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3324 #endif
3325     PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr);
3326     PetscCall(PetscFPTrapPop());
3327   }
3328 
3329   nv = 0;
3330   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3331     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3332   }
3333   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3334   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3335   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3336 #if defined(PETSC_USE_COMPLEX)
3337   PetscCall(PetscMalloc1(7 * mss, &rwork));
3338 #endif
3339   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3340                          &pcbddc->adaptive_constraints_data));
3341   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3342 
3343   maxneigs = 0;
3344   cum = cumarray                           = 0;
3345   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3346   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3347   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3348     const PetscInt *idxs;
3349 
3350     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3351     for (cum = 0; cum < nv; cum++) {
3352       pcbddc->adaptive_constraints_n[cum]            = 1;
3353       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3354       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3355       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3356       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3357     }
3358     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3359   }
3360 
3361   if (mss) { /* multilevel */
3362     if (sub_schurs->gdsw) {
3363       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3364       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3365     } else {
3366       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3367       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3368     }
3369   }
3370 
3371   lthresh = pcbddc->adaptive_threshold[0];
3372   uthresh = pcbddc->adaptive_threshold[1];
3373   upart   = pcbddc->use_deluxe_scaling;
3374   for (i = 0; i < sub_schurs->n_subs; i++) {
3375     const PetscInt *idxs;
3376     PetscReal       upper, lower;
3377     PetscInt        j, subset_size, eigs_start = 0;
3378     PetscBLASInt    B_N;
3379     PetscBool       same_data = PETSC_FALSE;
3380     PetscBool       scal      = PETSC_FALSE;
3381 
3382     if (upart) {
3383       upper = PETSC_MAX_REAL;
3384       lower = uthresh;
3385     } else {
3386       if (sub_schurs->gdsw) {
3387         upper = uthresh;
3388         lower = PETSC_MIN_REAL;
3389       } else {
3390         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3391         upper = 1. / uthresh;
3392         lower = 0.;
3393       }
3394     }
3395     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3396     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3397     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3398     /* this is experimental: we assume the dofs have been properly grouped to have
3399        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3400     if (!sub_schurs->is_posdef) {
3401       Mat T;
3402 
3403       for (j = 0; j < subset_size; j++) {
3404         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3405           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3406           PetscCall(MatScale(T, -1.0));
3407           PetscCall(MatDestroy(&T));
3408           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3409           PetscCall(MatScale(T, -1.0));
3410           PetscCall(MatDestroy(&T));
3411           if (sub_schurs->change_primal_sub) {
3412             PetscInt        nz, k;
3413             const PetscInt *idxs;
3414 
3415             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3416             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3417             for (k = 0; k < nz; k++) {
3418               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3419               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3420             }
3421             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3422           }
3423           scal = PETSC_TRUE;
3424           break;
3425         }
3426       }
3427     }
3428 
3429     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3430       if (sub_schurs->is_symmetric) {
3431         PetscInt j, k;
3432         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3433           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3434           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3435         }
3436         for (j = 0; j < subset_size; j++) {
3437           for (k = j; k < subset_size; k++) {
3438             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3439             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3440           }
3441         }
3442       } else {
3443         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3444         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3445       }
3446     } else {
3447       S  = Sarray + cumarray;
3448       St = Starray + cumarray;
3449     }
3450     /* see if we can save some work */
3451     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3452 
3453     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3454       B_neigs = 0;
3455     } else {
3456       PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0;
3457       PetscReal    eps = -1.0; /* dlamch? */
3458       PetscInt     nmin_s;
3459       PetscBool    compute_range;
3460 
3461       PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3462       B_neigs       = 0;
3463       compute_range = (PetscBool)!same_data;
3464       if (nmin >= subset_size) compute_range = PETSC_FALSE;
3465 
3466       if (pcbddc->dbg_flag) {
3467         PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;
3468 
3469         if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3470         PetscCall(
3471           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3472       }
3473 
3474       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3475       if (compute_range) {
3476         /* ask for eigenvalues larger than thresh */
3477         if (sub_schurs->is_posdef) {
3478 #if defined(PETSC_USE_COMPLEX)
3479           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3480 #else
3481           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3482 #endif
3483           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3484         } else { /* no theory so far, but it works nicely */
3485           PetscInt  recipe = 0, recipe_m = 1;
3486           PetscReal bb[2];
3487 
3488           PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3489           switch (recipe) {
3490           case 0:
3491             if (scal) {
3492               bb[0] = PETSC_MIN_REAL;
3493               bb[1] = lthresh;
3494             } else {
3495               bb[0] = uthresh;
3496               bb[1] = PETSC_MAX_REAL;
3497             }
3498 #if defined(PETSC_USE_COMPLEX)
3499             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3500 #else
3501             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3502 #endif
3503             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3504             break;
3505           case 1:
3506             bb[0] = PETSC_MIN_REAL;
3507             bb[1] = lthresh * lthresh;
3508 #if defined(PETSC_USE_COMPLEX)
3509             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3510 #else
3511             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3512 #endif
3513             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3514             if (!scal) {
3515               PetscBLASInt B_neigs2 = 0;
3516 
3517               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3518               bb[1] = PETSC_MAX_REAL;
3519               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3520               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3521 #if defined(PETSC_USE_COMPLEX)
3522               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3523 #else
3524               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3525 #endif
3526               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3527               B_neigs += B_neigs2;
3528             }
3529             break;
3530           case 2:
3531             if (scal) {
3532               bb[0] = PETSC_MIN_REAL;
3533               bb[1] = 0;
3534 #if defined(PETSC_USE_COMPLEX)
3535               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3536 #else
3537               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3538 #endif
3539               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3540             } else {
3541               PetscBLASInt B_neigs2 = 0;
3542               PetscBool    do_copy  = PETSC_FALSE;
3543 
3544               lthresh = PetscMax(lthresh, 0.0);
3545               if (lthresh > 0.0) {
3546                 bb[0] = PETSC_MIN_REAL;
3547                 bb[1] = lthresh * lthresh;
3548 
3549                 do_copy = PETSC_TRUE;
3550 #if defined(PETSC_USE_COMPLEX)
3551                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3552 #else
3553                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3554 #endif
3555                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3556               }
3557               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3558               bb[1] = PETSC_MAX_REAL;
3559               if (do_copy) {
3560                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3561                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3562               }
3563 #if defined(PETSC_USE_COMPLEX)
3564               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3565 #else
3566               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3567 #endif
3568               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3569               B_neigs += B_neigs2;
3570             }
3571             break;
3572           case 3:
3573             if (scal) {
3574               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3575             } else {
3576               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3577             }
3578             if (!scal) {
3579               bb[0] = uthresh;
3580               bb[1] = PETSC_MAX_REAL;
3581 #if defined(PETSC_USE_COMPLEX)
3582               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3583 #else
3584               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3585 #endif
3586               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3587             }
3588             if (recipe_m > 0 && B_N - B_neigs > 0) {
3589               PetscBLASInt B_neigs2 = 0;
3590 
3591               PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3592               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3593               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3594 #if defined(PETSC_USE_COMPLEX)
3595               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3596 #else
3597               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3598 #endif
3599               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3600               B_neigs += B_neigs2;
3601             }
3602             break;
3603           case 4:
3604             bb[0] = PETSC_MIN_REAL;
3605             bb[1] = lthresh;
3606 #if defined(PETSC_USE_COMPLEX)
3607             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3608 #else
3609             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3610 #endif
3611             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3612             {
3613               PetscBLASInt B_neigs2 = 0;
3614 
3615               bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3616               bb[1] = PETSC_MAX_REAL;
3617               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3618               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3619 #if defined(PETSC_USE_COMPLEX)
3620               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3621 #else
3622               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3623 #endif
3624               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3625               B_neigs += B_neigs2;
3626             }
3627             break;
3628           case 5: /* same as before: first compute all eigenvalues, then filter */
3629 #if defined(PETSC_USE_COMPLEX)
3630             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3631 #else
3632             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3633 #endif
3634             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3635             {
3636               PetscInt e, k, ne;
3637               for (e = 0, ne = 0; e < B_neigs; e++) {
3638                 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3639                   for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3640                   eigs[ne] = eigs[e];
3641                   ne++;
3642                 }
3643               }
3644               PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3645               PetscCall(PetscBLASIntCast(ne, &B_neigs));
3646             }
3647             break;
3648           default:
3649             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3650           }
3651         }
3652       } else if (!same_data) { /* this is just to see all the eigenvalues */
3653         PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU));
3654 #if defined(PETSC_USE_COMPLEX)
3655         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3656 #else
3657         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3658 #endif
3659         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3660       } else { /* same_data is true, so just get the adaptive functional requested by the user */
3661         PetscInt k;
3662         PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3663         PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3664         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3665         nmin = nmax;
3666         PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3667         for (k = 0; k < nmax; k++) {
3668           eigs[k]                     = 1. / PETSC_SMALL;
3669           eigv[k * (subset_size + 1)] = 1.0;
3670         }
3671       }
3672       PetscCall(PetscFPTrapPop());
3673       if (B_ierr) {
3674         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3675         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3676         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3677       }
3678 
3679       if (B_neigs > nmax) {
3680         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3681         if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3682         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3683       }
3684 
3685       nmin_s = PetscMin(nmin, B_N);
3686       if (B_neigs < nmin_s) {
3687         PetscBLASInt B_neigs2 = 0;
3688 
3689         if (upart) {
3690           if (scal) {
3691             PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3692             B_IL = B_neigs + 1;
3693           } else {
3694             PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL));
3695             B_IU = B_N - B_neigs;
3696           }
3697         } else {
3698           B_IL = B_neigs + 1;
3699           PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3700         }
3701         if (pcbddc->dbg_flag) {
3702           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3703         }
3704         if (sub_schurs->is_symmetric) {
3705           PetscInt j, k;
3706           for (j = 0; j < subset_size; j++) {
3707             for (k = j; k < subset_size; k++) {
3708               S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3709               St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3710             }
3711           }
3712         } else {
3713           PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3714           PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3715         }
3716         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3717 #if defined(PETSC_USE_COMPLEX)
3718         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3719 #else
3720         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3721 #endif
3722         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3723         PetscCall(PetscFPTrapPop());
3724         B_neigs += B_neigs2;
3725       }
3726       if (B_ierr) {
3727         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3728         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3729         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3730       }
3731       if (pcbddc->dbg_flag) {
3732         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3733         for (j = 0; j < B_neigs; j++) {
3734           if (!sub_schurs->gdsw) {
3735             if (eigs[j] == 0.0) {
3736               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3737             } else {
3738               if (upart) {
3739                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3740               } else {
3741                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1 / eigs[j + eigs_start])));
3742               }
3743             }
3744           } else {
3745             double pg = (double)eigs[j + eigs_start];
3746             if (pg < 2 * PETSC_SMALL) pg = 0.0;
3747             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3748           }
3749         }
3750       }
3751     }
3752     /* change the basis back to the original one */
3753     if (sub_schurs->change) {
3754       Mat change, phi, phit;
3755 
3756       if (pcbddc->dbg_flag > 2) {
3757         PetscInt ii;
3758         for (ii = 0; ii < B_neigs; ii++) {
3759           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3760           for (j = 0; j < B_N; j++) {
3761 #if defined(PETSC_USE_COMPLEX)
3762             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3763             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3764             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3765 #else
3766             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3767 #endif
3768           }
3769         }
3770       }
3771       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3772       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3773       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi));
3774       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3775       PetscCall(MatDestroy(&phit));
3776       PetscCall(MatDestroy(&phi));
3777     }
3778     maxneigs                               = PetscMax(B_neigs, maxneigs);
3779     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3780     if (B_neigs) {
3781       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3782 
3783       if (pcbddc->dbg_flag > 1) {
3784         PetscInt ii;
3785         for (ii = 0; ii < B_neigs; ii++) {
3786           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3787           for (j = 0; j < B_N; j++) {
3788 #if defined(PETSC_USE_COMPLEX)
3789             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3790             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3791             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3792 #else
3793             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3794 #endif
3795           }
3796         }
3797       }
3798       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3799       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3800       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3801       cum++;
3802     }
3803     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3804     /* shift for next computation */
3805     cumarray += subset_size * subset_size;
3806   }
3807   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3808 
3809   if (mss) {
3810     if (sub_schurs->gdsw) {
3811       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3812       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3813     } else {
3814       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3815       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3816       /* destroy matrices (junk) */
3817       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3818       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3819     }
3820   }
3821   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3822   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3823 #if defined(PETSC_USE_COMPLEX)
3824   PetscCall(PetscFree(rwork));
3825 #endif
3826   if (pcbddc->dbg_flag) {
3827     PetscInt maxneigs_r;
3828     PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3829     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3830   }
3831   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3832   PetscFunctionReturn(PETSC_SUCCESS);
3833 }
3834 
3835 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3836 {
3837   Mat coarse_submat;
3838 
3839   PetscFunctionBegin;
3840   /* Setup local scatters R_to_B and (optionally) R_to_D */
3841   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3842   PetscCall(PCBDDCSetUpLocalScatters(pc));
3843 
3844   /* Setup local neumann solver ksp_R */
3845   /* PCBDDCSetUpLocalScatters should be called first! */
3846   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3847 
3848   /*
3849      Setup local correction and local part of coarse basis.
3850      Gives back the dense local part of the coarse matrix in column major ordering
3851   */
3852   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));
3853 
3854   /* Compute total number of coarse nodes and setup coarse solver */
3855   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3856   PetscCall(MatDestroy(&coarse_submat));
3857   PetscFunctionReturn(PETSC_SUCCESS);
3858 }
3859 
3860 PetscErrorCode PCBDDCResetCustomization(PC pc)
3861 {
3862   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3863 
3864   PetscFunctionBegin;
3865   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3866   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3867   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3868   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3869   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3870   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3871   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3872   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3873   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3874   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3875   PetscFunctionReturn(PETSC_SUCCESS);
3876 }
3877 
3878 PetscErrorCode PCBDDCResetTopography(PC pc)
3879 {
3880   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3881   PetscInt i;
3882 
3883   PetscFunctionBegin;
3884   PetscCall(MatDestroy(&pcbddc->nedcG));
3885   PetscCall(ISDestroy(&pcbddc->nedclocal));
3886   PetscCall(MatDestroy(&pcbddc->discretegradient));
3887   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3888   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3889   PetscCall(MatDestroy(&pcbddc->switch_static_change));
3890   PetscCall(VecDestroy(&pcbddc->work_change));
3891   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
3892   PetscCall(MatDestroy(&pcbddc->divudotp));
3893   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
3894   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3895   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
3896   pcbddc->n_local_subs = 0;
3897   PetscCall(PetscFree(pcbddc->local_subs));
3898   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3899   pcbddc->graphanalyzed        = PETSC_FALSE;
3900   pcbddc->recompute_topography = PETSC_TRUE;
3901   pcbddc->corner_selected      = PETSC_FALSE;
3902   PetscFunctionReturn(PETSC_SUCCESS);
3903 }
3904 
3905 PetscErrorCode PCBDDCResetSolvers(PC pc)
3906 {
3907   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3908 
3909   PetscFunctionBegin;
3910   PetscCall(VecDestroy(&pcbddc->coarse_vec));
3911   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
3912   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
3913   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
3914   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
3915   PetscCall(VecDestroy(&pcbddc->vec1_P));
3916   PetscCall(VecDestroy(&pcbddc->vec1_C));
3917   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3918   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3919   PetscCall(VecDestroy(&pcbddc->vec1_R));
3920   PetscCall(VecDestroy(&pcbddc->vec2_R));
3921   PetscCall(ISDestroy(&pcbddc->is_R_local));
3922   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
3923   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
3924   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3925   PetscCall(KSPReset(pcbddc->ksp_D));
3926   PetscCall(KSPReset(pcbddc->ksp_R));
3927   PetscCall(KSPReset(pcbddc->coarse_ksp));
3928   PetscCall(MatDestroy(&pcbddc->local_mat));
3929   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
3930   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
3931   PetscCall(PetscFree(pcbddc->global_primal_indices));
3932   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
3933   PetscCall(MatDestroy(&pcbddc->benign_change));
3934   PetscCall(VecDestroy(&pcbddc->benign_vec));
3935   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
3936   PetscCall(MatDestroy(&pcbddc->benign_B0));
3937   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
3938   if (pcbddc->benign_zerodiag_subs) {
3939     PetscInt i;
3940     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3941     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
3942   }
3943   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3944   PetscFunctionReturn(PETSC_SUCCESS);
3945 }
3946 
3947 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3948 {
3949   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3950   PC_IS   *pcis   = (PC_IS *)pc->data;
3951   VecType  impVecType;
3952   PetscInt n_constraints, n_R, old_size;
3953 
3954   PetscFunctionBegin;
3955   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3956   n_R           = pcis->n - pcbddc->n_vertices;
3957   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
3958   /* local work vectors (try to avoid unneeded work)*/
3959   /* R nodes */
3960   old_size = -1;
3961   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
3962   if (n_R != old_size) {
3963     PetscCall(VecDestroy(&pcbddc->vec1_R));
3964     PetscCall(VecDestroy(&pcbddc->vec2_R));
3965     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
3966     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
3967     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
3968     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
3969   }
3970   /* local primal dofs */
3971   old_size = -1;
3972   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
3973   if (pcbddc->local_primal_size != old_size) {
3974     PetscCall(VecDestroy(&pcbddc->vec1_P));
3975     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
3976     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
3977     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
3978   }
3979   /* local explicit constraints */
3980   old_size = -1;
3981   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
3982   if (n_constraints && n_constraints != old_size) {
3983     PetscCall(VecDestroy(&pcbddc->vec1_C));
3984     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
3985     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
3986     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
3987   }
3988   PetscFunctionReturn(PETSC_SUCCESS);
3989 }
3990 
3991 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
3992 {
3993   PetscBool          flg;
3994   const PetscScalar *a;
3995 
3996   PetscFunctionBegin;
3997   PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
3998   if (flg) {
3999     PetscCall(MatDenseGetArrayRead(S, &a));
4000     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4001     PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4002     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4003     PetscCall(MatDenseRestoreArrayRead(S, &a));
4004   } else {
4005     const PetscInt *ii, *jj;
4006     PetscInt        n;
4007     PetscInt        buf[8192], *bufc = NULL;
4008     PetscBool       freeb = PETSC_FALSE;
4009     Mat             Sm    = S;
4010 
4011     PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4012     if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4013     else PetscCall(PetscObjectReference((PetscObject)S));
4014     PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4015     PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4016     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4017     if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4018       bufc = buf;
4019     } else {
4020       PetscCall(PetscMalloc1(nc, &bufc));
4021       freeb = PETSC_TRUE;
4022     }
4023 
4024     for (PetscInt i = 0; i < n; i++) {
4025       const PetscInt nci = ii[i + 1] - ii[i];
4026 
4027       for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4028       PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4029     }
4030     PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4031     PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4032     PetscCall(MatDestroy(&Sm));
4033     if (freeb) PetscCall(PetscFree(bufc));
4034   }
4035   PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4036   PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4037   PetscFunctionReturn(PETSC_SUCCESS);
4038 }
4039 
4040 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4041 {
4042   Mat_SeqAIJ        *aij;
4043   PetscInt          *ii, *jj;
4044   PetscScalar       *aa;
4045   PetscInt           nnz = 0, m, nc;
4046   const PetscScalar *a;
4047   const PetscScalar  zero = 0.0;
4048 
4049   PetscFunctionBegin;
4050   PetscCall(MatGetLocalSize(D, &m, &nc));
4051   PetscCall(MatDenseGetArrayRead(D, &a));
4052   PetscCall(PetscMalloc1(m + 1, &ii));
4053   PetscCall(PetscMalloc1(m * nc, &jj));
4054   PetscCall(PetscMalloc1(m * nc, &aa));
4055   ii[0] = 0;
4056   for (PetscInt k = 0; k < m; k++) {
4057     for (PetscInt s = 0; s < nc; s++) {
4058       const PetscInt    c = s + k * nc;
4059       const PetscScalar v = a[k + s * m];
4060 
4061       if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4062       jj[nnz] = j[c];
4063       aa[nnz] = a[k + s * m];
4064       nnz++;
4065     }
4066     ii[k + 1] = nnz;
4067   }
4068 
4069   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4070   PetscCall(MatDenseRestoreArrayRead(D, &a));
4071 
4072   aij          = (Mat_SeqAIJ *)(*mat)->data;
4073   aij->free_a  = PETSC_TRUE;
4074   aij->free_ij = PETSC_TRUE;
4075   PetscFunctionReturn(PETSC_SUCCESS);
4076 }
4077 
4078 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
4079 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4080 {
4081   PetscInt        n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4082   const PetscBool allowzeropivot    = PETSC_FALSE;
4083   PetscBool       zeropivotdetected = PETSC_FALSE;
4084   const PetscReal shift             = 0.0;
4085   PetscInt        ipvt[5], *ii, *jj, *indi, *indj;
4086   PetscScalar     work[25], *v_work = NULL, *aa, *diag;
4087   PetscLogDouble  flops = 0.0;
4088 
4089   PetscFunctionBegin;
4090   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4091   for (PetscInt i = 0; i < nblocks; i++) {
4092     ncnt += bsizes[i];
4093     ncnt2 += PetscSqr(bsizes[i]);
4094   }
4095   PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4096   for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4097   if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));
4098 
4099   PetscCall(PetscMalloc1(n + 1, &ii));
4100   PetscCall(PetscMalloc1(ncnt2, &jj));
4101   PetscCall(PetscCalloc1(ncnt2, &aa));
4102 
4103   ncnt  = 0;
4104   ii[0] = 0;
4105   indi  = ii;
4106   indj  = jj;
4107   diag  = aa;
4108   for (PetscInt i = 0; i < nblocks; i++) {
4109     const PetscInt bs = bsizes[i];
4110 
4111     for (PetscInt k = 0; k < bs; k++) {
4112       indi[k + 1] = indi[k] + bs;
4113       for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4114     }
4115     PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4116     switch (bs) {
4117     case 1:
4118       *diag = 1.0 / (*diag);
4119       break;
4120     case 2:
4121       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4122       break;
4123     case 3:
4124       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4125       break;
4126     case 4:
4127       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4128       break;
4129     case 5:
4130       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4131       break;
4132     case 6:
4133       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4134       break;
4135     case 7:
4136       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4137       break;
4138     default:
4139       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4140     }
4141     ncnt += bs;
4142     flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4143     diag += bs * bs;
4144     indj += bs * bs;
4145     indi += bs;
4146   }
4147   PetscCall(PetscLogFlops(flops));
4148   PetscCall(PetscFree2(v_work, v_pivots));
4149   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4150   {
4151     Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4152     aij->free_a     = PETSC_TRUE;
4153     aij->free_ij    = PETSC_TRUE;
4154   }
4155   PetscFunctionReturn(PETSC_SUCCESS);
4156 }
4157 
4158 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B)
4159 {
4160   const PetscScalar *rarr;
4161   PetscScalar       *larr;
4162   PetscSF            vsf;
4163   PetscInt           n, rld, lld;
4164 
4165   PetscFunctionBegin;
4166   PetscCall(MatGetSize(A, NULL, &n));
4167   PetscCall(MatDenseGetLDA(A, &rld));
4168   PetscCall(MatDenseGetLDA(B, &lld));
4169   PetscCall(MatDenseGetArrayRead(A, &rarr));
4170   PetscCall(MatDenseGetArrayWrite(B, &larr));
4171   PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf));
4172   PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4173   PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4174   PetscCall(MatDenseRestoreArrayRead(A, &rarr));
4175   PetscCall(MatDenseRestoreArrayWrite(B, &larr));
4176   PetscCall(PetscSFDestroy(&vsf));
4177   PetscFunctionReturn(PETSC_SUCCESS);
4178 }
4179 
4180 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4181 {
4182   PC_IS          *pcis       = (PC_IS *)pc->data;
4183   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
4184   PCBDDCGraph     graph      = pcbddc->mat_graph;
4185   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4186   /* submatrices of local problem */
4187   Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4188   /* submatrices of local coarse problem */
4189   Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4190   /* working matrices */
4191   Mat C_CR;
4192 
4193   /* additional working stuff */
4194   PC              pc_R;
4195   IS              is_R, is_V, is_C;
4196   const PetscInt *idx_V, *idx_C;
4197   Mat             F, Brhs = NULL;
4198   Vec             dummy_vec;
4199   PetscBool       isLU, isCHOL, need_benign_correction, sparserhs;
4200   PetscInt       *idx_V_B;
4201   PetscInt        lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4202   PetscInt        n_eff_vertices, n_eff_constraints;
4203   PetscInt        i, n_R, n_D, n_B;
4204   PetscScalar     one = 1.0, m_one = -1.0;
4205 
4206   /* Multi-element support */
4207   PetscBool multi_element = graph->multi_element;
4208   PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4209   PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4210   IS        is_C_perm = NULL;
4211   PetscInt  n_C_bss = 0, *C_bss = NULL;
4212   Mat       coarse_phi_multi;
4213 
4214   PetscFunctionBegin;
4215   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4216   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4217 
4218   /* Set Non-overlapping dimensions */
4219   n_vertices    = pcbddc->n_vertices;
4220   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4221   n_B           = pcis->n_B;
4222   n_D           = pcis->n - n_B;
4223   n_R           = pcis->n - n_vertices;
4224 
4225   /* vertices in boundary numbering */
4226   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4227   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4228   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
4229 
4230   /* these two cases still need to be optimized */
4231   if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;
4232 
4233   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
4234   if (multi_element) {
4235     PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
4236 
4237     PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4238     PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4239     PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4240     PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4241     PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4242 
4243     /* group vertices and constraints by subdomain id */
4244     const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4245     const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4246     PetscInt       *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4247     PetscInt        n_el = PetscMax(graph->n_local_subs, 1);
4248 
4249     PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4250     PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4251     PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4252     for (PetscInt i = 0; i < n_vertices; i++) {
4253       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4254 
4255       V_to_eff_V[i] = count_eff[s];
4256       count_eff[s] += 1;
4257     }
4258     for (PetscInt i = 0; i < n_constraints; i++) {
4259       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;
4260 
4261       C_to_eff_C[i] = count_eff[s];
4262       count_eff[s] += 1;
4263     }
4264 
4265     /* preallocation */
4266     PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4267     for (PetscInt i = 0; i < n_vertices; i++) {
4268       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4269 
4270       nnz[i] = count_eff[s] + count_eff[s + 1];
4271     }
4272     for (PetscInt i = 0; i < n_constraints; i++) {
4273       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;
4274 
4275       nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4276     }
4277     PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4278     PetscCall(PetscFree(nnz));
4279 
4280     n_eff_vertices    = 0;
4281     n_eff_constraints = 0;
4282     for (PetscInt i = 0; i < n_el; i++) {
4283       n_eff_vertices       = PetscMax(n_eff_vertices, count_eff[2 * i]);
4284       n_eff_constraints    = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4285       count_eff[2 * i]     = 0;
4286       count_eff[2 * i + 1] = 0;
4287     }
4288 
4289     const PetscInt *idx;
4290     PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));
4291 
4292     for (PetscInt i = 0; i < n_vertices; i++) {
4293       const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4294       const PetscInt s = 2 * e;
4295 
4296       V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4297       count_eff[s] += 1;
4298     }
4299     for (PetscInt i = 0; i < n_constraints; i++) {
4300       const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4301       const PetscInt s = 2 * e + 1;
4302 
4303       C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4304       count_eff[s] += 1;
4305     }
4306 
4307     PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4308     PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4309     PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4310     PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4311     for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4312     for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4313     for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4314     for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;
4315 
4316     PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4317     for (PetscInt i = 0; i < n_R; i++) {
4318       const PetscInt e = graph->nodes[idx[i]].local_sub;
4319       const PetscInt s = 2 * e;
4320       PetscInt       j;
4321 
4322       for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4323       for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4324     }
4325     PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4326     PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4327     for (PetscInt i = 0; i < n_B; i++) {
4328       const PetscInt e = graph->nodes[idx[i]].local_sub;
4329       const PetscInt s = 2 * e;
4330       PetscInt       j;
4331 
4332       for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4333       for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4334     }
4335     PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));
4336 
4337     /* permutation and blocksizes for block invert of S_CC */
4338     PetscInt *idxp;
4339 
4340     PetscCall(PetscMalloc1(n_constraints, &idxp));
4341     PetscCall(PetscMalloc1(n_el, &C_bss));
4342     n_C_bss = 0;
4343     for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4344       const PetscInt nc = count_eff[2 * e + 1];
4345 
4346       if (nc) C_bss[n_C_bss++] = nc;
4347       for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c];
4348       cnt += nc;
4349     }
4350 
4351     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));
4352 
4353     PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4354     PetscCall(PetscFree(count_eff));
4355   } else {
4356     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4357     n_eff_constraints = n_constraints;
4358     n_eff_vertices    = n_vertices;
4359   }
4360 
4361   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4362   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4363   PetscCall(PCSetUp(pc_R));
4364   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4365   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4366   lda_rhs                = n_R;
4367   need_benign_correction = PETSC_FALSE;
4368   if (isLU || isCHOL) {
4369     PetscCall(PCFactorGetMatrix(pc_R, &F));
4370   } else if (sub_schurs && sub_schurs->reuse_solver) {
4371     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4372     MatFactorType      type;
4373 
4374     F = reuse_solver->F;
4375     PetscCall(MatGetFactorType(F, &type));
4376     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4377     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4378     PetscCall(MatGetSize(F, &lda_rhs, NULL));
4379     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4380   } else F = NULL;
4381 
4382   /* determine if we can use a sparse right-hand side */
4383   sparserhs = PETSC_FALSE;
4384   if (F && !multi_element) {
4385     MatSolverType solver;
4386 
4387     PetscCall(MatFactorGetSolverType(F, &solver));
4388     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4389   }
4390 
4391   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4392   dummy_vec = NULL;
4393   if (need_benign_correction && lda_rhs != n_R && F) {
4394     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4395     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4396     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4397   }
4398 
4399   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4400   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4401 
4402   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4403   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4404   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4405   PetscCall(ISGetIndices(is_V, &idx_V));
4406   PetscCall(ISGetIndices(is_C, &idx_C));
4407 
4408   /* Precompute stuffs needed for preprocessing and application of BDDC*/
4409   if (n_constraints) {
4410     Mat C_B;
4411 
4412     /* Extract constraints on R nodes: C_{CR}  */
4413     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4414     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4415 
4416     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4417     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4418     if (!sparserhs) {
4419       PetscScalar *marr;
4420 
4421       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4422       PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4423       for (i = 0; i < n_constraints; i++) {
4424         const PetscScalar *row_cmat_values;
4425         const PetscInt    *row_cmat_indices;
4426         PetscInt           size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;
4427 
4428         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4429         for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4430         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4431       }
4432       PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4433     } else {
4434       Mat tC_CR;
4435 
4436       PetscCall(MatScale(C_CR, -1.0));
4437       if (lda_rhs != n_R) {
4438         PetscScalar *aa;
4439         PetscInt     r, *ii, *jj;
4440         PetscBool    done;
4441 
4442         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4443         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4444         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4445         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4446         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4447         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4448       } else {
4449         PetscCall(PetscObjectReference((PetscObject)C_CR));
4450         tC_CR = C_CR;
4451       }
4452       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4453       PetscCall(MatDestroy(&tC_CR));
4454     }
4455     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4456     if (F) {
4457       if (need_benign_correction) {
4458         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4459 
4460         /* rhs is already zero on interior dofs, no need to change the rhs */
4461         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4462       }
4463       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4464       if (need_benign_correction) {
4465         PetscScalar       *marr;
4466         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4467 
4468         /* XXX multi_element? */
4469         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4470         if (lda_rhs != n_R) {
4471           for (i = 0; i < n_eff_constraints; i++) {
4472             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4473             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4474             PetscCall(VecResetArray(dummy_vec));
4475           }
4476         } else {
4477           for (i = 0; i < n_eff_constraints; i++) {
4478             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4479             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4480             PetscCall(VecResetArray(pcbddc->vec1_R));
4481           }
4482         }
4483         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4484       }
4485     } else {
4486       const PetscScalar *barr;
4487       PetscScalar       *marr;
4488 
4489       PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4490       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4491       for (i = 0; i < n_eff_constraints; i++) {
4492         PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4493         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4494         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4495         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4496         PetscCall(VecResetArray(pcbddc->vec1_R));
4497         PetscCall(VecResetArray(pcbddc->vec2_R));
4498       }
4499       PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4500       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4501     }
4502     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4503     PetscCall(MatDestroy(&Brhs));
4504     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4505     if (!pcbddc->switch_static) {
4506       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4507       for (i = 0; i < n_eff_constraints; i++) {
4508         Vec r, b;
4509         PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
4510         PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
4511         PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4512         PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4513         PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
4514         PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
4515       }
4516       if (multi_element) {
4517         Mat T;
4518 
4519         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4520         PetscCall(MatDestroy(&local_auxmat2_R));
4521         local_auxmat2_R = T;
4522         PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4523         PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4524         pcbddc->local_auxmat2 = T;
4525       }
4526       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4527     } else {
4528       if (multi_element) {
4529         Mat T;
4530 
4531         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4532         PetscCall(MatDestroy(&local_auxmat2_R));
4533         local_auxmat2_R = T;
4534       }
4535       if (lda_rhs != n_R) {
4536         PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4537       } else {
4538         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4539         pcbddc->local_auxmat2 = local_auxmat2_R;
4540       }
4541       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4542     }
4543     PetscCall(MatScale(S_CC, m_one));
4544     if (multi_element) {
4545       Mat T, T2;
4546       IS  isp, ispi;
4547 
4548       isp = is_C_perm;
4549 
4550       PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4551       PetscCall(MatPermute(S_CC, isp, isp, &T));
4552       PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4553       PetscCall(MatDestroy(&T));
4554       PetscCall(MatDestroy(&S_CC));
4555       PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4556       PetscCall(MatDestroy(&T2));
4557       PetscCall(ISDestroy(&ispi));
4558     } else {
4559       if (isCHOL) {
4560         PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4561       } else {
4562         PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4563       }
4564       PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4565     }
4566     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4567     PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1));
4568     PetscCall(MatDestroy(&C_B));
4569     PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4570   }
4571 
4572   /* Get submatrices from subdomain matrix */
4573   if (n_vertices) {
4574 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4575     PetscBool oldpin;
4576 #endif
4577     IS is_aux;
4578 
4579     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4580       IS tis;
4581 
4582       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4583       PetscCall(ISSort(tis));
4584       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4585       PetscCall(ISDestroy(&tis));
4586     } else {
4587       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4588     }
4589 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4590     oldpin = pcbddc->local_mat->boundtocpu;
4591 #endif
4592     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4593     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4594     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4595     /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4596     PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4597     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4598 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4599     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4600 #endif
4601     PetscCall(ISDestroy(&is_aux));
4602   }
4603   PetscCall(ISDestroy(&is_C_perm));
4604   PetscCall(PetscFree(C_bss));
4605 
4606   p0_lidx_I = NULL;
4607   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4608     const PetscInt *idxs;
4609 
4610     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4611     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4612     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4613     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4614   }
4615 
4616   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4617 
4618   /* Matrices of coarse basis functions (local) */
4619   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4620   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4621   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4622   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4623   if (!multi_element) {
4624     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4625     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4626     coarse_phi_multi = NULL;
4627   } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4628     IS is_rows[2] = {pcbddc->is_R_local, NULL};
4629     IS is_cols[2] = {is_V, is_C};
4630 
4631     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4632     PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4633     PetscCall(ISDestroy(&is_rows[1]));
4634   }
4635 
4636   /* vertices */
4637   if (n_vertices) {
4638     PetscBool restoreavr = PETSC_FALSE;
4639     Mat       A_RRmA_RV  = NULL;
4640 
4641     PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4642     PetscCall(MatDestroy(&A_VV));
4643 
4644     if (n_R) {
4645       Mat A_RV_bcorr = NULL, S_VV;
4646 
4647       PetscCall(MatScale(A_RV, m_one));
4648       if (need_benign_correction) {
4649         ISLocalToGlobalMapping RtoN;
4650         IS                     is_p0;
4651         PetscInt              *idxs_p0, n;
4652 
4653         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4654         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4655         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4656         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4657         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4658         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4659         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4660         PetscCall(ISDestroy(&is_p0));
4661       }
4662 
4663       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4664       if (!sparserhs || need_benign_correction) {
4665         if (lda_rhs == n_R && !multi_element) {
4666           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4667         } else {
4668           Mat             T;
4669           PetscScalar    *av, *array;
4670           const PetscInt *xadj, *adjncy;
4671           PetscInt        n;
4672           PetscBool       flg_row;
4673 
4674           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4675           PetscCall(MatDenseGetArrayWrite(T, &array));
4676           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4677           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4678           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4679           for (i = 0; i < n; i++) {
4680             PetscInt j;
4681             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4682           }
4683           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4684           PetscCall(MatDenseRestoreArrayWrite(T, &array));
4685           PetscCall(MatDestroy(&A_RV));
4686           A_RV = T;
4687         }
4688         if (need_benign_correction) {
4689           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4690           PetscScalar       *marr;
4691 
4692           /* XXX multi_element */
4693           PetscCall(MatDenseGetArray(A_RV, &marr));
4694           /* need \Phi^T A_RV = (I+L)A_RV, L given by
4695 
4696                  | 0 0  0 | (V)
4697              L = | 0 0 -1 | (P-p0)
4698                  | 0 0 -1 | (p0)
4699 
4700           */
4701           for (i = 0; i < reuse_solver->benign_n; i++) {
4702             const PetscScalar *vals;
4703             const PetscInt    *idxs, *idxs_zero;
4704             PetscInt           n, j, nz;
4705 
4706             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4707             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4708             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4709             for (j = 0; j < n; j++) {
4710               PetscScalar val = vals[j];
4711               PetscInt    k, col = idxs[j];
4712               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4713             }
4714             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4715             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4716           }
4717           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4718         }
4719         PetscCall(PetscObjectReference((PetscObject)A_RV));
4720         Brhs = A_RV;
4721       } else {
4722         Mat tA_RVT, A_RVT;
4723 
4724         if (!pcbddc->symmetric_primal) {
4725           /* A_RV already scaled by -1 */
4726           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4727         } else {
4728           restoreavr = PETSC_TRUE;
4729           PetscCall(MatScale(A_VR, -1.0));
4730           PetscCall(PetscObjectReference((PetscObject)A_VR));
4731           A_RVT = A_VR;
4732         }
4733         if (lda_rhs != n_R) {
4734           PetscScalar *aa;
4735           PetscInt     r, *ii, *jj;
4736           PetscBool    done;
4737 
4738           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4739           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4740           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4741           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4742           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4743           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4744         } else {
4745           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4746           tA_RVT = A_RVT;
4747         }
4748         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4749         PetscCall(MatDestroy(&tA_RVT));
4750         PetscCall(MatDestroy(&A_RVT));
4751       }
4752       if (F) {
4753         /* need to correct the rhs */
4754         if (need_benign_correction) {
4755           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4756           PetscScalar       *marr;
4757 
4758           PetscCall(MatDenseGetArray(Brhs, &marr));
4759           if (lda_rhs != n_R) {
4760             for (i = 0; i < n_eff_vertices; i++) {
4761               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4762               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4763               PetscCall(VecResetArray(dummy_vec));
4764             }
4765           } else {
4766             for (i = 0; i < n_eff_vertices; i++) {
4767               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4768               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4769               PetscCall(VecResetArray(pcbddc->vec1_R));
4770             }
4771           }
4772           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4773         }
4774         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4775         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4776         /* need to correct the solution */
4777         if (need_benign_correction) {
4778           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4779           PetscScalar       *marr;
4780 
4781           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4782           if (lda_rhs != n_R) {
4783             for (i = 0; i < n_eff_vertices; i++) {
4784               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4785               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4786               PetscCall(VecResetArray(dummy_vec));
4787             }
4788           } else {
4789             for (i = 0; i < n_eff_vertices; i++) {
4790               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4791               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4792               PetscCall(VecResetArray(pcbddc->vec1_R));
4793             }
4794           }
4795           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4796         }
4797       } else {
4798         const PetscScalar *barr;
4799         PetscScalar       *marr;
4800 
4801         PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4802         PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4803         for (i = 0; i < n_eff_vertices; i++) {
4804           PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4805           PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4806           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4807           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4808           PetscCall(VecResetArray(pcbddc->vec1_R));
4809           PetscCall(VecResetArray(pcbddc->vec2_R));
4810         }
4811         PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4812         PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4813       }
4814       PetscCall(MatDestroy(&A_RV));
4815       PetscCall(MatDestroy(&Brhs));
4816       /* S_VV and S_CV */
4817       if (n_constraints) {
4818         Mat B;
4819 
4820         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4821         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B));
4822 
4823         /* S_CV = pcbddc->local_auxmat1 * B */
4824         if (multi_element) {
4825           Mat T;
4826 
4827           PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4828           PetscCall(MatDestroy(&B));
4829           B = T;
4830         }
4831         PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4832         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4833         PetscCall(MatProductSetFromOptions(S_CV));
4834         PetscCall(MatProductSymbolic(S_CV));
4835         PetscCall(MatProductNumeric(S_CV));
4836         PetscCall(MatProductClear(S_CV));
4837         PetscCall(MatDestroy(&B));
4838 
4839         /* B = local_auxmat2_R * S_CV */
4840         PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4841         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4842         PetscCall(MatProductSetFromOptions(B));
4843         PetscCall(MatProductSymbolic(B));
4844         PetscCall(MatProductNumeric(B));
4845 
4846         PetscCall(MatScale(S_CV, m_one));
4847         PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));
4848 
4849         if (multi_element) {
4850           Mat T;
4851 
4852           PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4853           PetscCall(MatDestroy(&A_RRmA_RV));
4854           A_RRmA_RV = T;
4855         }
4856         PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4857         PetscCall(MatDestroy(&B));
4858       } else if (multi_element) {
4859         Mat T;
4860 
4861         PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4862         PetscCall(MatDestroy(&A_RRmA_RV));
4863         A_RRmA_RV = T;
4864       }
4865 
4866       if (lda_rhs != n_R) {
4867         Mat T;
4868 
4869         PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4870         PetscCall(MatDestroy(&A_RRmA_RV));
4871         A_RRmA_RV = T;
4872       }
4873 
4874       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4875       if (need_benign_correction) { /* XXX SPARSE */
4876         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4877         PetscScalar       *sums;
4878         const PetscScalar *marr;
4879 
4880         PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
4881         PetscCall(PetscMalloc1(n_vertices, &sums));
4882         for (i = 0; i < reuse_solver->benign_n; i++) {
4883           const PetscScalar *vals;
4884           const PetscInt    *idxs, *idxs_zero;
4885           PetscInt           n, j, nz;
4886 
4887           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4888           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4889           for (j = 0; j < n_vertices; j++) {
4890             sums[j] = 0.;
4891             for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
4892           }
4893           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4894           for (j = 0; j < n; j++) {
4895             PetscScalar val = vals[j];
4896             for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
4897           }
4898           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4899           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4900         }
4901         PetscCall(PetscFree(sums));
4902         PetscCall(MatDestroy(&A_RV_bcorr));
4903         PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
4904       }
4905 
4906       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV));
4907       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4908       PetscCall(MatDestroy(&S_VV));
4909     }
4910 
4911     /* coarse basis functions */
4912     if (coarse_phi_multi) {
4913       Mat Vid;
4914 
4915       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
4916       PetscCall(MatShift_Basic(Vid, 1.0));
4917       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
4918       PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
4919       PetscCall(MatDestroy(&Vid));
4920     } else {
4921       if (A_RRmA_RV) {
4922         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B));
4923         if (pcbddc->switch_static || pcbddc->dbg_flag) {
4924           PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D));
4925           if (pcbddc->benign_n) {
4926             for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
4927             PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
4928             PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
4929           }
4930         }
4931       }
4932       for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
4933       PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4934       PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4935     }
4936     PetscCall(MatDestroy(&A_RRmA_RV));
4937   }
4938   PetscCall(MatDestroy(&A_RV));
4939   PetscCall(VecDestroy(&dummy_vec));
4940 
4941   if (n_constraints) {
4942     Mat B, B2;
4943 
4944     PetscCall(MatScale(S_CC, m_one));
4945     PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
4946     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4947     PetscCall(MatProductSetFromOptions(B));
4948     PetscCall(MatProductSymbolic(B));
4949     PetscCall(MatProductNumeric(B));
4950 
4951     if (n_vertices) {
4952       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4953         PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
4954       } else {
4955         if (lda_rhs != n_R) {
4956           Mat tB;
4957 
4958           PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
4959           PetscCall(MatDestroy(&B));
4960           B = tB;
4961         }
4962         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC));
4963       }
4964       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
4965     }
4966 
4967     /* coarse basis functions */
4968     if (coarse_phi_multi) {
4969       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
4970     } else {
4971       PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4972       PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2));
4973       PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
4974       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4975         PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4976         PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2));
4977         if (pcbddc->benign_n) {
4978           for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
4979         }
4980         PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
4981       }
4982     }
4983     PetscCall(MatDestroy(&B));
4984   }
4985 
4986   /* assemble sparse coarse basis functions */
4987   if (coarse_phi_multi) {
4988     Mat T;
4989 
4990     PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
4991     PetscCall(MatDestroy(&coarse_phi_multi));
4992     PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
4993     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D));
4994     PetscCall(MatDestroy(&T));
4995   }
4996   PetscCall(MatDestroy(&local_auxmat2_R));
4997   PetscCall(PetscFree(p0_lidx_I));
4998 
4999   /* coarse matrix entries relative to B_0 */
5000   if (pcbddc->benign_n) {
5001     Mat                B0_B, B0_BPHI;
5002     IS                 is_dummy;
5003     const PetscScalar *data;
5004     PetscInt           j;
5005 
5006     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5007     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5008     PetscCall(ISDestroy(&is_dummy));
5009     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5010     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5011     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5012     for (j = 0; j < pcbddc->benign_n; j++) {
5013       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5014       for (i = 0; i < pcbddc->local_primal_size; i++) {
5015         PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5016         PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5017       }
5018     }
5019     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5020     PetscCall(MatDestroy(&B0_B));
5021     PetscCall(MatDestroy(&B0_BPHI));
5022   }
5023 
5024   /* compute other basis functions for non-symmetric problems */
5025   if (!pcbddc->symmetric_primal) {
5026     Mat          B_V = NULL, B_C = NULL;
5027     PetscScalar *marray, *work;
5028 
5029     /* TODO multi_element MatDenseScatter */
5030     if (n_constraints) {
5031       Mat S_CCT, C_CRT;
5032 
5033       PetscCall(MatScale(S_CC, m_one));
5034       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5035       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5036       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C));
5037       PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5038       PetscCall(MatDestroy(&S_CCT));
5039       if (n_vertices) {
5040         Mat S_VCT;
5041 
5042         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5043         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V));
5044         PetscCall(MatDestroy(&S_VCT));
5045         PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5046       }
5047       PetscCall(MatDestroy(&C_CRT));
5048     } else {
5049       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5050     }
5051     if (n_vertices && n_R) {
5052       PetscScalar    *av, *marray;
5053       const PetscInt *xadj, *adjncy;
5054       PetscInt        n;
5055       PetscBool       flg_row;
5056 
5057       /* B_V = B_V - A_VR^T */
5058       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5059       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5060       PetscCall(MatSeqAIJGetArray(A_VR, &av));
5061       PetscCall(MatDenseGetArray(B_V, &marray));
5062       for (i = 0; i < n; i++) {
5063         PetscInt j;
5064         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5065       }
5066       PetscCall(MatDenseRestoreArray(B_V, &marray));
5067       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5068       PetscCall(MatDestroy(&A_VR));
5069     }
5070 
5071     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5072     PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5073     if (n_vertices) {
5074       PetscCall(MatDenseGetArray(B_V, &marray));
5075       for (i = 0; i < n_vertices; i++) {
5076         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5077         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5078         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5079         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5080         PetscCall(VecResetArray(pcbddc->vec1_R));
5081         PetscCall(VecResetArray(pcbddc->vec2_R));
5082       }
5083       PetscCall(MatDenseRestoreArray(B_V, &marray));
5084     }
5085     if (B_C) {
5086       PetscCall(MatDenseGetArray(B_C, &marray));
5087       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5088         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5089         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5090         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5091         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5092         PetscCall(VecResetArray(pcbddc->vec1_R));
5093         PetscCall(VecResetArray(pcbddc->vec2_R));
5094       }
5095       PetscCall(MatDenseRestoreArray(B_C, &marray));
5096     }
5097     /* coarse basis functions */
5098     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5099     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5100     for (i = 0; i < pcbddc->local_primal_size; i++) {
5101       Vec v;
5102 
5103       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5104       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5105       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5106       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5107       if (i < n_vertices) {
5108         PetscScalar one = 1.0;
5109         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5110         PetscCall(VecAssemblyBegin(v));
5111         PetscCall(VecAssemblyEnd(v));
5112       }
5113       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
5114 
5115       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5116         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5117         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5118         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5119         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5120       }
5121       PetscCall(VecResetArray(pcbddc->vec1_R));
5122     }
5123     PetscCall(MatDestroy(&B_V));
5124     PetscCall(MatDestroy(&B_C));
5125     PetscCall(PetscFree(work));
5126   } else {
5127     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5128     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5129     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5130     pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5131   }
5132   PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5133   PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));
5134 
5135   /* free memory */
5136   PetscCall(PetscFree(V_to_eff_V));
5137   PetscCall(PetscFree(C_to_eff_C));
5138   PetscCall(PetscFree(R_eff_V_J));
5139   PetscCall(PetscFree(R_eff_C_J));
5140   PetscCall(PetscFree(B_eff_V_J));
5141   PetscCall(PetscFree(B_eff_C_J));
5142   PetscCall(ISDestroy(&is_R));
5143   PetscCall(ISRestoreIndices(is_V, &idx_V));
5144   PetscCall(ISRestoreIndices(is_C, &idx_C));
5145   PetscCall(ISDestroy(&is_V));
5146   PetscCall(ISDestroy(&is_C));
5147   PetscCall(PetscFree(idx_V_B));
5148   PetscCall(MatDestroy(&S_CV));
5149   PetscCall(MatDestroy(&S_VC));
5150   PetscCall(MatDestroy(&S_CC));
5151   if (n_vertices) PetscCall(MatDestroy(&A_VR));
5152   if (n_constraints) PetscCall(MatDestroy(&C_CR));
5153   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
5154 
5155   /* Checking coarse_sub_mat and coarse basis functions */
5156   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5157   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5158   if (pcbddc->dbg_flag) {
5159     Mat       AUXMAT, TM1, TM2, TM3, TM4;
5160     Mat       coarse_phi_D, coarse_phi_B;
5161     Mat       coarse_psi_D, coarse_psi_B;
5162     Mat       A_II, A_BB, A_IB, A_BI;
5163     Mat       C_B, CPHI;
5164     IS        is_dummy;
5165     Vec       mones;
5166     MatType   checkmattype = MATSEQAIJ;
5167     PetscReal real_value;
5168 
5169     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5170       Mat A;
5171       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5172       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5173       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5174       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5175       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5176       PetscCall(MatDestroy(&A));
5177     } else {
5178       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5179       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5180       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5181       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5182     }
5183     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5184     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5185     if (!pcbddc->symmetric_primal) {
5186       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5187       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5188     }
5189     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5190     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5191     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5192     if (!pcbddc->symmetric_primal) {
5193       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5194       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5195       PetscCall(MatDestroy(&AUXMAT));
5196       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5197       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5198       PetscCall(MatDestroy(&AUXMAT));
5199       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5200       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5201       PetscCall(MatDestroy(&AUXMAT));
5202       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5203       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5204       PetscCall(MatDestroy(&AUXMAT));
5205     } else {
5206       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5207       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5208       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5209       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5210       PetscCall(MatDestroy(&AUXMAT));
5211       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5212       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5213       PetscCall(MatDestroy(&AUXMAT));
5214     }
5215     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5216     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5217     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5218     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5219     if (pcbddc->benign_n) {
5220       Mat                B0_B, B0_BPHI;
5221       const PetscScalar *data2;
5222       PetscScalar       *data;
5223       PetscInt           j;
5224 
5225       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5226       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5227       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5228       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5229       PetscCall(MatDenseGetArray(TM1, &data));
5230       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5231       for (j = 0; j < pcbddc->benign_n; j++) {
5232         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5233         for (i = 0; i < pcbddc->local_primal_size; i++) {
5234           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5235           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5236         }
5237       }
5238       PetscCall(MatDenseRestoreArray(TM1, &data));
5239       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5240       PetscCall(MatDestroy(&B0_B));
5241       PetscCall(ISDestroy(&is_dummy));
5242       PetscCall(MatDestroy(&B0_BPHI));
5243     }
5244     PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5245     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5246     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5247     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
5248 
5249     /* check constraints */
5250     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5251     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5252     if (!pcbddc->benign_n) { /* TODO: add benign case */
5253       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5254     } else {
5255       PetscScalar *data;
5256       Mat          tmat;
5257       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5258       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5259       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5260       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5261       PetscCall(MatDestroy(&tmat));
5262     }
5263     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5264     PetscCall(VecSet(mones, -1.0));
5265     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5266     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5267     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5268     if (!pcbddc->symmetric_primal) {
5269       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5270       PetscCall(VecSet(mones, -1.0));
5271       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5272       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5273       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5274     }
5275     PetscCall(MatDestroy(&C_B));
5276     PetscCall(MatDestroy(&CPHI));
5277     PetscCall(ISDestroy(&is_dummy));
5278     PetscCall(VecDestroy(&mones));
5279     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5280     PetscCall(MatDestroy(&A_II));
5281     PetscCall(MatDestroy(&A_BB));
5282     PetscCall(MatDestroy(&A_IB));
5283     PetscCall(MatDestroy(&A_BI));
5284     PetscCall(MatDestroy(&TM1));
5285     PetscCall(MatDestroy(&TM2));
5286     PetscCall(MatDestroy(&TM3));
5287     PetscCall(MatDestroy(&TM4));
5288     PetscCall(MatDestroy(&coarse_phi_D));
5289     PetscCall(MatDestroy(&coarse_phi_B));
5290     if (!pcbddc->symmetric_primal) {
5291       PetscCall(MatDestroy(&coarse_psi_D));
5292       PetscCall(MatDestroy(&coarse_psi_B));
5293     }
5294   }
5295 
5296 #if 0
5297   {
5298     PetscViewer viewer;
5299     char filename[256];
5300 
5301     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5302     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5303     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5304     PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5305     PetscCall(MatView(*coarse_submat,viewer));
5306     if (pcbddc->coarse_phi_B) {
5307       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5308       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5309     }
5310     if (pcbddc->coarse_phi_D) {
5311       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5312       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5313     }
5314     if (pcbddc->coarse_psi_B) {
5315       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5316       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5317     }
5318     if (pcbddc->coarse_psi_D) {
5319       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5320       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5321     }
5322     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5323     PetscCall(MatView(pcbddc->local_mat,viewer));
5324     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5325     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5326     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5327     PetscCall(ISView(pcis->is_I_local,viewer));
5328     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5329     PetscCall(ISView(pcis->is_B_local,viewer));
5330     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5331     PetscCall(ISView(pcbddc->is_R_local,viewer));
5332     PetscCall(PetscViewerDestroy(&viewer));
5333   }
5334 #endif
5335 
5336   /* device support */
5337   {
5338     PetscBool iscuda, iship, iskokkos;
5339     MatType   mtype = NULL;
5340 
5341     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5342     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5343     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5344     if (iskokkos) {
5345       if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5346       else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5347     }
5348     if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5349     else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5350     else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5351     if (mtype) {
5352       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5353       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5354       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5355       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5356       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5357       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5358     }
5359   }
5360   PetscFunctionReturn(PETSC_SUCCESS);
5361 }
5362 
5363 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5364 {
5365   Mat      *work_mat;
5366   IS        isrow_s, iscol_s;
5367   PetscBool rsorted, csorted;
5368   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
5369 
5370   PetscFunctionBegin;
5371   PetscCall(ISSorted(isrow, &rsorted));
5372   PetscCall(ISSorted(iscol, &csorted));
5373   PetscCall(ISGetLocalSize(isrow, &rsize));
5374   PetscCall(ISGetLocalSize(iscol, &csize));
5375 
5376   if (!rsorted) {
5377     const PetscInt *idxs;
5378     PetscInt       *idxs_sorted, i;
5379 
5380     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5381     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5382     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5383     PetscCall(ISGetIndices(isrow, &idxs));
5384     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5385     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5386     PetscCall(ISRestoreIndices(isrow, &idxs));
5387     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5388   } else {
5389     PetscCall(PetscObjectReference((PetscObject)isrow));
5390     isrow_s = isrow;
5391   }
5392 
5393   if (!csorted) {
5394     if (isrow == iscol) {
5395       PetscCall(PetscObjectReference((PetscObject)isrow_s));
5396       iscol_s = isrow_s;
5397     } else {
5398       const PetscInt *idxs;
5399       PetscInt       *idxs_sorted, i;
5400 
5401       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5402       PetscCall(PetscMalloc1(csize, &idxs_sorted));
5403       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5404       PetscCall(ISGetIndices(iscol, &idxs));
5405       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5406       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5407       PetscCall(ISRestoreIndices(iscol, &idxs));
5408       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5409     }
5410   } else {
5411     PetscCall(PetscObjectReference((PetscObject)iscol));
5412     iscol_s = iscol;
5413   }
5414 
5415   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
5416 
5417   if (!rsorted || !csorted) {
5418     Mat new_mat;
5419     IS  is_perm_r, is_perm_c;
5420 
5421     if (!rsorted) {
5422       PetscInt *idxs_r, i;
5423       PetscCall(PetscMalloc1(rsize, &idxs_r));
5424       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5425       PetscCall(PetscFree(idxs_perm_r));
5426       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5427     } else {
5428       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5429     }
5430     PetscCall(ISSetPermutation(is_perm_r));
5431 
5432     if (!csorted) {
5433       if (isrow_s == iscol_s) {
5434         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5435         is_perm_c = is_perm_r;
5436       } else {
5437         PetscInt *idxs_c, i;
5438         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5439         PetscCall(PetscMalloc1(csize, &idxs_c));
5440         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5441         PetscCall(PetscFree(idxs_perm_c));
5442         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5443       }
5444     } else {
5445       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5446     }
5447     PetscCall(ISSetPermutation(is_perm_c));
5448 
5449     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5450     PetscCall(MatDestroy(&work_mat[0]));
5451     work_mat[0] = new_mat;
5452     PetscCall(ISDestroy(&is_perm_r));
5453     PetscCall(ISDestroy(&is_perm_c));
5454   }
5455 
5456   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5457   *B = work_mat[0];
5458   PetscCall(MatDestroyMatrices(1, &work_mat));
5459   PetscCall(ISDestroy(&isrow_s));
5460   PetscCall(ISDestroy(&iscol_s));
5461   PetscFunctionReturn(PETSC_SUCCESS);
5462 }
5463 
5464 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5465 {
5466   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
5467   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
5468   Mat       new_mat, lA;
5469   IS        is_local, is_global;
5470   PetscInt  local_size;
5471   PetscBool isseqaij, issym, isset;
5472 
5473   PetscFunctionBegin;
5474   PetscCall(MatDestroy(&pcbddc->local_mat));
5475   PetscCall(MatGetSize(matis->A, &local_size, NULL));
5476   if (pcbddc->mat_graph->multi_element) {
5477     Mat     *mats, *bdiags;
5478     IS      *gsubs;
5479     PetscInt nsubs = pcbddc->n_local_subs;
5480 
5481     PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5482     PetscCall(PetscMalloc1(nsubs, &gsubs));
5483     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5484     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5485     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5486     PetscCall(PetscFree(gsubs));
5487 
5488     for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5489     PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5490     PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5491     PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5492     PetscCall(PetscFree(mats));
5493   } else {
5494     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5495     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5496     PetscCall(ISDestroy(&is_local));
5497     PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5498     PetscCall(ISDestroy(&is_global));
5499   }
5500   if (pcbddc->dbg_flag) {
5501     Vec       x, x_change;
5502     PetscReal error;
5503 
5504     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5505     PetscCall(VecSetRandom(x, NULL));
5506     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5507     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5508     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5509     PetscCall(MatMult(new_mat, matis->x, matis->y));
5510     if (!pcbddc->change_interior) {
5511       const PetscScalar *x, *y, *v;
5512       PetscReal          lerror = 0.;
5513       PetscInt           i;
5514 
5515       PetscCall(VecGetArrayRead(matis->x, &x));
5516       PetscCall(VecGetArrayRead(matis->y, &y));
5517       PetscCall(VecGetArrayRead(matis->counter, &v));
5518       for (i = 0; i < local_size; i++)
5519         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5520       PetscCall(VecRestoreArrayRead(matis->x, &x));
5521       PetscCall(VecRestoreArrayRead(matis->y, &y));
5522       PetscCall(VecRestoreArrayRead(matis->counter, &v));
5523       PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5524       if (error > PETSC_SMALL) {
5525         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5526           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5527         } else {
5528           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5529         }
5530       }
5531     }
5532     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5533     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5534     PetscCall(VecAXPY(x, -1.0, x_change));
5535     PetscCall(VecNorm(x, NORM_INFINITY, &error));
5536     if (error > PETSC_SMALL) {
5537       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5538         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5539       } else {
5540         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5541       }
5542     }
5543     PetscCall(VecDestroy(&x));
5544     PetscCall(VecDestroy(&x_change));
5545   }
5546 
5547   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5548   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
5549 
5550   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5551   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5552   if (isseqaij) {
5553     PetscCall(MatDestroy(&pcbddc->local_mat));
5554     PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5555     if (lA) {
5556       Mat work;
5557       PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5558       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5559       PetscCall(MatDestroy(&work));
5560     }
5561   } else {
5562     Mat work_mat;
5563 
5564     PetscCall(MatDestroy(&pcbddc->local_mat));
5565     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5566     PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5567     PetscCall(MatDestroy(&work_mat));
5568     if (lA) {
5569       Mat work;
5570       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5571       PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5572       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5573       PetscCall(MatDestroy(&work));
5574     }
5575   }
5576   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5577   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5578   PetscCall(MatDestroy(&new_mat));
5579   PetscFunctionReturn(PETSC_SUCCESS);
5580 }
5581 
5582 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5583 {
5584   PC_IS          *pcis        = (PC_IS *)pc->data;
5585   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
5586   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
5587   PetscInt       *idx_R_local = NULL;
5588   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
5589   PetscInt        vbs, bs;
5590   PetscBT         bitmask = NULL;
5591 
5592   PetscFunctionBegin;
5593   /*
5594     No need to setup local scatters if
5595       - primal space is unchanged
5596         AND
5597       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5598         AND
5599       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5600   */
5601   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5602   /* destroy old objects */
5603   PetscCall(ISDestroy(&pcbddc->is_R_local));
5604   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5605   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5606   /* Set Non-overlapping dimensions */
5607   n_B        = pcis->n_B;
5608   n_D        = pcis->n - n_B;
5609   n_vertices = pcbddc->n_vertices;
5610 
5611   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */
5612 
5613   /* create auxiliary bitmask and allocate workspace */
5614   if (!sub_schurs || !sub_schurs->reuse_solver) {
5615     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5616     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5617     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5618 
5619     for (i = 0, n_R = 0; i < pcis->n; i++) {
5620       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5621     }
5622   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5623     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5624 
5625     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5626     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5627   }
5628 
5629   /* Block code */
5630   vbs = 1;
5631   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5632   if (bs > 1 && !(n_vertices % bs)) {
5633     PetscBool is_blocked = PETSC_TRUE;
5634     PetscInt *vary;
5635     if (!sub_schurs || !sub_schurs->reuse_solver) {
5636       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5637       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5638       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5639       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5640       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5641       for (i = 0; i < pcis->n / bs; i++) {
5642         if (vary[i] != 0 && vary[i] != bs) {
5643           is_blocked = PETSC_FALSE;
5644           break;
5645         }
5646       }
5647       PetscCall(PetscFree(vary));
5648     } else {
5649       /* Verify directly the R set */
5650       for (i = 0; i < n_R / bs; i++) {
5651         PetscInt j, node = idx_R_local[bs * i];
5652         for (j = 1; j < bs; j++) {
5653           if (node != idx_R_local[bs * i + j] - j) {
5654             is_blocked = PETSC_FALSE;
5655             break;
5656           }
5657         }
5658       }
5659     }
5660     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5661       vbs = bs;
5662       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5663     }
5664   }
5665   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5666   if (sub_schurs && sub_schurs->reuse_solver) {
5667     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5668 
5669     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5670     PetscCall(ISDestroy(&reuse_solver->is_R));
5671     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5672     reuse_solver->is_R = pcbddc->is_R_local;
5673   } else {
5674     PetscCall(PetscFree(idx_R_local));
5675   }
5676 
5677   /* print some info if requested */
5678   if (pcbddc->dbg_flag) {
5679     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5680     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5681     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5682     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5683     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5684     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5685                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5686     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5687   }
5688 
5689   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5690   if (!sub_schurs || !sub_schurs->reuse_solver) {
5691     IS        is_aux1, is_aux2;
5692     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5693 
5694     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5695     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5696     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5697     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5698     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5699     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5700     for (i = 0, j = 0; i < n_R; i++) {
5701       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5702     }
5703     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5704     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5705     for (i = 0, j = 0; i < n_B; i++) {
5706       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5707     }
5708     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5709     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5710     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5711     PetscCall(ISDestroy(&is_aux1));
5712     PetscCall(ISDestroy(&is_aux2));
5713 
5714     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5715       PetscCall(PetscMalloc1(n_D, &aux_array1));
5716       for (i = 0, j = 0; i < n_R; i++) {
5717         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5718       }
5719       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5720       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5721       PetscCall(ISDestroy(&is_aux1));
5722     }
5723     PetscCall(PetscBTDestroy(&bitmask));
5724     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5725   } else {
5726     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5727     IS                 tis;
5728     PetscInt           schur_size;
5729 
5730     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5731     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5732     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5733     PetscCall(ISDestroy(&tis));
5734     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5735       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5736       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5737       PetscCall(ISDestroy(&tis));
5738     }
5739   }
5740   PetscFunctionReturn(PETSC_SUCCESS);
5741 }
5742 
5743 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5744 {
5745   MatNullSpace NullSpace;
5746   Mat          dmat;
5747   const Vec   *nullvecs;
5748   Vec          v, v2, *nullvecs2;
5749   VecScatter   sct = NULL;
5750   PetscScalar *ddata;
5751   PetscInt     k, nnsp_size, bsiz, bsiz2, n, N, bs;
5752   PetscBool    nnsp_has_cnst;
5753 
5754   PetscFunctionBegin;
5755   if (!is && !B) { /* MATIS */
5756     Mat_IS *matis = (Mat_IS *)A->data;
5757 
5758     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5759     sct = matis->cctx;
5760     PetscCall(PetscObjectReference((PetscObject)sct));
5761   } else {
5762     PetscCall(MatGetNullSpace(B, &NullSpace));
5763     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5764     if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5765   }
5766   PetscCall(MatGetNullSpace(A, &NullSpace));
5767   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5768   if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5769 
5770   PetscCall(MatCreateVecs(A, &v, NULL));
5771   PetscCall(MatCreateVecs(B, &v2, NULL));
5772   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5773   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs));
5774   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5775   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5776   PetscCall(VecGetBlockSize(v2, &bs));
5777   PetscCall(VecGetSize(v2, &N));
5778   PetscCall(VecGetLocalSize(v2, &n));
5779   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5780   for (k = 0; k < nnsp_size; k++) {
5781     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5782     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5783     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5784   }
5785   if (nnsp_has_cnst) {
5786     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5787     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5788   }
5789   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5790   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5791 
5792   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5793   PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault));
5794   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5795   PetscCall(MatDestroy(&dmat));
5796 
5797   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5798   PetscCall(PetscFree(nullvecs2));
5799   PetscCall(MatSetNearNullSpace(B, NullSpace));
5800   PetscCall(MatNullSpaceDestroy(&NullSpace));
5801   PetscCall(VecDestroy(&v));
5802   PetscCall(VecDestroy(&v2));
5803   PetscCall(VecScatterDestroy(&sct));
5804   PetscFunctionReturn(PETSC_SUCCESS);
5805 }
5806 
5807 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5808 {
5809   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5810   PC_IS       *pcis   = (PC_IS *)pc->data;
5811   PC           pc_temp;
5812   Mat          A_RR;
5813   MatNullSpace nnsp;
5814   MatReuse     reuse;
5815   PetscScalar  m_one = -1.0;
5816   PetscReal    value;
5817   PetscInt     n_D, n_R;
5818   PetscBool    issbaij, opts, isset, issym;
5819   PetscBool    f = PETSC_FALSE;
5820   char         dir_prefix[256], neu_prefix[256], str_level[16];
5821   size_t       len;
5822 
5823   PetscFunctionBegin;
5824   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5825   /* approximate solver, propagate NearNullSpace if needed */
5826   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5827     MatNullSpace gnnsp1, gnnsp2;
5828     PetscBool    lhas, ghas;
5829 
5830     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5831     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5832     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5833     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5834     PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5835     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5836   }
5837 
5838   /* compute prefixes */
5839   PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5840   PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5841   if (!pcbddc->current_level) {
5842     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5843     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5844     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5845     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5846   } else {
5847     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
5848     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5849     len -= 15;                                /* remove "pc_bddc_coarse_" */
5850     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5851     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5852     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5853     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5854     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5855     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5856     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5857     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5858     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5859   }
5860 
5861   /* DIRICHLET PROBLEM */
5862   if (dirichlet) {
5863     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5864     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5865       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5866       if (pcbddc->dbg_flag) {
5867         Mat A_IIn;
5868 
5869         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5870         PetscCall(MatDestroy(&pcis->A_II));
5871         pcis->A_II = A_IIn;
5872       }
5873     }
5874     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5875     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
5876 
5877     /* Matrix for Dirichlet problem is pcis->A_II */
5878     n_D  = pcis->n - pcis->n_B;
5879     opts = PETSC_FALSE;
5880     if (!pcbddc->ksp_D) { /* create object if not yet build */
5881       opts = PETSC_TRUE;
5882       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
5883       PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
5884       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
5885       /* default */
5886       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
5887       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
5888       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
5889       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5890       if (issbaij) {
5891         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5892       } else {
5893         PetscCall(PCSetType(pc_temp, PCLU));
5894       }
5895       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
5896     }
5897     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
5898     PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
5899     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
5900     /* Allow user's customization */
5901     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
5902     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5903     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5904       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
5905     }
5906     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5907     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5908     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5909     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5910       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5911       const PetscInt *idxs;
5912       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
5913 
5914       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
5915       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
5916       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5917       for (i = 0; i < nl; i++) {
5918         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5919       }
5920       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
5921       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5922       PetscCall(PetscFree(scoords));
5923     }
5924     if (sub_schurs && sub_schurs->reuse_solver) {
5925       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5926 
5927       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
5928     }
5929 
5930     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5931     if (!n_D) {
5932       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5933       PetscCall(PCSetType(pc_temp, PCNONE));
5934     }
5935     PetscCall(KSPSetUp(pcbddc->ksp_D));
5936     /* set ksp_D into pcis data */
5937     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5938     PetscCall(KSPDestroy(&pcis->ksp_D));
5939     pcis->ksp_D = pcbddc->ksp_D;
5940   }
5941 
5942   /* NEUMANN PROBLEM */
5943   A_RR = NULL;
5944   if (neumann) {
5945     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5946     PetscInt        ibs, mbs;
5947     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
5948     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
5949 
5950     reuse_neumann_solver = PETSC_FALSE;
5951     if (sub_schurs && sub_schurs->reuse_solver) {
5952       IS iP;
5953 
5954       reuse_neumann_solver = PETSC_TRUE;
5955       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
5956       if (iP) reuse_neumann_solver = PETSC_FALSE;
5957     }
5958     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5959     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
5960     if (pcbddc->ksp_R) { /* already created ksp */
5961       PetscInt nn_R;
5962       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
5963       PetscCall(PetscObjectReference((PetscObject)A_RR));
5964       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
5965       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5966         PetscCall(KSPReset(pcbddc->ksp_R));
5967         PetscCall(MatDestroy(&A_RR));
5968         reuse = MAT_INITIAL_MATRIX;
5969       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5970         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5971           PetscCall(MatDestroy(&A_RR));
5972           reuse = MAT_INITIAL_MATRIX;
5973         } else { /* safe to reuse the matrix */
5974           reuse = MAT_REUSE_MATRIX;
5975         }
5976       }
5977       /* last check */
5978       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5979         PetscCall(MatDestroy(&A_RR));
5980         reuse = MAT_INITIAL_MATRIX;
5981       }
5982     } else { /* first time, so we need to create the matrix */
5983       reuse = MAT_INITIAL_MATRIX;
5984     }
5985     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5986        TODO: Get Rid of these conversions */
5987     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
5988     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
5989     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
5990     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5991       if (matis->A == pcbddc->local_mat) {
5992         PetscCall(MatDestroy(&pcbddc->local_mat));
5993         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5994       } else {
5995         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
5996       }
5997     } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
5998       if (matis->A == pcbddc->local_mat) {
5999         PetscCall(MatDestroy(&pcbddc->local_mat));
6000         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6001       } else {
6002         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6003       }
6004     }
6005     /* extract A_RR */
6006     if (reuse_neumann_solver) {
6007       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6008 
6009       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6010         PetscCall(MatDestroy(&A_RR));
6011         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6012           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6013         } else {
6014           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6015         }
6016       } else {
6017         PetscCall(MatDestroy(&A_RR));
6018         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6019         PetscCall(PetscObjectReference((PetscObject)A_RR));
6020       }
6021     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6022       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6023     }
6024     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6025     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6026     opts = PETSC_FALSE;
6027     if (!pcbddc->ksp_R) { /* create object if not present */
6028       opts = PETSC_TRUE;
6029       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6030       PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6031       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6032       /* default */
6033       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6034       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6035       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6036       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6037       if (issbaij) {
6038         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6039       } else {
6040         PetscCall(PCSetType(pc_temp, PCLU));
6041       }
6042       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6043     }
6044     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6045     PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6046     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6047     if (opts) { /* Allow user's customization once */
6048       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6049     }
6050     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6051     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6052       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6053     }
6054     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6055     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6056     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6057     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6058       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6059       const PetscInt *idxs;
6060       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
6061 
6062       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6063       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6064       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6065       for (i = 0; i < nl; i++) {
6066         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6067       }
6068       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6069       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6070       PetscCall(PetscFree(scoords));
6071     }
6072 
6073     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6074     if (!n_R) {
6075       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6076       PetscCall(PCSetType(pc_temp, PCNONE));
6077     }
6078     /* Reuse solver if it is present */
6079     if (reuse_neumann_solver) {
6080       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6081 
6082       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6083     }
6084     PetscCall(KSPSetUp(pcbddc->ksp_R));
6085   }
6086 
6087   if (pcbddc->dbg_flag) {
6088     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6089     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6090     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6091   }
6092   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
6093 
6094   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6095   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6096   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6097   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6098   /* check Dirichlet and Neumann solvers */
6099   if (pcbddc->dbg_flag) {
6100     if (dirichlet) { /* Dirichlet */
6101       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6102       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6103       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6104       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6105       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6106       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6107       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6108       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6109     }
6110     if (neumann) { /* Neumann */
6111       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6112       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6113       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6114       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6115       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6116       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6117       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6118       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6119     }
6120   }
6121   /* free Neumann problem's matrix */
6122   PetscCall(MatDestroy(&A_RR));
6123   PetscFunctionReturn(PETSC_SUCCESS);
6124 }
6125 
6126 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6127 {
6128   PC_BDDC        *pcbddc       = (PC_BDDC *)pc->data;
6129   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
6130   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
6131 
6132   PetscFunctionBegin;
6133   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6134   if (!pcbddc->switch_static) {
6135     if (applytranspose && pcbddc->local_auxmat1) {
6136       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6137       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6138     }
6139     if (!reuse_solver) {
6140       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6141       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6142     } else {
6143       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6144 
6145       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6146       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6147     }
6148   } else {
6149     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6150     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6151     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6152     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6153     if (applytranspose && pcbddc->local_auxmat1) {
6154       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6155       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6156       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6157       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6158     }
6159   }
6160   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6161   if (!reuse_solver || pcbddc->switch_static) {
6162     if (applytranspose) {
6163       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6164     } else {
6165       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6166     }
6167     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6168   } else {
6169     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6170 
6171     if (applytranspose) {
6172       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6173     } else {
6174       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6175     }
6176   }
6177   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6178   PetscCall(VecSet(inout_B, 0.));
6179   if (!pcbddc->switch_static) {
6180     if (!reuse_solver) {
6181       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6182       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6183     } else {
6184       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6185 
6186       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6187       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6188     }
6189     if (!applytranspose && pcbddc->local_auxmat1) {
6190       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6191       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6192     }
6193   } else {
6194     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6195     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6196     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6197     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6198     if (!applytranspose && pcbddc->local_auxmat1) {
6199       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6200       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6201     }
6202     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6203     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6204     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6205     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6206   }
6207   PetscFunctionReturn(PETSC_SUCCESS);
6208 }
6209 
6210 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
6211 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6212 {
6213   PC_BDDC          *pcbddc = (PC_BDDC *)pc->data;
6214   PC_IS            *pcis   = (PC_IS *)pc->data;
6215   const PetscScalar zero   = 0.0;
6216 
6217   PetscFunctionBegin;
6218   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6219   if (!pcbddc->benign_apply_coarse_only) {
6220     if (applytranspose) {
6221       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6222       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6223     } else {
6224       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6225       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6226     }
6227   } else {
6228     PetscCall(VecSet(pcbddc->vec1_P, zero));
6229   }
6230 
6231   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6232   if (pcbddc->benign_n) {
6233     PetscScalar *array;
6234     PetscInt     j;
6235 
6236     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6237     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6238     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6239   }
6240 
6241   /* start communications from local primal nodes to rhs of coarse solver */
6242   PetscCall(VecSet(pcbddc->coarse_vec, zero));
6243   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6244   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
6245 
6246   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6247   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6248   if (pcbddc->coarse_ksp) {
6249     Mat          coarse_mat;
6250     Vec          rhs, sol;
6251     MatNullSpace nullsp;
6252     PetscBool    isbddc = PETSC_FALSE;
6253 
6254     if (pcbddc->benign_have_null) {
6255       PC coarse_pc;
6256 
6257       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6258       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6259       /* we need to propagate to coarser levels the need for a possible benign correction */
6260       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6261         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)coarse_pc->data;
6262         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
6263         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6264       }
6265     }
6266     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6267     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6268     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6269     if (applytranspose) {
6270       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6271       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6272       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6273       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6274       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6275     } else {
6276       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6277       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6278         PC coarse_pc;
6279 
6280         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6281         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6282         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6283         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6284         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6285       } else {
6286         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6287         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6288         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6289       }
6290     }
6291     /* we don't need the benign correction at coarser levels anymore */
6292     if (pcbddc->benign_have_null && isbddc) {
6293       PC       coarse_pc;
6294       PC_BDDC *coarsepcbddc;
6295 
6296       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6297       coarsepcbddc                           = (PC_BDDC *)coarse_pc->data;
6298       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
6299       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6300     }
6301   }
6302   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6303 
6304   /* Local solution on R nodes */
6305   if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6306   /* communications from coarse sol to local primal nodes */
6307   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6308   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
6309 
6310   /* Sum contributions from the two levels */
6311   if (!pcbddc->benign_apply_coarse_only) {
6312     if (applytranspose) {
6313       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6314       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6315     } else {
6316       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6317       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6318     }
6319     /* store p0 */
6320     if (pcbddc->benign_n) {
6321       PetscScalar *array;
6322       PetscInt     j;
6323 
6324       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6325       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6326       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6327     }
6328   } else { /* expand the coarse solution */
6329     if (applytranspose) {
6330       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6331     } else {
6332       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6333     }
6334   }
6335   PetscFunctionReturn(PETSC_SUCCESS);
6336 }
6337 
6338 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6339 {
6340   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6341   Vec                from, to;
6342   const PetscScalar *array;
6343 
6344   PetscFunctionBegin;
6345   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6346     from = pcbddc->coarse_vec;
6347     to   = pcbddc->vec1_P;
6348     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6349       Vec tvec;
6350 
6351       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6352       PetscCall(VecResetArray(tvec));
6353       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6354       PetscCall(VecGetArrayRead(tvec, &array));
6355       PetscCall(VecPlaceArray(from, array));
6356       PetscCall(VecRestoreArrayRead(tvec, &array));
6357     }
6358   } else { /* from local to global -> put data in coarse right-hand side */
6359     from = pcbddc->vec1_P;
6360     to   = pcbddc->coarse_vec;
6361   }
6362   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6363   PetscFunctionReturn(PETSC_SUCCESS);
6364 }
6365 
6366 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6367 {
6368   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6369   Vec                from, to;
6370   const PetscScalar *array;
6371 
6372   PetscFunctionBegin;
6373   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6374     from = pcbddc->coarse_vec;
6375     to   = pcbddc->vec1_P;
6376   } else { /* from local to global -> put data in coarse right-hand side */
6377     from = pcbddc->vec1_P;
6378     to   = pcbddc->coarse_vec;
6379   }
6380   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6381   if (smode == SCATTER_FORWARD) {
6382     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6383       Vec tvec;
6384 
6385       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6386       PetscCall(VecGetArrayRead(to, &array));
6387       PetscCall(VecPlaceArray(tvec, array));
6388       PetscCall(VecRestoreArrayRead(to, &array));
6389     }
6390   } else {
6391     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6392       PetscCall(VecResetArray(from));
6393     }
6394   }
6395   PetscFunctionReturn(PETSC_SUCCESS);
6396 }
6397 
6398 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6399 {
6400   PC_IS   *pcis   = (PC_IS *)pc->data;
6401   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6402   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
6403   /* one and zero */
6404   PetscScalar one = 1.0, zero = 0.0;
6405   /* space to store constraints and their local indices */
6406   PetscScalar *constraints_data;
6407   PetscInt    *constraints_idxs, *constraints_idxs_B;
6408   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
6409   PetscInt    *constraints_n;
6410   /* iterators */
6411   PetscInt i, j, k, total_counts, total_counts_cc, cum;
6412   /* BLAS integers */
6413   PetscBLASInt lwork, lierr;
6414   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6415   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6416   /* reuse */
6417   PetscInt  olocal_primal_size, olocal_primal_size_cc;
6418   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6419   /* change of basis */
6420   PetscBool qr_needed;
6421   PetscBT   change_basis, qr_needed_idx;
6422   /* auxiliary stuff */
6423   PetscInt *nnz, *is_indices;
6424   PetscInt  ncc;
6425   /* some quantities */
6426   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
6427   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6428   PetscReal tol; /* tolerance for retaining eigenmodes */
6429 
6430   PetscFunctionBegin;
6431   tol = PetscSqrtReal(PETSC_SMALL);
6432   /* Destroy Mat objects computed previously */
6433   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6434   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6435   PetscCall(MatDestroy(&pcbddc->switch_static_change));
6436   /* save info on constraints from previous setup (if any) */
6437   olocal_primal_size    = pcbddc->local_primal_size;
6438   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6439   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6440   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6441   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6442   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6443   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
6444 
6445   if (!pcbddc->adaptive_selection) {
6446     IS           ISForVertices, *ISForFaces, *ISForEdges;
6447     MatNullSpace nearnullsp;
6448     const Vec   *nearnullvecs;
6449     Vec         *localnearnullsp;
6450     PetscScalar *array;
6451     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6452     PetscBool    nnsp_has_cnst;
6453     /* LAPACK working arrays for SVD or POD */
6454     PetscBool    skip_lapack, boolforchange;
6455     PetscScalar *work;
6456     PetscReal   *singular_vals;
6457 #if defined(PETSC_USE_COMPLEX)
6458     PetscReal *rwork;
6459 #endif
6460     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6461     PetscBLASInt dummy_int    = 1;
6462     PetscScalar  dummy_scalar = 1.;
6463     PetscBool    use_pod      = PETSC_FALSE;
6464 
6465     /* MKL SVD with same input gives different results on different processes! */
6466 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6467     use_pod = PETSC_TRUE;
6468 #endif
6469     /* Get index sets for faces, edges and vertices from graph */
6470     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6471     o_nf       = n_ISForFaces;
6472     o_ne       = n_ISForEdges;
6473     n_vertices = 0;
6474     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6475     /* print some info */
6476     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6477       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6478       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6479       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6480       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6481       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6482       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6483       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6484       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6485       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6486     }
6487 
6488     if (!pcbddc->use_vertices) n_vertices = 0;
6489     if (!pcbddc->use_edges) n_ISForEdges = 0;
6490     if (!pcbddc->use_faces) n_ISForFaces = 0;
6491 
6492     /* check if near null space is attached to global mat */
6493     if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6494     else nearnullsp = NULL;
6495 
6496     if (nearnullsp) {
6497       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6498       /* remove any stored info */
6499       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6500       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6501       /* store information for BDDC solver reuse */
6502       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6503       pcbddc->onearnullspace = nearnullsp;
6504       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6505       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6506     } else { /* if near null space is not provided BDDC uses constants by default */
6507       nnsp_size     = 0;
6508       nnsp_has_cnst = PETSC_TRUE;
6509     }
6510     /* get max number of constraints on a single cc */
6511     max_constraints = nnsp_size;
6512     if (nnsp_has_cnst) max_constraints++;
6513 
6514     /*
6515          Evaluate maximum storage size needed by the procedure
6516          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6517          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6518          There can be multiple constraints per connected component
6519                                                                                                                                                            */
6520     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6521     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
6522 
6523     total_counts = n_ISForFaces + n_ISForEdges;
6524     total_counts *= max_constraints;
6525     total_counts += n_vertices;
6526     PetscCall(PetscBTCreate(total_counts, &change_basis));
6527 
6528     total_counts           = 0;
6529     max_size_of_constraint = 0;
6530     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6531       IS used_is;
6532       if (i < n_ISForEdges) {
6533         used_is = ISForEdges[i];
6534       } else {
6535         used_is = ISForFaces[i - n_ISForEdges];
6536       }
6537       PetscCall(ISGetSize(used_is, &j));
6538       total_counts += j;
6539       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6540     }
6541     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
6542 
6543     /* get local part of global near null space vectors */
6544     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6545     for (k = 0; k < nnsp_size; k++) {
6546       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6547       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6548       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6549     }
6550 
6551     /* whether or not to skip lapack calls */
6552     skip_lapack = PETSC_TRUE;
6553     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6554 
6555     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6556     if (!skip_lapack) {
6557       PetscScalar temp_work;
6558 
6559       if (use_pod) {
6560         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6561         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6562         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6563         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6564 #if defined(PETSC_USE_COMPLEX)
6565         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6566 #endif
6567         /* now we evaluate the optimal workspace using query with lwork=-1 */
6568         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6569         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6570         lwork = -1;
6571         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6572 #if !defined(PETSC_USE_COMPLEX)
6573         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6574 #else
6575         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6576 #endif
6577         PetscCall(PetscFPTrapPop());
6578         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6579       } else {
6580 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6581         /* SVD */
6582         PetscInt max_n, min_n;
6583         max_n = max_size_of_constraint;
6584         min_n = max_constraints;
6585         if (max_size_of_constraint < max_constraints) {
6586           min_n = max_size_of_constraint;
6587           max_n = max_constraints;
6588         }
6589         PetscCall(PetscMalloc1(min_n, &singular_vals));
6590   #if defined(PETSC_USE_COMPLEX)
6591         PetscCall(PetscMalloc1(5 * min_n, &rwork));
6592   #endif
6593         /* now we evaluate the optimal workspace using query with lwork=-1 */
6594         lwork = -1;
6595         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6596         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6597         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6598         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6599   #if !defined(PETSC_USE_COMPLEX)
6600         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6601   #else
6602         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6603   #endif
6604         PetscCall(PetscFPTrapPop());
6605         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6606 #else
6607         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6608 #endif /* on missing GESVD */
6609       }
6610       /* Allocate optimal workspace */
6611       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6612       PetscCall(PetscMalloc1(lwork, &work));
6613     }
6614     /* Now we can loop on constraining sets */
6615     total_counts            = 0;
6616     constraints_idxs_ptr[0] = 0;
6617     constraints_data_ptr[0] = 0;
6618     /* vertices */
6619     if (n_vertices) {
6620       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6621       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6622       for (i = 0; i < n_vertices; i++) {
6623         constraints_n[total_counts]            = 1;
6624         constraints_data[total_counts]         = 1.0;
6625         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6626         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6627         total_counts++;
6628       }
6629       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6630     }
6631 
6632     /* edges and faces */
6633     total_counts_cc = total_counts;
6634     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6635       IS        used_is;
6636       PetscBool idxs_copied = PETSC_FALSE;
6637 
6638       if (ncc < n_ISForEdges) {
6639         used_is       = ISForEdges[ncc];
6640         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6641       } else {
6642         used_is       = ISForFaces[ncc - n_ISForEdges];
6643         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6644       }
6645       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6646 
6647       PetscCall(ISGetSize(used_is, &size_of_constraint));
6648       if (!size_of_constraint) continue;
6649       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6650       if (nnsp_has_cnst) {
6651         PetscScalar quad_value;
6652 
6653         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6654         idxs_copied = PETSC_TRUE;
6655 
6656         if (!pcbddc->use_nnsp_true) {
6657           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6658         } else {
6659           quad_value = 1.0;
6660         }
6661         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6662         temp_constraints++;
6663         total_counts++;
6664       }
6665       for (k = 0; k < nnsp_size; k++) {
6666         PetscReal    real_value;
6667         PetscScalar *ptr_to_data;
6668 
6669         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6670         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6671         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6672         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6673         /* check if array is null on the connected component */
6674         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6675         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6676         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6677           temp_constraints++;
6678           total_counts++;
6679           if (!idxs_copied) {
6680             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6681             idxs_copied = PETSC_TRUE;
6682           }
6683         }
6684       }
6685       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6686       valid_constraints = temp_constraints;
6687       if (!pcbddc->use_nnsp_true && temp_constraints) {
6688         if (temp_constraints == 1) { /* just normalize the constraint */
6689           PetscScalar norm, *ptr_to_data;
6690 
6691           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6692           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6693           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6694           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6695           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6696         } else { /* perform SVD */
6697           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6698 
6699           if (use_pod) {
6700             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6701                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6702                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6703                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6704                   from that computed using LAPACKgesvd
6705                -> This is due to a different computation of eigenvectors in LAPACKheev
6706                -> The quality of the POD-computed basis will be the same */
6707             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6708             /* Store upper triangular part of correlation matrix */
6709             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6710             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6711             for (j = 0; j < temp_constraints; j++) {
6712               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6713             }
6714             /* compute eigenvalues and eigenvectors of correlation matrix */
6715             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6716             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6717 #if !defined(PETSC_USE_COMPLEX)
6718             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6719 #else
6720             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6721 #endif
6722             PetscCall(PetscFPTrapPop());
6723             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6724             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6725             j = 0;
6726             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6727             total_counts      = total_counts - j;
6728             valid_constraints = temp_constraints - j;
6729             /* scale and copy POD basis into used quadrature memory */
6730             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6731             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6732             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6733             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6734             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6735             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6736             if (j < temp_constraints) {
6737               PetscInt ii;
6738               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6739               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6740               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6741               PetscCall(PetscFPTrapPop());
6742               for (k = 0; k < temp_constraints - j; k++) {
6743                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6744               }
6745             }
6746           } else {
6747 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6748             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6749             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6750             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6751             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6752   #if !defined(PETSC_USE_COMPLEX)
6753             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6754   #else
6755             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6756   #endif
6757             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6758             PetscCall(PetscFPTrapPop());
6759             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6760             k = temp_constraints;
6761             if (k > size_of_constraint) k = size_of_constraint;
6762             j = 0;
6763             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6764             valid_constraints = k - j;
6765             total_counts      = total_counts - temp_constraints + valid_constraints;
6766 #else
6767             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6768 #endif /* on missing GESVD */
6769           }
6770         }
6771       }
6772       /* update pointers information */
6773       if (valid_constraints) {
6774         constraints_n[total_counts_cc]            = valid_constraints;
6775         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6776         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6777         /* set change_of_basis flag */
6778         if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6779         total_counts_cc++;
6780       }
6781     }
6782     /* free workspace */
6783     if (!skip_lapack) {
6784       PetscCall(PetscFree(work));
6785 #if defined(PETSC_USE_COMPLEX)
6786       PetscCall(PetscFree(rwork));
6787 #endif
6788       PetscCall(PetscFree(singular_vals));
6789       PetscCall(PetscFree(correlation_mat));
6790       PetscCall(PetscFree(temp_basis));
6791     }
6792     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6793     PetscCall(PetscFree(localnearnullsp));
6794     /* free index sets of faces, edges and vertices */
6795     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6796   } else {
6797     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6798 
6799     total_counts = 0;
6800     n_vertices   = 0;
6801     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6802     max_constraints = 0;
6803     total_counts_cc = 0;
6804     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6805       total_counts += pcbddc->adaptive_constraints_n[i];
6806       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6807       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6808     }
6809     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6810     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6811     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6812     constraints_data     = pcbddc->adaptive_constraints_data;
6813     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6814     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6815     total_counts_cc = 0;
6816     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6817       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6818     }
6819 
6820     max_size_of_constraint = 0;
6821     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6822     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6823     /* Change of basis */
6824     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6825     if (pcbddc->use_change_of_basis) {
6826       for (i = 0; i < sub_schurs->n_subs; i++) {
6827         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6828       }
6829     }
6830   }
6831   pcbddc->local_primal_size = total_counts;
6832   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6833 
6834   /* map constraints_idxs in boundary numbering */
6835   if (pcbddc->use_change_of_basis) {
6836     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6837     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6838   }
6839 
6840   /* Create constraint matrix */
6841   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6842   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6843   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6844 
6845   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6846   /* determine if a QR strategy is needed for change of basis */
6847   qr_needed = pcbddc->use_qr_single;
6848   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6849   total_primal_vertices        = 0;
6850   pcbddc->local_primal_size_cc = 0;
6851   for (i = 0; i < total_counts_cc; i++) {
6852     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6853     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6854       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6855       pcbddc->local_primal_size_cc += 1;
6856     } else if (PetscBTLookup(change_basis, i)) {
6857       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6858       pcbddc->local_primal_size_cc += constraints_n[i];
6859       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6860         PetscCall(PetscBTSet(qr_needed_idx, i));
6861         qr_needed = PETSC_TRUE;
6862       }
6863     } else {
6864       pcbddc->local_primal_size_cc += 1;
6865     }
6866   }
6867   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6868   pcbddc->n_vertices = total_primal_vertices;
6869   /* permute indices in order to have a sorted set of vertices */
6870   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6871   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6872   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6873   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6874 
6875   /* nonzero structure of constraint matrix */
6876   /* and get reference dof for local constraints */
6877   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
6878   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
6879 
6880   j            = total_primal_vertices;
6881   total_counts = total_primal_vertices;
6882   cum          = total_primal_vertices;
6883   for (i = n_vertices; i < total_counts_cc; i++) {
6884     if (!PetscBTLookup(change_basis, i)) {
6885       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6886       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6887       cum++;
6888       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6889       for (k = 0; k < constraints_n[i]; k++) {
6890         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6891         nnz[j + k]                                        = size_of_constraint;
6892       }
6893       j += constraints_n[i];
6894     }
6895   }
6896   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
6897   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6898   PetscCall(PetscFree(nnz));
6899 
6900   /* set values in constraint matrix */
6901   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
6902   total_counts = total_primal_vertices;
6903   for (i = n_vertices; i < total_counts_cc; i++) {
6904     if (!PetscBTLookup(change_basis, i)) {
6905       PetscInt *cols;
6906 
6907       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6908       cols               = constraints_idxs + constraints_idxs_ptr[i];
6909       for (k = 0; k < constraints_n[i]; k++) {
6910         PetscInt     row = total_counts + k;
6911         PetscScalar *vals;
6912 
6913         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6914         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
6915       }
6916       total_counts += constraints_n[i];
6917     }
6918   }
6919   /* assembling */
6920   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6921   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6922   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
6923 
6924   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6925   if (pcbddc->use_change_of_basis) {
6926     /* dual and primal dofs on a single cc */
6927     PetscInt dual_dofs, primal_dofs;
6928     /* working stuff for GEQRF */
6929     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6930     PetscBLASInt lqr_work;
6931     /* working stuff for UNGQR */
6932     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6933     PetscBLASInt lgqr_work;
6934     /* working stuff for TRTRS */
6935     PetscScalar *trs_rhs = NULL;
6936     PetscBLASInt Blas_NRHS;
6937     /* pointers for values insertion into change of basis matrix */
6938     PetscInt    *start_rows, *start_cols;
6939     PetscScalar *start_vals;
6940     /* working stuff for values insertion */
6941     PetscBT   is_primal;
6942     PetscInt *aux_primal_numbering_B;
6943     /* matrix sizes */
6944     PetscInt global_size, local_size;
6945     /* temporary change of basis */
6946     Mat localChangeOfBasisMatrix;
6947     /* extra space for debugging */
6948     PetscScalar *dbg_work = NULL;
6949 
6950     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
6951     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
6952     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
6953     /* nonzeros for local mat */
6954     PetscCall(PetscMalloc1(pcis->n, &nnz));
6955     if (!pcbddc->benign_change || pcbddc->fake_change) {
6956       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6957     } else {
6958       const PetscInt *ii;
6959       PetscInt        n;
6960       PetscBool       flg_row;
6961       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6962       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6963       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6964     }
6965     for (i = n_vertices; i < total_counts_cc; i++) {
6966       if (PetscBTLookup(change_basis, i)) {
6967         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6968         if (PetscBTLookup(qr_needed_idx, i)) {
6969           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6970         } else {
6971           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6972           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6973         }
6974       }
6975     }
6976     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
6977     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6978     PetscCall(PetscFree(nnz));
6979     /* Set interior change in the matrix */
6980     if (!pcbddc->benign_change || pcbddc->fake_change) {
6981       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
6982     } else {
6983       const PetscInt *ii, *jj;
6984       PetscScalar    *aa;
6985       PetscInt        n;
6986       PetscBool       flg_row;
6987       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6988       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
6989       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
6990       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
6991       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6992     }
6993 
6994     if (pcbddc->dbg_flag) {
6995       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6996       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
6997     }
6998 
6999     /* Now we loop on the constraints which need a change of basis */
7000     /*
7001        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7002        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
7003 
7004        Basic blocks of change of basis matrix T computed:
7005 
7006           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
7007 
7008             | 1        0   ...        0         s_1/S |
7009             | 0        1   ...        0         s_2/S |
7010             |              ...                        |
7011             | 0        ...            1     s_{n-1}/S |
7012             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
7013 
7014             with S = \sum_{i=1}^n s_i^2
7015             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7016                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
7017 
7018           - QR decomposition of constraints otherwise
7019     */
7020     if (qr_needed && max_size_of_constraint) {
7021       /* space to store Q */
7022       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7023       /* array to store scaling factors for reflectors */
7024       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7025       /* first we issue queries for optimal work */
7026       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7027       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7028       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7029       lqr_work = -1;
7030       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7031       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7032       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7033       PetscCall(PetscMalloc1(lqr_work, &qr_work));
7034       lgqr_work = -1;
7035       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7036       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7037       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7038       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7039       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7040       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7041       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7042       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7043       PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7044       /* array to store rhs and solution of triangular solver */
7045       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7046       /* allocating workspace for check */
7047       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7048     }
7049     /* array to store whether a node is primal or not */
7050     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7051     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7052     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7053     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7054     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7055     PetscCall(PetscFree(aux_primal_numbering_B));
7056 
7057     /* loop on constraints and see whether or not they need a change of basis and compute it */
7058     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7059       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7060       if (PetscBTLookup(change_basis, total_counts)) {
7061         /* get constraint info */
7062         primal_dofs = constraints_n[total_counts];
7063         dual_dofs   = size_of_constraint - primal_dofs;
7064 
7065         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
7066 
7067         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
7068 
7069           /* copy quadrature constraints for change of basis check */
7070           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7071           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7072           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7073 
7074           /* compute QR decomposition of constraints */
7075           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7076           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7077           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7078           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7079           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7080           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7081           PetscCall(PetscFPTrapPop());
7082 
7083           /* explicitly compute R^-T */
7084           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7085           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7086           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7087           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7088           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7089           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7090           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7091           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7092           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr);
7093           PetscCall(PetscFPTrapPop());
7094 
7095           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7096           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7097           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7098           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7099           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7100           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7101           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7102           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7103           PetscCall(PetscFPTrapPop());
7104 
7105           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7106              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7107              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7108           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7109           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7110           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7111           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7112           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7113           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7114           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7115           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7116           PetscCall(PetscFPTrapPop());
7117           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7118 
7119           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7120           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7121           /* insert cols for primal dofs */
7122           for (j = 0; j < primal_dofs; j++) {
7123             start_vals = &qr_basis[j * size_of_constraint];
7124             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7125             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7126           }
7127           /* insert cols for dual dofs */
7128           for (j = 0, k = 0; j < dual_dofs; k++) {
7129             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7130               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7131               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7132               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7133               j++;
7134             }
7135           }
7136 
7137           /* check change of basis */
7138           if (pcbddc->dbg_flag) {
7139             PetscInt  ii, jj;
7140             PetscBool valid_qr = PETSC_TRUE;
7141             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7142             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7143             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7144             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7145             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7146             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7147             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7148             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7149             PetscCall(PetscFPTrapPop());
7150             for (jj = 0; jj < size_of_constraint; jj++) {
7151               for (ii = 0; ii < primal_dofs; ii++) {
7152                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7153                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7154               }
7155             }
7156             if (!valid_qr) {
7157               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7158               for (jj = 0; jj < size_of_constraint; jj++) {
7159                 for (ii = 0; ii < primal_dofs; ii++) {
7160                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7161                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7162                   }
7163                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7164                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7165                   }
7166                 }
7167               }
7168             } else {
7169               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7170             }
7171           }
7172         } else { /* simple transformation block */
7173           PetscInt    row, col;
7174           PetscScalar val, norm;
7175 
7176           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7177           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7178           for (j = 0; j < size_of_constraint; j++) {
7179             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7180             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7181             if (!PetscBTLookup(is_primal, row_B)) {
7182               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7183               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7184               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7185             } else {
7186               for (k = 0; k < size_of_constraint; k++) {
7187                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7188                 if (row != col) {
7189                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7190                 } else {
7191                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7192                 }
7193                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7194               }
7195             }
7196           }
7197           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7198         }
7199       } else {
7200         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7201       }
7202     }
7203 
7204     /* free workspace */
7205     if (qr_needed) {
7206       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7207       PetscCall(PetscFree(trs_rhs));
7208       PetscCall(PetscFree(qr_tau));
7209       PetscCall(PetscFree(qr_work));
7210       PetscCall(PetscFree(gqr_work));
7211       PetscCall(PetscFree(qr_basis));
7212     }
7213     PetscCall(PetscBTDestroy(&is_primal));
7214     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7215     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7216 
7217     /* assembling of global change of variable */
7218     if (!pcbddc->fake_change) {
7219       Mat      tmat;
7220       PetscInt bs;
7221 
7222       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7223       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7224       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7225       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7226       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7227       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7228       PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix));
7229       PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ));
7230       PetscCall(MatGetBlockSize(pc->pmat, &bs));
7231       PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs));
7232       PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size));
7233       PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE));
7234       PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7235       PetscCall(MatDestroy(&tmat));
7236       PetscCall(VecSet(pcis->vec1_global, 0.0));
7237       PetscCall(VecSet(pcis->vec1_N, 1.0));
7238       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7239       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7240       PetscCall(VecReciprocal(pcis->vec1_global));
7241       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
7242 
7243       /* check */
7244       if (pcbddc->dbg_flag) {
7245         PetscReal error;
7246         Vec       x, x_change;
7247 
7248         PetscCall(VecDuplicate(pcis->vec1_global, &x));
7249         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7250         PetscCall(VecSetRandom(x, NULL));
7251         PetscCall(VecCopy(x, pcis->vec1_global));
7252         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7253         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7254         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7255         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7256         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7257         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7258         PetscCall(VecAXPY(x, -1.0, x_change));
7259         PetscCall(VecNorm(x, NORM_INFINITY, &error));
7260         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7261         PetscCall(VecDestroy(&x));
7262         PetscCall(VecDestroy(&x_change));
7263       }
7264       /* adapt sub_schurs computed (if any) */
7265       if (pcbddc->use_deluxe_scaling) {
7266         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
7267 
7268         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7269         if (sub_schurs && sub_schurs->S_Ej_all) {
7270           Mat S_new, tmat;
7271           IS  is_all_N, is_V_Sall = NULL;
7272 
7273           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7274           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7275           if (pcbddc->deluxe_zerorows) {
7276             ISLocalToGlobalMapping NtoSall;
7277             IS                     is_V;
7278             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7279             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7280             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7281             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7282             PetscCall(ISDestroy(&is_V));
7283           }
7284           PetscCall(ISDestroy(&is_all_N));
7285           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7286           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7287           PetscCall(PetscObjectReference((PetscObject)S_new));
7288           if (pcbddc->deluxe_zerorows) {
7289             const PetscScalar *array;
7290             const PetscInt    *idxs_V, *idxs_all;
7291             PetscInt           i, n_V;
7292 
7293             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7294             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7295             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7296             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7297             PetscCall(VecGetArrayRead(pcis->D, &array));
7298             for (i = 0; i < n_V; i++) {
7299               PetscScalar val;
7300               PetscInt    idx;
7301 
7302               idx = idxs_V[i];
7303               val = array[idxs_all[idxs_V[i]]];
7304               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7305             }
7306             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7307             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7308             PetscCall(VecRestoreArrayRead(pcis->D, &array));
7309             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7310             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7311           }
7312           sub_schurs->S_Ej_all = S_new;
7313           PetscCall(MatDestroy(&S_new));
7314           if (sub_schurs->sum_S_Ej_all) {
7315             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7316             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7317             PetscCall(PetscObjectReference((PetscObject)S_new));
7318             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7319             sub_schurs->sum_S_Ej_all = S_new;
7320             PetscCall(MatDestroy(&S_new));
7321           }
7322           PetscCall(ISDestroy(&is_V_Sall));
7323           PetscCall(MatDestroy(&tmat));
7324         }
7325         /* destroy any change of basis context in sub_schurs */
7326         if (sub_schurs && sub_schurs->change) {
7327           PetscInt i;
7328 
7329           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7330           PetscCall(PetscFree(sub_schurs->change));
7331         }
7332       }
7333       if (pcbddc->switch_static) { /* need to save the local change */
7334         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7335       } else {
7336         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7337       }
7338       /* determine if any process has changed the pressures locally */
7339       pcbddc->change_interior = pcbddc->benign_have_null;
7340     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7341       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7342       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7343       pcbddc->use_qr_single    = qr_needed;
7344     }
7345   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7346     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7347       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7348       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7349     } else {
7350       Mat benign_global = NULL;
7351       if (pcbddc->benign_have_null) {
7352         Mat M;
7353 
7354         pcbddc->change_interior = PETSC_TRUE;
7355         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7356         PetscCall(VecReciprocal(pcis->vec1_N));
7357         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7358         if (pcbddc->benign_change) {
7359           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7360           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7361         } else {
7362           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7363           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7364         }
7365         PetscCall(MatISSetLocalMat(benign_global, M));
7366         PetscCall(MatDestroy(&M));
7367         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7368         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7369       }
7370       if (pcbddc->user_ChangeOfBasisMatrix) {
7371         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix));
7372         PetscCall(MatDestroy(&benign_global));
7373       } else if (pcbddc->benign_have_null) {
7374         pcbddc->ChangeOfBasisMatrix = benign_global;
7375       }
7376     }
7377     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7378       IS              is_global;
7379       const PetscInt *gidxs;
7380 
7381       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7382       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7383       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7384       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7385       PetscCall(ISDestroy(&is_global));
7386     }
7387   }
7388   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
7389 
7390   if (!pcbddc->fake_change) {
7391     /* add pressure dofs to set of primal nodes for numbering purposes */
7392     for (i = 0; i < pcbddc->benign_n; i++) {
7393       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
7394       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7395       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
7396       pcbddc->local_primal_size_cc++;
7397       pcbddc->local_primal_size++;
7398     }
7399 
7400     /* check if a new primal space has been introduced (also take into account benign trick) */
7401     pcbddc->new_primal_space_local = PETSC_TRUE;
7402     if (olocal_primal_size == pcbddc->local_primal_size) {
7403       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7404       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7405       if (!pcbddc->new_primal_space_local) {
7406         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7407         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7408       }
7409     }
7410     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7411     PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7412   }
7413   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
7414 
7415   /* flush dbg viewer */
7416   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7417 
7418   /* free workspace */
7419   PetscCall(PetscBTDestroy(&qr_needed_idx));
7420   PetscCall(PetscBTDestroy(&change_basis));
7421   if (!pcbddc->adaptive_selection) {
7422     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7423     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7424   } else {
7425     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7426     PetscCall(PetscFree(constraints_n));
7427     PetscCall(PetscFree(constraints_idxs_B));
7428   }
7429   PetscFunctionReturn(PETSC_SUCCESS);
7430 }
7431 
7432 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7433 {
7434   ISLocalToGlobalMapping map;
7435   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7436   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
7437   PetscInt               i, N;
7438   PetscBool              rcsr = PETSC_FALSE;
7439 
7440   PetscFunctionBegin;
7441   if (pcbddc->recompute_topography) {
7442     pcbddc->graphanalyzed = PETSC_FALSE;
7443     /* Reset previously computed graph */
7444     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7445     /* Init local Graph struct */
7446     PetscCall(MatGetSize(pc->pmat, &N, NULL));
7447     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7448     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
7449 
7450     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7451     /* Check validity of the csr graph passed in by the user */
7452     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7453                pcbddc->mat_graph->nvtxs);
7454 
7455     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7456     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7457       PetscInt *xadj, *adjncy;
7458       PetscInt  nvtxs;
7459       PetscBool flg_row;
7460       Mat       A;
7461 
7462       PetscCall(PetscObjectReference((PetscObject)matis->A));
7463       A = matis->A;
7464       for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7465         Mat AtA;
7466 
7467         PetscCall(MatProductCreate(A, A, NULL, &AtA));
7468         PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7469         PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7470         PetscCall(MatProductSetFromOptions(AtA));
7471         PetscCall(MatProductSymbolic(AtA));
7472         PetscCall(MatProductClear(AtA));
7473         /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7474         AtA->assembled = PETSC_TRUE;
7475         PetscCall(MatDestroy(&A));
7476         A = AtA;
7477       }
7478       PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7479       if (flg_row) {
7480         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7481         pcbddc->computed_rowadj = PETSC_TRUE;
7482         PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7483         rcsr = PETSC_TRUE;
7484       }
7485       PetscCall(MatDestroy(&A));
7486     }
7487     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7488 
7489     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7490       PetscReal   *lcoords;
7491       PetscInt     n;
7492       MPI_Datatype dimrealtype;
7493       PetscMPIInt  cdimi;
7494 
7495       /* TODO: support for blocked */
7496       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7497       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7498       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7499       PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi));
7500       PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype));
7501       PetscCallMPI(MPI_Type_commit(&dimrealtype));
7502       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7503       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7504       PetscCallMPI(MPI_Type_free(&dimrealtype));
7505       PetscCall(PetscFree(pcbddc->mat_graph->coords));
7506 
7507       pcbddc->mat_graph->coords = lcoords;
7508       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7509       pcbddc->mat_graph->cnloc  = n;
7510     }
7511     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7512                pcbddc->mat_graph->nvtxs);
7513     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7514 
7515     /* attach info on disconnected subdomains if present */
7516     if (pcbddc->n_local_subs) {
7517       PetscInt *local_subs, n, totn;
7518 
7519       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7520       PetscCall(PetscMalloc1(n, &local_subs));
7521       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7522       for (i = 0; i < pcbddc->n_local_subs; i++) {
7523         const PetscInt *idxs;
7524         PetscInt        nl, j;
7525 
7526         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7527         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7528         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7529         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7530       }
7531       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7532       pcbddc->mat_graph->n_local_subs = totn + 1;
7533       pcbddc->mat_graph->local_subs   = local_subs;
7534     }
7535 
7536     /* Setup of Graph */
7537     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7538   }
7539 
7540   if (!pcbddc->graphanalyzed) {
7541     /* Graph's connected components analysis */
7542     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7543     pcbddc->graphanalyzed   = PETSC_TRUE;
7544     pcbddc->corner_selected = pcbddc->corner_selection;
7545   }
7546   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7547   PetscFunctionReturn(PETSC_SUCCESS);
7548 }
7549 
7550 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7551 {
7552   PetscInt     i, j, n;
7553   PetscScalar *alphas;
7554   PetscReal    norm, *onorms;
7555 
7556   PetscFunctionBegin;
7557   n = *nio;
7558   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7559   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7560   PetscCall(VecNormalize(vecs[0], &norm));
7561   if (norm < PETSC_SMALL) {
7562     onorms[0] = 0.0;
7563     PetscCall(VecSet(vecs[0], 0.0));
7564   } else {
7565     onorms[0] = norm;
7566   }
7567 
7568   for (i = 1; i < n; i++) {
7569     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7570     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7571     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7572     PetscCall(VecNormalize(vecs[i], &norm));
7573     if (norm < PETSC_SMALL) {
7574       onorms[i] = 0.0;
7575       PetscCall(VecSet(vecs[i], 0.0));
7576     } else {
7577       onorms[i] = norm;
7578     }
7579   }
7580   /* push nonzero vectors at the beginning */
7581   for (i = 0; i < n; i++) {
7582     if (onorms[i] == 0.0) {
7583       for (j = i + 1; j < n; j++) {
7584         if (onorms[j] != 0.0) {
7585           PetscCall(VecCopy(vecs[j], vecs[i]));
7586           onorms[i] = onorms[j];
7587           onorms[j] = 0.0;
7588           break;
7589         }
7590       }
7591     }
7592   }
7593   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7594   PetscCall(PetscFree2(alphas, onorms));
7595   PetscFunctionReturn(PETSC_SUCCESS);
7596 }
7597 
7598 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7599 {
7600   ISLocalToGlobalMapping mapping;
7601   Mat                    A;
7602   PetscInt               n_neighs, *neighs, *n_shared, **shared;
7603   PetscMPIInt            size, rank, color;
7604   PetscInt              *xadj, *adjncy;
7605   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7606   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
7607   PetscInt               void_procs, *procs_candidates = NULL;
7608   PetscInt               xadj_count, *count;
7609   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
7610   PetscSubcomm           psubcomm;
7611   MPI_Comm               subcomm;
7612 
7613   PetscFunctionBegin;
7614   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7615   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7616   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7617   PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2);
7618   PetscValidLogicalCollectiveInt(mat, redprocs, 3);
7619   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7620 
7621   if (have_void) *have_void = PETSC_FALSE;
7622   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7623   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7624   PetscCall(MatISGetLocalMat(mat, &A));
7625   PetscCall(MatGetLocalSize(A, &n, NULL));
7626   im_active = !!n;
7627   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7628   void_procs = size - active_procs;
7629   /* get ranks of non-active processes in mat communicator */
7630   if (void_procs) {
7631     PetscInt ncand;
7632 
7633     if (have_void) *have_void = PETSC_TRUE;
7634     PetscCall(PetscMalloc1(size, &procs_candidates));
7635     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7636     for (i = 0, ncand = 0; i < size; i++) {
7637       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7638     }
7639     /* force n_subdomains to be not greater that the number of non-active processes */
7640     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7641   }
7642 
7643   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7644      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7645   PetscCall(MatGetSize(mat, &N, NULL));
7646   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7647     PetscInt issize, isidx, dest;
7648     if (*n_subdomains == 1) dest = 0;
7649     else dest = rank;
7650     if (im_active) {
7651       issize = 1;
7652       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7653         isidx = procs_candidates[dest];
7654       } else {
7655         isidx = dest;
7656       }
7657     } else {
7658       issize = 0;
7659       isidx  = -1;
7660     }
7661     if (*n_subdomains != 1) *n_subdomains = active_procs;
7662     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7663     PetscCall(PetscFree(procs_candidates));
7664     PetscFunctionReturn(PETSC_SUCCESS);
7665   }
7666   PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7667   PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7668   threshold = PetscMax(threshold, 2);
7669 
7670   /* Get info on mapping */
7671   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7672   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7673 
7674   /* build local CSR graph of subdomains' connectivity */
7675   PetscCall(PetscMalloc1(2, &xadj));
7676   xadj[0] = 0;
7677   xadj[1] = PetscMax(n_neighs - 1, 0);
7678   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7679   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7680   PetscCall(PetscCalloc1(n, &count));
7681   for (i = 1; i < n_neighs; i++)
7682     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7683 
7684   xadj_count = 0;
7685   for (i = 1; i < n_neighs; i++) {
7686     for (j = 0; j < n_shared[i]; j++) {
7687       if (count[shared[i][j]] < threshold) {
7688         adjncy[xadj_count]     = neighs[i];
7689         adjncy_wgt[xadj_count] = n_shared[i];
7690         xadj_count++;
7691         break;
7692       }
7693     }
7694   }
7695   xadj[1] = xadj_count;
7696   PetscCall(PetscFree(count));
7697   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7698   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7699 
7700   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7701 
7702   /* Restrict work on active processes only */
7703   PetscCall(PetscMPIIntCast(im_active, &color));
7704   if (void_procs) {
7705     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7706     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7707     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7708     subcomm = PetscSubcommChild(psubcomm);
7709   } else {
7710     psubcomm = NULL;
7711     subcomm  = PetscObjectComm((PetscObject)mat);
7712   }
7713 
7714   v_wgt = NULL;
7715   if (!color) {
7716     PetscCall(PetscFree(xadj));
7717     PetscCall(PetscFree(adjncy));
7718     PetscCall(PetscFree(adjncy_wgt));
7719   } else {
7720     Mat             subdomain_adj;
7721     IS              new_ranks, new_ranks_contig;
7722     MatPartitioning partitioner;
7723     PetscInt        rstart, rend;
7724     PetscMPIInt     irstart = 0, irend = 0;
7725     PetscInt       *is_indices, *oldranks;
7726     PetscMPIInt     size;
7727     PetscBool       aggregate;
7728 
7729     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7730     if (void_procs) {
7731       PetscInt prank = rank;
7732       PetscCall(PetscMalloc1(size, &oldranks));
7733       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7734       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7735       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7736     } else {
7737       oldranks = NULL;
7738     }
7739     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7740     if (aggregate) { /* TODO: all this part could be made more efficient */
7741       PetscInt     lrows, row, ncols, *cols;
7742       PetscMPIInt  nrank;
7743       PetscScalar *vals;
7744 
7745       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7746       lrows = 0;
7747       if (nrank < redprocs) {
7748         lrows = size / redprocs;
7749         if (nrank < size % redprocs) lrows++;
7750       }
7751       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7752       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7753       PetscCall(PetscMPIIntCast(rstart, &irstart));
7754       PetscCall(PetscMPIIntCast(rend, &irend));
7755       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7756       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7757       row   = nrank;
7758       ncols = xadj[1] - xadj[0];
7759       cols  = adjncy;
7760       PetscCall(PetscMalloc1(ncols, &vals));
7761       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7762       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7763       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7764       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7765       PetscCall(PetscFree(xadj));
7766       PetscCall(PetscFree(adjncy));
7767       PetscCall(PetscFree(adjncy_wgt));
7768       PetscCall(PetscFree(vals));
7769       if (use_vwgt) {
7770         Vec                v;
7771         const PetscScalar *array;
7772         PetscInt           nl;
7773 
7774         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7775         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7776         PetscCall(VecAssemblyBegin(v));
7777         PetscCall(VecAssemblyEnd(v));
7778         PetscCall(VecGetLocalSize(v, &nl));
7779         PetscCall(VecGetArrayRead(v, &array));
7780         PetscCall(PetscMalloc1(nl, &v_wgt));
7781         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7782         PetscCall(VecRestoreArrayRead(v, &array));
7783         PetscCall(VecDestroy(&v));
7784       }
7785     } else {
7786       PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7787       if (use_vwgt) {
7788         PetscCall(PetscMalloc1(1, &v_wgt));
7789         v_wgt[0] = n;
7790       }
7791     }
7792     /* PetscCall(MatView(subdomain_adj,0)); */
7793 
7794     /* Partition */
7795     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7796 #if defined(PETSC_HAVE_PTSCOTCH)
7797     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7798 #elif defined(PETSC_HAVE_PARMETIS)
7799     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7800 #else
7801     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7802 #endif
7803     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7804     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7805     *n_subdomains = PetscMin(size, *n_subdomains);
7806     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7807     PetscCall(MatPartitioningSetFromOptions(partitioner));
7808     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7809     /* PetscCall(MatPartitioningView(partitioner,0)); */
7810 
7811     /* renumber new_ranks to avoid "holes" in new set of processors */
7812     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7813     PetscCall(ISDestroy(&new_ranks));
7814     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7815     if (!aggregate) {
7816       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7817         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7818         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7819       } else if (oldranks) {
7820         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7821       } else {
7822         ranks_send_to_idx[0] = is_indices[0];
7823       }
7824     } else {
7825       PetscInt     idx = 0;
7826       PetscMPIInt  tag;
7827       MPI_Request *reqs;
7828 
7829       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7830       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7831       for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7832       PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7833       PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE));
7834       PetscCall(PetscFree(reqs));
7835       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7836         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7837         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7838       } else if (oldranks) {
7839         ranks_send_to_idx[0] = oldranks[idx];
7840       } else {
7841         ranks_send_to_idx[0] = idx;
7842       }
7843     }
7844     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7845     /* clean up */
7846     PetscCall(PetscFree(oldranks));
7847     PetscCall(ISDestroy(&new_ranks_contig));
7848     PetscCall(MatDestroy(&subdomain_adj));
7849     PetscCall(MatPartitioningDestroy(&partitioner));
7850   }
7851   PetscCall(PetscSubcommDestroy(&psubcomm));
7852   PetscCall(PetscFree(procs_candidates));
7853 
7854   /* assemble parallel IS for sends */
7855   i = 1;
7856   if (!color) i = 0;
7857   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7858   PetscFunctionReturn(PETSC_SUCCESS);
7859 }
7860 
7861 typedef enum {
7862   MATDENSE_PRIVATE = 0,
7863   MATAIJ_PRIVATE,
7864   MATBAIJ_PRIVATE,
7865   MATSBAIJ_PRIVATE
7866 } MatTypePrivate;
7867 
7868 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7869 {
7870   Mat                    local_mat;
7871   IS                     is_sends_internal;
7872   PetscInt               rows, cols, new_local_rows;
7873   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7874   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7875   ISLocalToGlobalMapping l2gmap;
7876   PetscInt              *l2gmap_indices;
7877   const PetscInt        *is_indices;
7878   MatType                new_local_type;
7879   /* buffers */
7880   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7881   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7882   PetscInt          *recv_buffer_idxs_local;
7883   PetscScalar       *ptr_vals, *recv_buffer_vals;
7884   const PetscScalar *send_buffer_vals;
7885   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7886   /* MPI */
7887   MPI_Comm     comm, comm_n;
7888   PetscSubcomm subcomm;
7889   PetscMPIInt  n_sends, n_recvs, size;
7890   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7891   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7892   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7893   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7894   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
7895 
7896   PetscFunctionBegin;
7897   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7898   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7899   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7900   PetscValidLogicalCollectiveInt(mat, n_subdomains, 3);
7901   PetscValidLogicalCollectiveBool(mat, restrict_comm, 4);
7902   PetscValidLogicalCollectiveBool(mat, restrict_full, 5);
7903   PetscValidLogicalCollectiveBool(mat, reuse, 6);
7904   PetscValidLogicalCollectiveInt(mat, nis, 8);
7905   PetscValidLogicalCollectiveInt(mat, nvecs, 10);
7906   if (nvecs) {
7907     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
7908     PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11);
7909   }
7910   /* further checks */
7911   PetscCall(MatISGetLocalMat(mat, &local_mat));
7912   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7913   /* XXX hack for multi_element */
7914   if (!isdense) PetscCall(MatConvert(local_mat, MATDENSE, MAT_INPLACE_MATRIX, &local_mat));
7915   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7916   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7917 
7918   PetscCall(MatGetSize(local_mat, &rows, &cols));
7919   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
7920   if (reuse && *mat_n) {
7921     PetscInt mrows, mcols, mnrows, mncols;
7922     PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7);
7923     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
7924     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
7925     PetscCall(MatGetSize(mat, &mrows, &mcols));
7926     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
7927     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
7928     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
7929   }
7930   PetscCall(MatGetBlockSize(local_mat, &bs));
7931   PetscValidLogicalCollectiveInt(mat, bs, 1);
7932 
7933   /* prepare IS for sending if not provided */
7934   if (!is_sends) {
7935     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
7936     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
7937   } else {
7938     PetscCall(PetscObjectReference((PetscObject)is_sends));
7939     is_sends_internal = is_sends;
7940   }
7941 
7942   /* get comm */
7943   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
7944 
7945   /* compute number of sends */
7946   PetscCall(ISGetLocalSize(is_sends_internal, &i));
7947   PetscCall(PetscMPIIntCast(i, &n_sends));
7948 
7949   /* compute number of receives */
7950   PetscCallMPI(MPI_Comm_size(comm, &size));
7951   PetscCall(PetscMalloc1(size, &iflags));
7952   PetscCall(PetscArrayzero(iflags, size));
7953   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
7954   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7955   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
7956   PetscCall(PetscFree(iflags));
7957 
7958   /* restrict comm if requested */
7959   subcomm     = NULL;
7960   destroy_mat = PETSC_FALSE;
7961   if (restrict_comm) {
7962     PetscMPIInt color, subcommsize;
7963 
7964     color = 0;
7965     if (restrict_full) {
7966       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7967     } else {
7968       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7969     }
7970     PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
7971     subcommsize = size - subcommsize;
7972     /* check if reuse has been requested */
7973     if (reuse) {
7974       if (*mat_n) {
7975         PetscMPIInt subcommsize2;
7976         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
7977         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
7978         comm_n = PetscObjectComm((PetscObject)*mat_n);
7979       } else {
7980         comm_n = PETSC_COMM_SELF;
7981       }
7982     } else { /* MAT_INITIAL_MATRIX */
7983       PetscMPIInt rank;
7984 
7985       PetscCallMPI(MPI_Comm_rank(comm, &rank));
7986       PetscCall(PetscSubcommCreate(comm, &subcomm));
7987       PetscCall(PetscSubcommSetNumber(subcomm, 2));
7988       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
7989       comm_n = PetscSubcommChild(subcomm);
7990     }
7991     /* flag to destroy *mat_n if not significative */
7992     if (color) destroy_mat = PETSC_TRUE;
7993   } else {
7994     comm_n = comm;
7995   }
7996 
7997   /* prepare send/receive buffers */
7998   PetscCall(PetscMalloc1(size, &ilengths_idxs));
7999   PetscCall(PetscArrayzero(ilengths_idxs, size));
8000   PetscCall(PetscMalloc1(size, &ilengths_vals));
8001   PetscCall(PetscArrayzero(ilengths_vals, size));
8002   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
8003 
8004   /* Get data from local matrices */
8005   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8006   /* TODO: See below some guidelines on how to prepare the local buffers */
8007   /*
8008        send_buffer_vals should contain the raw values of the local matrix
8009        send_buffer_idxs should contain:
8010        - MatType_PRIVATE type
8011        - PetscInt        size_of_l2gmap
8012        - PetscInt        global_row_indices[size_of_l2gmap]
8013        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8014     */
8015   {
8016     ISLocalToGlobalMapping mapping;
8017 
8018     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8019     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8020     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8021     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8022     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8023     send_buffer_idxs[1] = i;
8024     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8025     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8026     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8027     PetscCall(PetscMPIIntCast(i, &len));
8028     for (i = 0; i < n_sends; i++) {
8029       ilengths_vals[is_indices[i]] = len * len;
8030       ilengths_idxs[is_indices[i]] = len + 2;
8031     }
8032   }
8033   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8034   /* additional is (if any) */
8035   if (nis) {
8036     PetscMPIInt psum;
8037     PetscInt    j;
8038     for (j = 0, psum = 0; j < nis; j++) {
8039       PetscInt plen;
8040       PetscCall(ISGetLocalSize(isarray[j], &plen));
8041       PetscCall(PetscMPIIntCast(plen, &len));
8042       psum += len + 1; /* indices + length */
8043     }
8044     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8045     for (j = 0, psum = 0; j < nis; j++) {
8046       PetscInt        plen;
8047       const PetscInt *is_array_idxs;
8048       PetscCall(ISGetLocalSize(isarray[j], &plen));
8049       send_buffer_idxs_is[psum] = plen;
8050       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8051       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8052       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8053       psum += plen + 1; /* indices + length */
8054     }
8055     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8056     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8057   }
8058   PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8059 
8060   buf_size_idxs    = 0;
8061   buf_size_vals    = 0;
8062   buf_size_idxs_is = 0;
8063   buf_size_vecs    = 0;
8064   for (i = 0; i < n_recvs; i++) {
8065     buf_size_idxs += olengths_idxs[i];
8066     buf_size_vals += olengths_vals[i];
8067     if (nis) buf_size_idxs_is += olengths_idxs_is[i];
8068     if (nvecs) buf_size_vecs += olengths_idxs[i];
8069   }
8070   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8071   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8072   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8073   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
8074 
8075   /* get new tags for clean communications */
8076   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8077   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8078   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8079   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
8080 
8081   /* allocate for requests */
8082   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8083   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8084   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8085   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8086   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8087   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8088   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8089   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
8090 
8091   /* communications */
8092   ptr_idxs    = recv_buffer_idxs;
8093   ptr_vals    = recv_buffer_vals;
8094   ptr_idxs_is = recv_buffer_idxs_is;
8095   ptr_vecs    = recv_buffer_vecs;
8096   for (i = 0; i < n_recvs; i++) {
8097     PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i]));
8098     PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i]));
8099     ptr_idxs += olengths_idxs[i];
8100     ptr_vals += olengths_vals[i];
8101     if (nis) {
8102       PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i]));
8103       ptr_idxs_is += olengths_idxs_is[i];
8104     }
8105     if (nvecs) {
8106       PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i]));
8107       ptr_vecs += olengths_idxs[i] - 2;
8108     }
8109   }
8110   for (i = 0; i < n_sends; i++) {
8111     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8112     PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8113     PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8114     if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8115     if (nvecs) {
8116       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8117       PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8118     }
8119   }
8120   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8121   PetscCall(ISDestroy(&is_sends_internal));
8122 
8123   /* assemble new l2g map */
8124   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8125   ptr_idxs       = recv_buffer_idxs;
8126   new_local_rows = 0;
8127   for (i = 0; i < n_recvs; i++) {
8128     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8129     ptr_idxs += olengths_idxs[i];
8130   }
8131   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8132   ptr_idxs       = recv_buffer_idxs;
8133   new_local_rows = 0;
8134   for (i = 0; i < n_recvs; i++) {
8135     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8136     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8137     ptr_idxs += olengths_idxs[i];
8138   }
8139   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8140   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8141   PetscCall(PetscFree(l2gmap_indices));
8142 
8143   /* infer new local matrix type from received local matrices type */
8144   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8145   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
8146   if (n_recvs) {
8147     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8148     ptr_idxs                              = recv_buffer_idxs;
8149     for (i = 0; i < n_recvs; i++) {
8150       if ((PetscInt)new_local_type_private != *ptr_idxs) {
8151         new_local_type_private = MATAIJ_PRIVATE;
8152         break;
8153       }
8154       ptr_idxs += olengths_idxs[i];
8155     }
8156     switch (new_local_type_private) {
8157     case MATDENSE_PRIVATE:
8158       new_local_type = MATSEQAIJ;
8159       bs             = 1;
8160       break;
8161     case MATAIJ_PRIVATE:
8162       new_local_type = MATSEQAIJ;
8163       bs             = 1;
8164       break;
8165     case MATBAIJ_PRIVATE:
8166       new_local_type = MATSEQBAIJ;
8167       break;
8168     case MATSBAIJ_PRIVATE:
8169       new_local_type = MATSEQSBAIJ;
8170       break;
8171     default:
8172       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8173     }
8174   } else { /* by default, new_local_type is seqaij */
8175     new_local_type = MATSEQAIJ;
8176     bs             = 1;
8177   }
8178 
8179   /* create MATIS object if needed */
8180   if (!reuse) {
8181     PetscCall(MatGetSize(mat, &rows, &cols));
8182     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8183   } else {
8184     /* it also destroys the local matrices */
8185     if (*mat_n) {
8186       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8187     } else { /* this is a fake object */
8188       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8189     }
8190   }
8191   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8192   PetscCall(MatSetType(local_mat, new_local_type));
8193 
8194   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
8195 
8196   /* Global to local map of received indices */
8197   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8198   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8199   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
8200 
8201   /* restore attributes -> type of incoming data and its size */
8202   buf_size_idxs = 0;
8203   for (i = 0; i < n_recvs; i++) {
8204     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
8205     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8206     buf_size_idxs += olengths_idxs[i];
8207   }
8208   PetscCall(PetscFree(recv_buffer_idxs));
8209 
8210   /* set preallocation */
8211   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8212   if (!newisdense) {
8213     PetscInt *new_local_nnz = NULL;
8214 
8215     ptr_idxs = recv_buffer_idxs_local;
8216     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8217     for (i = 0; i < n_recvs; i++) {
8218       PetscInt j;
8219       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8220         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8221       } else {
8222         /* TODO */
8223       }
8224       ptr_idxs += olengths_idxs[i];
8225     }
8226     if (new_local_nnz) {
8227       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8228       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8229       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8230       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8231       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8232       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8233     } else {
8234       PetscCall(MatSetUp(local_mat));
8235     }
8236     PetscCall(PetscFree(new_local_nnz));
8237   } else {
8238     PetscCall(MatSetUp(local_mat));
8239   }
8240 
8241   /* set values */
8242   ptr_vals = recv_buffer_vals;
8243   ptr_idxs = recv_buffer_idxs_local;
8244   for (i = 0; i < n_recvs; i++) {
8245     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8246       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8247       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8248       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8249       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8250       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8251     } else {
8252       /* TODO */
8253     }
8254     ptr_idxs += olengths_idxs[i];
8255     ptr_vals += olengths_vals[i];
8256   }
8257   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8258   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8259   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8260   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8261   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8262   PetscCall(PetscFree(recv_buffer_vals));
8263 
8264 #if 0
8265   if (!restrict_comm) { /* check */
8266     Vec       lvec,rvec;
8267     PetscReal infty_error;
8268 
8269     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8270     PetscCall(VecSetRandom(rvec,NULL));
8271     PetscCall(MatMult(mat,rvec,lvec));
8272     PetscCall(VecScale(lvec,-1.0));
8273     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8274     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8275     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8276     PetscCall(VecDestroy(&rvec));
8277     PetscCall(VecDestroy(&lvec));
8278   }
8279 #endif
8280 
8281   /* assemble new additional is (if any) */
8282   if (nis) {
8283     PetscInt **temp_idxs, *count_is, j, psum;
8284 
8285     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8286     PetscCall(PetscCalloc1(nis, &count_is));
8287     ptr_idxs = recv_buffer_idxs_is;
8288     psum     = 0;
8289     for (i = 0; i < n_recvs; i++) {
8290       for (j = 0; j < nis; j++) {
8291         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8292         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
8293         psum += plen;
8294         ptr_idxs += plen + 1; /* shift pointer to received data */
8295       }
8296     }
8297     PetscCall(PetscMalloc1(nis, &temp_idxs));
8298     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8299     for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8300     PetscCall(PetscArrayzero(count_is, nis));
8301     ptr_idxs = recv_buffer_idxs_is;
8302     for (i = 0; i < n_recvs; i++) {
8303       for (j = 0; j < nis; j++) {
8304         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8305         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8306         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
8307         ptr_idxs += plen + 1; /* shift pointer to received data */
8308       }
8309     }
8310     for (i = 0; i < nis; i++) {
8311       PetscCall(ISDestroy(&isarray[i]));
8312       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8313       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8314     }
8315     PetscCall(PetscFree(count_is));
8316     PetscCall(PetscFree(temp_idxs[0]));
8317     PetscCall(PetscFree(temp_idxs));
8318   }
8319   /* free workspace */
8320   PetscCall(PetscFree(recv_buffer_idxs_is));
8321   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8322   PetscCall(PetscFree(send_buffer_idxs));
8323   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8324   if (isdense) {
8325     PetscCall(MatISGetLocalMat(mat, &local_mat));
8326     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8327     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8328   } else {
8329     /* PetscCall(PetscFree(send_buffer_vals)); */
8330   }
8331   if (nis) {
8332     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8333     PetscCall(PetscFree(send_buffer_idxs_is));
8334   }
8335 
8336   if (nvecs) {
8337     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8338     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8339     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8340     PetscCall(VecDestroy(&nnsp_vec[0]));
8341     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8342     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8343     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8344     /* set values */
8345     ptr_vals = recv_buffer_vecs;
8346     ptr_idxs = recv_buffer_idxs_local;
8347     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8348     for (i = 0; i < n_recvs; i++) {
8349       PetscInt j;
8350       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8351       ptr_idxs += olengths_idxs[i];
8352       ptr_vals += olengths_idxs[i] - 2;
8353     }
8354     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8355     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8356     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8357   }
8358 
8359   PetscCall(PetscFree(recv_buffer_vecs));
8360   PetscCall(PetscFree(recv_buffer_idxs_local));
8361   PetscCall(PetscFree(recv_req_idxs));
8362   PetscCall(PetscFree(recv_req_vals));
8363   PetscCall(PetscFree(recv_req_vecs));
8364   PetscCall(PetscFree(recv_req_idxs_is));
8365   PetscCall(PetscFree(send_req_idxs));
8366   PetscCall(PetscFree(send_req_vals));
8367   PetscCall(PetscFree(send_req_vecs));
8368   PetscCall(PetscFree(send_req_idxs_is));
8369   PetscCall(PetscFree(ilengths_vals));
8370   PetscCall(PetscFree(ilengths_idxs));
8371   PetscCall(PetscFree(olengths_vals));
8372   PetscCall(PetscFree(olengths_idxs));
8373   PetscCall(PetscFree(onodes));
8374   if (nis) {
8375     PetscCall(PetscFree(ilengths_idxs_is));
8376     PetscCall(PetscFree(olengths_idxs_is));
8377     PetscCall(PetscFree(onodes_is));
8378   }
8379   PetscCall(PetscSubcommDestroy(&subcomm));
8380   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8381     PetscCall(MatDestroy(mat_n));
8382     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8383     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8384       PetscCall(VecDestroy(&nnsp_vec[0]));
8385     }
8386     *mat_n = NULL;
8387   }
8388   PetscFunctionReturn(PETSC_SUCCESS);
8389 }
8390 
8391 /* temporary hack into ksp private data structure */
8392 #include <petsc/private/kspimpl.h>
8393 
8394 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8395 {
8396   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
8397   PC_IS                 *pcis   = (PC_IS *)pc->data;
8398   PCBDDCGraph            graph  = pcbddc->mat_graph;
8399   Mat                    coarse_mat, coarse_mat_is;
8400   Mat                    coarsedivudotp = NULL;
8401   Mat                    coarseG, t_coarse_mat_is;
8402   MatNullSpace           CoarseNullSpace = NULL;
8403   ISLocalToGlobalMapping coarse_islg;
8404   IS                     coarse_is, *isarray, corners;
8405   PetscInt               i, im_active = -1, active_procs = -1;
8406   PetscInt               nis, nisdofs, nisneu, nisvert;
8407   PetscInt               coarse_eqs_per_proc, coarsening_ratio;
8408   PC                     pc_temp;
8409   PCType                 coarse_pc_type;
8410   KSPType                coarse_ksp_type;
8411   PetscBool              multilevel_requested, multilevel_allowed;
8412   PetscBool              coarse_reuse, multi_element = graph->multi_element;
8413   PetscInt               ncoarse, nedcfield;
8414   PetscBool              compute_vecs = PETSC_FALSE;
8415   PetscScalar           *array;
8416   MatReuse               coarse_mat_reuse;
8417   PetscBool              restr, full_restr, have_void;
8418   PetscMPIInt            size;
8419 
8420   PetscFunctionBegin;
8421   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8422   /* Assign global numbering to coarse dofs */
8423   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8424     PetscInt ocoarse_size;
8425     compute_vecs = PETSC_TRUE;
8426 
8427     pcbddc->new_primal_space = PETSC_TRUE;
8428     ocoarse_size             = pcbddc->coarse_size;
8429     PetscCall(PetscFree(pcbddc->global_primal_indices));
8430     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8431     /* see if we can avoid some work */
8432     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8433       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8434       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8435         PetscCall(KSPReset(pcbddc->coarse_ksp));
8436         coarse_reuse = PETSC_FALSE;
8437       } else { /* we can safely reuse already computed coarse matrix */
8438         coarse_reuse = PETSC_TRUE;
8439       }
8440     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8441       coarse_reuse = PETSC_FALSE;
8442     }
8443     /* reset any subassembling information */
8444     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8445   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8446     coarse_reuse = PETSC_TRUE;
8447   }
8448   if (coarse_reuse && pcbddc->coarse_ksp) {
8449     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8450     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8451     coarse_mat_reuse = MAT_REUSE_MATRIX;
8452   } else {
8453     coarse_mat       = NULL;
8454     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8455   }
8456 
8457   /* creates temporary l2gmap and IS for coarse indexes */
8458   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8459   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
8460 
8461   /* creates temporary MATIS object for coarse matrix */
8462   PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8463   PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8464   PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8465   PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_TRUE));
8466   PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8467   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8468   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8469   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8470   PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));
8471 
8472   /* count "active" (i.e. with positive local size) and "void" processes */
8473   im_active = !!pcis->n;
8474   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8475 
8476   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8477   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8478   /* full_restr : just use the receivers from the subassembling pattern */
8479   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8480   coarse_mat_is        = NULL;
8481   multilevel_allowed   = PETSC_FALSE;
8482   multilevel_requested = PETSC_FALSE;
8483   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8484   if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8485   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8486   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8487   coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8488   if (multilevel_requested) {
8489     ncoarse    = active_procs / coarsening_ratio;
8490     restr      = PETSC_FALSE;
8491     full_restr = PETSC_FALSE;
8492   } else {
8493     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8494     restr      = PETSC_TRUE;
8495     full_restr = PETSC_TRUE;
8496   }
8497   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8498   ncoarse = PetscMax(1, ncoarse);
8499   if (!pcbddc->coarse_subassembling) {
8500     if (coarsening_ratio > 1) {
8501       if (multilevel_requested) {
8502         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8503       } else {
8504         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8505       }
8506     } else {
8507       PetscMPIInt rank;
8508 
8509       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8510       have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE;
8511       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8512     }
8513   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8514     PetscInt psum;
8515     if (pcbddc->coarse_ksp) psum = 1;
8516     else psum = 0;
8517     PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8518     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8519   }
8520   /* determine if we can go multilevel */
8521   if (multilevel_requested) {
8522     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8523     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
8524   }
8525   if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8526 
8527   /* dump subassembling pattern */
8528   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8529   /* compute dofs splitting and neumann boundaries for coarse dofs */
8530   nedcfield = -1;
8531   corners   = NULL;
8532   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8533     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
8534     const PetscInt        *idxs;
8535     ISLocalToGlobalMapping tmap;
8536 
8537     /* create map between primal indices (in local representative ordering) and local primal numbering */
8538     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8539     /* allocate space for temporary storage */
8540     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8541     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8542     /* allocate for IS array */
8543     nisdofs = pcbddc->n_ISForDofsLocal;
8544     if (pcbddc->nedclocal) {
8545       if (pcbddc->nedfield > -1) {
8546         nedcfield = pcbddc->nedfield;
8547       } else {
8548         nedcfield = 0;
8549         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8550         nisdofs = 1;
8551       }
8552     }
8553     nisneu  = !!pcbddc->NeumannBoundariesLocal;
8554     nisvert = 0; /* nisvert is not used */
8555     nis     = nisdofs + nisneu + nisvert;
8556     PetscCall(PetscMalloc1(nis, &isarray));
8557     /* dofs splitting */
8558     for (i = 0; i < nisdofs; i++) {
8559       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8560       if (nedcfield != i) {
8561         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8562         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8563         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8564         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8565       } else {
8566         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8567         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8568         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8569         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8570         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8571       }
8572       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8573       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8574       /* PetscCall(ISView(isarray[i],0)); */
8575     }
8576     /* neumann boundaries */
8577     if (pcbddc->NeumannBoundariesLocal) {
8578       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8579       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8580       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8581       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8582       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8583       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8584       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8585       /* PetscCall(ISView(isarray[nisdofs],0)); */
8586     }
8587     /* coordinates */
8588     if (pcbddc->corner_selected) {
8589       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8590       PetscCall(ISGetLocalSize(corners, &tsize));
8591       PetscCall(ISGetIndices(corners, &idxs));
8592       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8593       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8594       PetscCall(ISRestoreIndices(corners, &idxs));
8595       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8596       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8597       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8598     }
8599     PetscCall(PetscFree(tidxs));
8600     PetscCall(PetscFree(tidxs2));
8601     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8602   } else {
8603     nis     = 0;
8604     nisdofs = 0;
8605     nisneu  = 0;
8606     nisvert = 0;
8607     isarray = NULL;
8608   }
8609   /* destroy no longer needed map */
8610   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
8611 
8612   /* subassemble */
8613   if (multilevel_allowed) {
8614     Vec       vp[1];
8615     PetscInt  nvecs = 0;
8616     PetscBool reuse;
8617 
8618     vp[0] = NULL;
8619     /* XXX HDIV also */
8620     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8621       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8622       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8623       PetscCall(VecSetType(vp[0], VECSTANDARD));
8624       nvecs = 1;
8625 
8626       if (pcbddc->divudotp) {
8627         Mat      B, loc_divudotp;
8628         Vec      v, p;
8629         IS       dummy;
8630         PetscInt np;
8631 
8632         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8633         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8634         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8635         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8636         PetscCall(MatCreateVecs(B, &v, &p));
8637         PetscCall(VecSet(p, 1.));
8638         PetscCall(MatMultTranspose(B, p, v));
8639         PetscCall(VecDestroy(&p));
8640         PetscCall(MatDestroy(&B));
8641         PetscCall(VecGetArray(vp[0], &array));
8642         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8643         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8644         PetscCall(VecResetArray(pcbddc->vec1_P));
8645         PetscCall(VecRestoreArray(vp[0], &array));
8646         PetscCall(ISDestroy(&dummy));
8647         PetscCall(VecDestroy(&v));
8648       }
8649     }
8650     if (coarse_mat) reuse = PETSC_TRUE;
8651     else reuse = PETSC_FALSE;
8652     if (multi_element) {
8653       /* XXX divudotp */
8654       PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_FALSE));
8655       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8656       coarse_mat_is = t_coarse_mat_is;
8657     } else {
8658       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8659       if (reuse) {
8660         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8661       } else {
8662         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8663       }
8664       if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8665         PetscScalar       *arraym;
8666         const PetscScalar *arrayv;
8667         PetscInt           nl;
8668         PetscCall(VecGetLocalSize(vp[0], &nl));
8669         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8670         PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8671         PetscCall(VecGetArrayRead(vp[0], &arrayv));
8672         PetscCall(PetscArraycpy(arraym, arrayv, nl));
8673         PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8674         PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8675         PetscCall(VecDestroy(&vp[0]));
8676       } else {
8677         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8678       }
8679     }
8680   } else {
8681     if (ncoarse != size) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8682     else {
8683       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8684       coarse_mat_is = t_coarse_mat_is;
8685     }
8686   }
8687   if (coarse_mat_is || coarse_mat) {
8688     if (!multilevel_allowed) {
8689       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8690     } else {
8691       /* if this matrix is present, it means we are not reusing the coarse matrix */
8692       if (coarse_mat_is) {
8693         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8694         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8695         coarse_mat = coarse_mat_is;
8696       }
8697     }
8698   }
8699   PetscCall(MatDestroy(&t_coarse_mat_is));
8700   PetscCall(MatDestroy(&coarse_mat_is));
8701 
8702   /* create local to global scatters for coarse problem */
8703   if (compute_vecs) {
8704     PetscInt lrows;
8705     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8706     if (coarse_mat) {
8707       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8708     } else {
8709       lrows = 0;
8710     }
8711     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8712     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8713     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8714     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8715     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8716   }
8717   PetscCall(ISDestroy(&coarse_is));
8718 
8719   /* set defaults for coarse KSP and PC */
8720   if (multilevel_allowed) {
8721     coarse_ksp_type = KSPRICHARDSON;
8722     coarse_pc_type  = PCBDDC;
8723   } else {
8724     coarse_ksp_type = KSPPREONLY;
8725     coarse_pc_type  = PCREDUNDANT;
8726   }
8727 
8728   /* print some info if requested */
8729   if (pcbddc->dbg_flag) {
8730     if (!multilevel_allowed) {
8731       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8732       if (multilevel_requested) {
8733         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8734       } else if (pcbddc->max_levels) {
8735         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8736       }
8737       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8738     }
8739   }
8740 
8741   /* communicate coarse discrete gradient */
8742   coarseG = NULL;
8743   if (pcbddc->nedcG && multilevel_allowed) {
8744     MPI_Comm ccomm;
8745     if (coarse_mat) {
8746       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8747     } else {
8748       ccomm = MPI_COMM_NULL;
8749     }
8750     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8751   }
8752 
8753   /* create the coarse KSP object only once with defaults */
8754   if (coarse_mat) {
8755     PetscBool   isredundant, isbddc, force, valid;
8756     PetscViewer dbg_viewer = NULL;
8757     PetscBool   isset, issym, isher, isspd;
8758 
8759     if (pcbddc->dbg_flag) {
8760       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8761       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8762     }
8763     if (!pcbddc->coarse_ksp) {
8764       char   prefix[256], str_level[16];
8765       size_t len;
8766 
8767       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8768       PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8769       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8770       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8771       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1));
8772       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8773       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8774       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8775       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8776       /* TODO is this logic correct? should check for coarse_mat type */
8777       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8778       /* prefix */
8779       PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8780       PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8781       if (!pcbddc->current_level) {
8782         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8783         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8784       } else {
8785         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8786         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8787         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8788         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8789         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8790         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
8791         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8792       }
8793       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8794       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8795       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8796       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8797       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8798       /* allow user customization */
8799       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8800       /* get some info after set from options */
8801       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8802       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8803       force = PETSC_FALSE;
8804       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8805       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8806       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8807       if (multilevel_allowed && !force && !valid) {
8808         isbddc = PETSC_TRUE;
8809         PetscCall(PCSetType(pc_temp, PCBDDC));
8810         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8811         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8812         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8813         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8814           PetscObjectOptionsBegin((PetscObject)pc_temp);
8815           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8816           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8817           PetscOptionsEnd();
8818           pc_temp->setfromoptionscalled++;
8819         }
8820       }
8821     }
8822     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8823     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8824     if (nisdofs) {
8825       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8826       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8827     }
8828     if (nisneu) {
8829       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8830       PetscCall(ISDestroy(&isarray[nisdofs]));
8831     }
8832     if (nisvert) {
8833       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8834       PetscCall(ISDestroy(&isarray[nis - 1]));
8835     }
8836     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8837 
8838     /* get some info after set from options */
8839     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8840 
8841     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8842     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8843     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8844     force = PETSC_FALSE;
8845     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8846     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8847     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8848     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8849     if (isredundant) {
8850       KSP inner_ksp;
8851       PC  inner_pc;
8852 
8853       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8854       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8855     }
8856 
8857     /* parameters which miss an API */
8858     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8859     if (isbddc) {
8860       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8861 
8862       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8863       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8864       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8865       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8866       if (pcbddc_coarse->benign_saddle_point) {
8867         Mat                    coarsedivudotp_is;
8868         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8869         IS                     row, col;
8870         const PetscInt        *gidxs;
8871         PetscInt               n, st, M, N;
8872 
8873         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8874         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8875         st = st - n;
8876         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8877         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8878         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8879         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
8880         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
8881         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
8882         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
8883         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
8884         PetscCall(ISGetSize(row, &M));
8885         PetscCall(MatGetSize(coarse_mat, &N, NULL));
8886         PetscCall(ISDestroy(&row));
8887         PetscCall(ISDestroy(&col));
8888         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
8889         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
8890         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
8891         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
8892         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
8893         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
8894         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
8895         PetscCall(MatDestroy(&coarsedivudotp));
8896         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
8897         PetscCall(MatDestroy(&coarsedivudotp_is));
8898         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8899         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8900       }
8901     }
8902 
8903     /* propagate symmetry info of coarse matrix */
8904     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
8905     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
8906     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
8907     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
8908     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
8909     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
8910     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
8911 
8912     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
8913     /* set operators */
8914     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
8915     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
8916     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8917     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
8918   }
8919   PetscCall(MatDestroy(&coarseG));
8920   PetscCall(PetscFree(isarray));
8921 #if 0
8922   {
8923     PetscViewer viewer;
8924     char filename[256];
8925     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
8926     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8927     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8928     PetscCall(MatView(coarse_mat,viewer));
8929     PetscCall(PetscViewerPopFormat(viewer));
8930     PetscCall(PetscViewerDestroy(&viewer));
8931   }
8932 #endif
8933 
8934   if (corners) {
8935     Vec             gv;
8936     IS              is;
8937     const PetscInt *idxs;
8938     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8939     PetscScalar    *coords;
8940 
8941     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
8942     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
8943     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
8944     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
8945     PetscCall(VecSetBlockSize(gv, cdim));
8946     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
8947     PetscCall(VecSetType(gv, VECSTANDARD));
8948     PetscCall(VecSetFromOptions(gv));
8949     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
8950 
8951     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8952     PetscCall(ISGetLocalSize(is, &n));
8953     PetscCall(ISGetIndices(is, &idxs));
8954     PetscCall(PetscMalloc1(n * cdim, &coords));
8955     for (i = 0; i < n; i++) {
8956       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8957     }
8958     PetscCall(ISRestoreIndices(is, &idxs));
8959     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8960 
8961     PetscCall(ISGetLocalSize(corners, &n));
8962     PetscCall(ISGetIndices(corners, &idxs));
8963     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
8964     PetscCall(ISRestoreIndices(corners, &idxs));
8965     PetscCall(PetscFree(coords));
8966     PetscCall(VecAssemblyBegin(gv));
8967     PetscCall(VecAssemblyEnd(gv));
8968     PetscCall(VecGetArray(gv, &coords));
8969     if (pcbddc->coarse_ksp) {
8970       PC        coarse_pc;
8971       PetscBool isbddc;
8972 
8973       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
8974       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
8975       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8976         PetscReal *realcoords;
8977 
8978         PetscCall(VecGetLocalSize(gv, &n));
8979 #if defined(PETSC_USE_COMPLEX)
8980         PetscCall(PetscMalloc1(n, &realcoords));
8981         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
8982 #else
8983         realcoords = coords;
8984 #endif
8985         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
8986 #if defined(PETSC_USE_COMPLEX)
8987         PetscCall(PetscFree(realcoords));
8988 #endif
8989       }
8990     }
8991     PetscCall(VecRestoreArray(gv, &coords));
8992     PetscCall(VecDestroy(&gv));
8993   }
8994   PetscCall(ISDestroy(&corners));
8995 
8996   if (pcbddc->coarse_ksp) {
8997     Vec crhs, csol;
8998 
8999     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9000     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9001     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9002     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9003   }
9004   PetscCall(MatDestroy(&coarsedivudotp));
9005 
9006   /* compute null space for coarse solver if the benign trick has been requested */
9007   if (pcbddc->benign_null) {
9008     PetscCall(VecSet(pcbddc->vec1_P, 0.));
9009     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9010     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9011     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9012     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9013     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9014     if (coarse_mat) {
9015       Vec          nullv;
9016       PetscScalar *array, *array2;
9017       PetscInt     nl;
9018 
9019       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9020       PetscCall(VecGetLocalSize(nullv, &nl));
9021       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9022       PetscCall(VecGetArray(nullv, &array2));
9023       PetscCall(PetscArraycpy(array2, array, nl));
9024       PetscCall(VecRestoreArray(nullv, &array2));
9025       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9026       PetscCall(VecNormalize(nullv, NULL));
9027       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9028       PetscCall(VecDestroy(&nullv));
9029     }
9030   }
9031   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
9032 
9033   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9034   if (pcbddc->coarse_ksp) {
9035     PetscBool ispreonly;
9036 
9037     if (CoarseNullSpace) {
9038       PetscBool isnull;
9039 
9040       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9041       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9042       /* TODO: add local nullspaces (if any) */
9043     }
9044     /* setup coarse ksp */
9045     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9046     /* Check coarse problem if in debug mode or if solving with an iterative method */
9047     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9048     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9049       KSP         check_ksp;
9050       KSPType     check_ksp_type;
9051       PC          check_pc;
9052       Vec         check_vec, coarse_vec;
9053       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9054       PetscInt    its;
9055       PetscBool   compute_eigs;
9056       PetscReal  *eigs_r, *eigs_c;
9057       PetscInt    neigs;
9058       const char *prefix;
9059 
9060       /* Create ksp object suitable for estimation of extreme eigenvalues */
9061       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9062       PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9063       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9064       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9065       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9066       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size));
9067       /* prevent from setup unneeded object */
9068       PetscCall(KSPGetPC(check_ksp, &check_pc));
9069       PetscCall(PCSetType(check_pc, PCNONE));
9070       if (ispreonly) {
9071         check_ksp_type = KSPPREONLY;
9072         compute_eigs   = PETSC_FALSE;
9073       } else {
9074         check_ksp_type = KSPGMRES;
9075         compute_eigs   = PETSC_TRUE;
9076       }
9077       PetscCall(KSPSetType(check_ksp, check_ksp_type));
9078       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9079       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9080       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9081       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9082       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9083       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9084       PetscCall(KSPSetFromOptions(check_ksp));
9085       PetscCall(KSPSetUp(check_ksp));
9086       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9087       PetscCall(KSPSetPC(check_ksp, check_pc));
9088       /* create random vec */
9089       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9090       PetscCall(VecSetRandom(check_vec, NULL));
9091       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9092       /* solve coarse problem */
9093       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9094       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9095       /* set eigenvalue estimation if preonly has not been requested */
9096       if (compute_eigs) {
9097         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9098         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9099         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9100         if (neigs) {
9101           lambda_max = eigs_r[neigs - 1];
9102           lambda_min = eigs_r[0];
9103           if (pcbddc->use_coarse_estimates) {
9104             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9105               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9106               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9107             }
9108           }
9109         }
9110       }
9111 
9112       /* check coarse problem residual error */
9113       if (pcbddc->dbg_flag) {
9114         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9115         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9116         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9117         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9118         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9119         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9120         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9121         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9122         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer));
9123         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
9124         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9125         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9126         if (compute_eigs) {
9127           PetscReal          lambda_max_s, lambda_min_s;
9128           KSPConvergedReason reason;
9129           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9130           PetscCall(KSPGetIterationNumber(check_ksp, &its));
9131           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9132           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9133           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9134           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9135         }
9136         PetscCall(PetscViewerFlush(dbg_viewer));
9137         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9138       }
9139       PetscCall(VecDestroy(&check_vec));
9140       PetscCall(VecDestroy(&coarse_vec));
9141       PetscCall(KSPDestroy(&check_ksp));
9142       if (compute_eigs) {
9143         PetscCall(PetscFree(eigs_r));
9144         PetscCall(PetscFree(eigs_c));
9145       }
9146     }
9147   }
9148   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9149   /* print additional info */
9150   if (pcbddc->dbg_flag) {
9151     /* waits until all processes reaches this point */
9152     PetscCall(PetscBarrier((PetscObject)pc));
9153     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9154     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9155   }
9156 
9157   /* free memory */
9158   PetscCall(MatDestroy(&coarse_mat));
9159   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9160   PetscFunctionReturn(PETSC_SUCCESS);
9161 }
9162 
9163 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9164 {
9165   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
9166   PC_IS          *pcis   = (PC_IS *)pc->data;
9167   IS              subset, subset_mult, subset_n;
9168   PetscInt        local_size, coarse_size = 0;
9169   PetscInt       *local_primal_indices = NULL;
9170   const PetscInt *t_local_primal_indices;
9171 
9172   PetscFunctionBegin;
9173   /* Compute global number of coarse dofs */
9174   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9175   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9176   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9177   PetscCall(ISDestroy(&subset_n));
9178   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9179   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9180   PetscCall(ISDestroy(&subset));
9181   PetscCall(ISDestroy(&subset_mult));
9182   PetscCall(ISGetLocalSize(subset_n, &local_size));
9183   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9184   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9185   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9186   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9187   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9188   PetscCall(ISDestroy(&subset_n));
9189 
9190   if (pcbddc->dbg_flag) {
9191     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9192     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9193     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9194     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9195   }
9196 
9197   /* get back data */
9198   *coarse_size_n          = coarse_size;
9199   *local_primal_indices_n = local_primal_indices;
9200   PetscFunctionReturn(PETSC_SUCCESS);
9201 }
9202 
9203 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9204 {
9205   IS           localis_t;
9206   PetscInt     i, lsize, *idxs, n;
9207   PetscScalar *vals;
9208 
9209   PetscFunctionBegin;
9210   /* get indices in local ordering exploiting local to global map */
9211   PetscCall(ISGetLocalSize(globalis, &lsize));
9212   PetscCall(PetscMalloc1(lsize, &vals));
9213   for (i = 0; i < lsize; i++) vals[i] = 1.0;
9214   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9215   PetscCall(VecSet(gwork, 0.0));
9216   PetscCall(VecSet(lwork, 0.0));
9217   if (idxs) { /* multilevel guard */
9218     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9219     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9220   }
9221   PetscCall(VecAssemblyBegin(gwork));
9222   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9223   PetscCall(PetscFree(vals));
9224   PetscCall(VecAssemblyEnd(gwork));
9225   /* now compute set in local ordering */
9226   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9227   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9228   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9229   PetscCall(VecGetSize(lwork, &n));
9230   for (i = 0, lsize = 0; i < n; i++) {
9231     if (PetscRealPart(vals[i]) > 0.5) lsize++;
9232   }
9233   PetscCall(PetscMalloc1(lsize, &idxs));
9234   for (i = 0, lsize = 0; i < n; i++) {
9235     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9236   }
9237   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9238   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9239   *localis = localis_t;
9240   PetscFunctionReturn(PETSC_SUCCESS);
9241 }
9242 
9243 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9244 {
9245   PC_IS   *pcis   = (PC_IS *)pc->data;
9246   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9247   PC_IS   *pcisf;
9248   PC_BDDC *pcbddcf;
9249   PC       pcf;
9250 
9251   PetscFunctionBegin;
9252   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9253   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9254   PetscCall(PCSetType(pcf, PCBDDC));
9255 
9256   pcisf   = (PC_IS *)pcf->data;
9257   pcbddcf = (PC_BDDC *)pcf->data;
9258 
9259   pcisf->is_B_local = pcis->is_B_local;
9260   pcisf->vec1_N     = pcis->vec1_N;
9261   pcisf->BtoNmap    = pcis->BtoNmap;
9262   pcisf->n          = pcis->n;
9263   pcisf->n_B        = pcis->n_B;
9264 
9265   PetscCall(PetscFree(pcbddcf->mat_graph));
9266   PetscCall(PetscFree(pcbddcf->sub_schurs));
9267   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
9268   pcbddcf->sub_schurs            = schurs;
9269   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
9270   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9271   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9272   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
9273   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
9274   pcbddcf->use_faces             = PETSC_TRUE;
9275   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
9276   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
9277   pcbddcf->use_qr_single         = (PetscBool)!constraints;
9278   pcbddcf->fake_change           = PETSC_TRUE;
9279   pcbddcf->dbg_flag              = pcbddc->dbg_flag;
9280 
9281   PetscCall(PCBDDCAdaptiveSelection(pcf));
9282   PetscCall(PCBDDCConstraintsSetUp(pcf));
9283 
9284   *change = pcbddcf->ConstraintMatrix;
9285   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9286   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9287   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
9288 
9289   if (schurs) pcbddcf->sub_schurs = NULL;
9290   pcbddcf->ConstraintMatrix = NULL;
9291   pcbddcf->mat_graph        = NULL;
9292   pcisf->is_B_local         = NULL;
9293   pcisf->vec1_N             = NULL;
9294   pcisf->BtoNmap            = NULL;
9295   PetscCall(PCDestroy(&pcf));
9296   PetscFunctionReturn(PETSC_SUCCESS);
9297 }
9298 
9299 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9300 {
9301   PC_IS          *pcis       = (PC_IS *)pc->data;
9302   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
9303   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9304   Mat             S_j;
9305   PetscInt       *used_xadj, *used_adjncy;
9306   PetscBool       free_used_adj;
9307 
9308   PetscFunctionBegin;
9309   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9310   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9311   free_used_adj = PETSC_FALSE;
9312   if (pcbddc->sub_schurs_layers == -1) {
9313     used_xadj   = NULL;
9314     used_adjncy = NULL;
9315   } else {
9316     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9317       used_xadj   = pcbddc->mat_graph->xadj;
9318       used_adjncy = pcbddc->mat_graph->adjncy;
9319     } else if (pcbddc->computed_rowadj) {
9320       used_xadj   = pcbddc->mat_graph->xadj;
9321       used_adjncy = pcbddc->mat_graph->adjncy;
9322     } else {
9323       PetscBool       flg_row = PETSC_FALSE;
9324       const PetscInt *xadj, *adjncy;
9325       PetscInt        nvtxs;
9326 
9327       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9328       if (flg_row) {
9329         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9330         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9331         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9332         free_used_adj = PETSC_TRUE;
9333       } else {
9334         pcbddc->sub_schurs_layers = -1;
9335         used_xadj                 = NULL;
9336         used_adjncy               = NULL;
9337       }
9338       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9339     }
9340   }
9341 
9342   /* setup sub_schurs data */
9343   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9344   if (!sub_schurs->schur_explicit) {
9345     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9346     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9347     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9348   } else {
9349     Mat       change        = NULL;
9350     Vec       scaling       = NULL;
9351     IS        change_primal = NULL, iP;
9352     PetscInt  benign_n;
9353     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
9354     PetscBool need_change       = PETSC_FALSE;
9355     PetscBool discrete_harmonic = PETSC_FALSE;
9356 
9357     if (!pcbddc->use_vertices && reuse_solvers) {
9358       PetscInt n_vertices;
9359 
9360       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9361       reuse_solvers = (PetscBool)!n_vertices;
9362     }
9363     if (!pcbddc->benign_change_explicit) {
9364       benign_n = pcbddc->benign_n;
9365     } else {
9366       benign_n = 0;
9367     }
9368     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9369        We need a global reduction to avoid possible deadlocks.
9370        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9371     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9372       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9373       PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9374       need_change = (PetscBool)(!need_change);
9375     }
9376     /* If the user defines additional constraints, we import them here */
9377     if (need_change) {
9378       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9379       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9380     }
9381     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9382 
9383     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9384     if (iP) {
9385       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9386       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9387       PetscOptionsEnd();
9388     }
9389     if (discrete_harmonic) {
9390       Mat A;
9391       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9392       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9393       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9394       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9395                                      pcbddc->benign_zerodiag_subs, change, change_primal));
9396       PetscCall(MatDestroy(&A));
9397     } else {
9398       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9399                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9400     }
9401     PetscCall(MatDestroy(&change));
9402     PetscCall(ISDestroy(&change_primal));
9403   }
9404   PetscCall(MatDestroy(&S_j));
9405 
9406   /* free adjacency */
9407   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9408   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9409   PetscFunctionReturn(PETSC_SUCCESS);
9410 }
9411 
9412 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9413 {
9414   PC_IS      *pcis   = (PC_IS *)pc->data;
9415   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9416   PCBDDCGraph graph;
9417 
9418   PetscFunctionBegin;
9419   /* attach interface graph for determining subsets */
9420   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9421     IS       verticesIS, verticescomm;
9422     PetscInt vsize, *idxs;
9423 
9424     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9425     PetscCall(ISGetSize(verticesIS, &vsize));
9426     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9427     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9428     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9429     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9430     PetscCall(PCBDDCGraphCreate(&graph));
9431     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9432     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9433     PetscCall(ISDestroy(&verticescomm));
9434     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9435   } else {
9436     graph = pcbddc->mat_graph;
9437   }
9438   /* print some info */
9439   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9440     IS       vertices;
9441     PetscInt nv, nedges, nfaces;
9442     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9443     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9444     PetscCall(ISGetSize(vertices, &nv));
9445     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9446     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9447     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9448     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9449     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9450     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9451     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9452     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9453   }
9454 
9455   /* sub_schurs init */
9456   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9457   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
9458 
9459   /* free graph struct */
9460   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9461   PetscFunctionReturn(PETSC_SUCCESS);
9462 }
9463 
9464 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9465 {
9466   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
9467   PetscInt        n     = pc->pmat->rmap->n, ln, ni, st;
9468   const PetscInt *idxs;
9469   IS              gis;
9470 
9471   PetscFunctionBegin;
9472   if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9473   PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9474   PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9475   PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9476   PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9477   PetscCall(ISGetLocalSize(is, &ni));
9478   PetscCall(ISGetIndices(is, &idxs));
9479   for (PetscInt i = 0; i < ni; i++) {
9480     if (idxs[i] < 0 || idxs[i] >= ln) continue;
9481     matis->sf_leafdata[idxs[i]] = 1;
9482   }
9483   PetscCall(ISRestoreIndices(is, &idxs));
9484   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9485   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9486   ln = 0;
9487   for (PetscInt i = 0; i < n; i++) {
9488     if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9489   }
9490   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9491   PetscCall(ISView(gis, viewer));
9492   PetscCall(ISDestroy(&gis));
9493   PetscFunctionReturn(PETSC_SUCCESS);
9494 }
9495 
9496 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9497 {
9498   PetscInt    header[11];
9499   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9500   PetscViewer viewer;
9501   MPI_Comm    comm = PetscObjectComm((PetscObject)pc);
9502 
9503   PetscFunctionBegin;
9504   PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9505   if (load) {
9506     IS  is;
9507     Mat A;
9508 
9509     PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9510     PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9511     PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9512     PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9513     PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9514     PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9515     PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9516     PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9517     PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9518     PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9519     PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9520     if (header[0]) {
9521       PetscCall(ISCreate(comm, &is));
9522       PetscCall(ISLoad(is, viewer));
9523       PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9524       PetscCall(ISDestroy(&is));
9525     }
9526     if (header[1]) {
9527       PetscCall(ISCreate(comm, &is));
9528       PetscCall(ISLoad(is, viewer));
9529       PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9530       PetscCall(ISDestroy(&is));
9531     }
9532     if (header[2]) {
9533       IS *isarray;
9534 
9535       PetscCall(PetscMalloc1(header[2], &isarray));
9536       for (PetscInt i = 0; i < header[2]; i++) {
9537         PetscCall(ISCreate(comm, &isarray[i]));
9538         PetscCall(ISLoad(isarray[i], viewer));
9539       }
9540       PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9541       for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9542       PetscCall(PetscFree(isarray));
9543     }
9544     if (header[3]) {
9545       PetscCall(ISCreate(comm, &is));
9546       PetscCall(ISLoad(is, viewer));
9547       PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9548       PetscCall(ISDestroy(&is));
9549     }
9550     if (header[4]) {
9551       PetscCall(MatCreate(comm, &A));
9552       PetscCall(MatSetType(A, MATAIJ));
9553       PetscCall(MatLoad(A, viewer));
9554       PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9555       PetscCall(MatDestroy(&A));
9556     }
9557     if (header[9]) {
9558       PetscCall(MatCreate(comm, &A));
9559       PetscCall(MatSetType(A, MATIS));
9560       PetscCall(MatLoad(A, viewer));
9561       PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9562       PetscCall(MatDestroy(&A));
9563     }
9564   } else {
9565     header[0]  = (PetscInt)!!pcbddc->DirichletBoundariesLocal;
9566     header[1]  = (PetscInt)!!pcbddc->NeumannBoundariesLocal;
9567     header[2]  = pcbddc->n_ISForDofsLocal;
9568     header[3]  = (PetscInt)!!pcbddc->user_primal_vertices_local;
9569     header[4]  = (PetscInt)!!pcbddc->discretegradient;
9570     header[5]  = pcbddc->nedorder;
9571     header[6]  = pcbddc->nedfield;
9572     header[7]  = (PetscInt)pcbddc->nedglobal;
9573     header[8]  = (PetscInt)pcbddc->conforming;
9574     header[9]  = (PetscInt)!!pcbddc->divudotp;
9575     header[10] = (PetscInt)pcbddc->divudotp_trans;
9576     if (header[4]) header[3] = 0;
9577 
9578     PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9579     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9580     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9581     for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9582     if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9583     if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9584     if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9585   }
9586   PetscCall(PetscViewerDestroy(&viewer));
9587   PetscFunctionReturn(PETSC_SUCCESS);
9588 }
9589 
9590 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9591 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9592 {
9593   Mat         At;
9594   IS          rows;
9595   PetscInt    rst, ren;
9596   PetscLayout rmap;
9597 
9598   PetscFunctionBegin;
9599   rst = ren = 0;
9600   if (ccomm != MPI_COMM_NULL) {
9601     PetscCall(PetscLayoutCreate(ccomm, &rmap));
9602     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9603     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9604     PetscCall(PetscLayoutSetUp(rmap));
9605     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9606   }
9607   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9608   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9609   PetscCall(ISDestroy(&rows));
9610 
9611   if (ccomm != MPI_COMM_NULL) {
9612     Mat_MPIAIJ *a, *b;
9613     IS          from, to;
9614     Vec         gvec;
9615     PetscInt    lsize;
9616 
9617     PetscCall(MatCreate(ccomm, B));
9618     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9619     PetscCall(MatSetType(*B, MATAIJ));
9620     PetscCall(PetscLayoutDestroy(&(*B)->rmap));
9621     PetscCall(PetscLayoutSetUp((*B)->cmap));
9622     a = (Mat_MPIAIJ *)At->data;
9623     b = (Mat_MPIAIJ *)(*B)->data;
9624     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9625     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9626     PetscCall(PetscObjectReference((PetscObject)a->A));
9627     PetscCall(PetscObjectReference((PetscObject)a->B));
9628     b->A = a->A;
9629     b->B = a->B;
9630 
9631     b->donotstash   = a->donotstash;
9632     b->roworiented  = a->roworiented;
9633     b->rowindices   = NULL;
9634     b->rowvalues    = NULL;
9635     b->getrowactive = PETSC_FALSE;
9636 
9637     (*B)->rmap         = rmap;
9638     (*B)->factortype   = A->factortype;
9639     (*B)->assembled    = PETSC_TRUE;
9640     (*B)->insertmode   = NOT_SET_VALUES;
9641     (*B)->preallocated = PETSC_TRUE;
9642 
9643     if (a->colmap) {
9644 #if defined(PETSC_USE_CTABLE)
9645       PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9646 #else
9647       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9648       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9649 #endif
9650     } else b->colmap = NULL;
9651     if (a->garray) {
9652       PetscInt len;
9653       len = a->B->cmap->n;
9654       PetscCall(PetscMalloc1(len + 1, &b->garray));
9655       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9656     } else b->garray = NULL;
9657 
9658     PetscCall(PetscObjectReference((PetscObject)a->lvec));
9659     b->lvec = a->lvec;
9660 
9661     /* cannot use VecScatterCopy */
9662     PetscCall(VecGetLocalSize(b->lvec, &lsize));
9663     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9664     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9665     PetscCall(MatCreateVecs(*B, &gvec, NULL));
9666     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9667     PetscCall(ISDestroy(&from));
9668     PetscCall(ISDestroy(&to));
9669     PetscCall(VecDestroy(&gvec));
9670   }
9671   PetscCall(MatDestroy(&At));
9672   PetscFunctionReturn(PETSC_SUCCESS);
9673 }
9674 
9675 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
9676 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9677 {
9678   PetscBool isaij;
9679   MPI_Comm  comm;
9680 
9681   PetscFunctionBegin;
9682   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9683   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9684   PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9685   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9686   if (isaij) { /* SeqAIJ supports repeated rows */
9687     PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9688   } else {
9689     Mat                A_loc;
9690     Mat_SeqAIJ        *da;
9691     PetscSF            sf;
9692     PetscInt           ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9693     PetscScalar       *daa;
9694     const PetscInt    *idxs;
9695     const PetscSFNode *iremotes;
9696     PetscSFNode       *remotes;
9697 
9698     /* SF for incoming rows */
9699     PetscCall(PetscSFCreate(comm, &sf));
9700     PetscCall(ISGetLocalSize(rows, &ni));
9701     PetscCall(ISGetIndices(rows, &idxs));
9702     PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9703     PetscCall(ISRestoreIndices(rows, &idxs));
9704 
9705     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9706     da = (Mat_SeqAIJ *)A_loc->data;
9707     PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9708     for (PetscInt i = 0; i < m; i++) {
9709       rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9710       rdata[2 * i + 1] = da->i[i];
9711     }
9712     PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9713     PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9714     PetscCall(PetscMalloc1(ni + 1, &di));
9715     di[0] = 0;
9716     for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9717     PetscCall(PetscMalloc1(di[ni], &dj));
9718     PetscCall(PetscMalloc1(di[ni], &daa));
9719     PetscCall(PetscMalloc1(di[ni], &remotes));
9720 
9721     PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));
9722 
9723     /* SF graph for nonzeros */
9724     c = 0;
9725     for (PetscInt i = 0; i < ni; i++) {
9726       const PetscInt rank  = iremotes[i].rank;
9727       const PetscInt rsize = ldata[2 * i];
9728       for (PetscInt j = 0; j < rsize; j++) {
9729         remotes[c].rank  = rank;
9730         remotes[c].index = ldata[2 * i + 1] + j;
9731         c++;
9732       }
9733     }
9734     PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9735     PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9736     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9737     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9738     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9739     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9740 
9741     PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9742     PetscCall(MatDestroy(&A_loc));
9743     PetscCall(PetscSFDestroy(&sf));
9744     PetscCall(PetscFree(di));
9745     PetscCall(PetscFree(dj));
9746     PetscCall(PetscFree(daa));
9747     PetscCall(PetscFree(remotes));
9748     PetscCall(PetscFree2(ldata, rdata));
9749   }
9750   PetscFunctionReturn(PETSC_SUCCESS);
9751 }
9752