xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 28b400f66ebc7ae0049166a2294dfcd3df27e64b)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/ksp/pc/impls/bddc/bddc.h>
3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5 #include <petscdmplex.h>
6 #include <petscblaslapack.h>
7 #include <petsc/private/sfimpl.h>
8 #include <petsc/private/dmpleximpl.h>
9 #include <petscdmda.h>
10 
11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);
12 
13 /* if range is true,  it returns B s.t. span{B} = range(A)
14    if range is false, it returns B s.t. range(B) _|_ range(A) */
15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16 {
17   PetscScalar    *uwork,*data,*U, ds = 0.;
18   PetscReal      *sing;
19   PetscBLASInt   bM,bN,lwork,lierr,di = 1;
20   PetscInt       ulw,i,nr,nc,n;
21 #if defined(PETSC_USE_COMPLEX)
22   PetscReal      *rwork2;
23 #endif
24 
25   PetscFunctionBegin;
26   CHKERRQ(MatGetSize(A,&nr,&nc));
27   if (!nr || !nc) PetscFunctionReturn(0);
28 
29   /* workspace */
30   if (!work) {
31     ulw  = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
32     CHKERRQ(PetscMalloc1(ulw,&uwork));
33   } else {
34     ulw   = lw;
35     uwork = work;
36   }
37   n = PetscMin(nr,nc);
38   if (!rwork) {
39     CHKERRQ(PetscMalloc1(n,&sing));
40   } else {
41     sing = rwork;
42   }
43 
44   /* SVD */
45   CHKERRQ(PetscMalloc1(nr*nr,&U));
46   CHKERRQ(PetscBLASIntCast(nr,&bM));
47   CHKERRQ(PetscBLASIntCast(nc,&bN));
48   CHKERRQ(PetscBLASIntCast(ulw,&lwork));
49   CHKERRQ(MatDenseGetArray(A,&data));
50   CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
51 #if !defined(PETSC_USE_COMPLEX)
52   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
53 #else
54   CHKERRQ(PetscMalloc1(5*n,&rwork2));
55   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr));
56   CHKERRQ(PetscFree(rwork2));
57 #endif
58   CHKERRQ(PetscFPTrapPop());
59   PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
60   CHKERRQ(MatDenseRestoreArray(A,&data));
61   for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
62   if (!rwork) {
63     CHKERRQ(PetscFree(sing));
64   }
65   if (!work) {
66     CHKERRQ(PetscFree(uwork));
67   }
68   /* create B */
69   if (!range) {
70     CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B));
71     CHKERRQ(MatDenseGetArray(*B,&data));
72     CHKERRQ(PetscArraycpy(data,U+nr*i,(nr-i)*nr));
73   } else {
74     CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B));
75     CHKERRQ(MatDenseGetArray(*B,&data));
76     CHKERRQ(PetscArraycpy(data,U,i*nr));
77   }
78   CHKERRQ(MatDenseRestoreArray(*B,&data));
79   CHKERRQ(PetscFree(U));
80   PetscFunctionReturn(0);
81 }
82 
83 /* TODO REMOVE */
84 #if defined(PRINT_GDET)
85 static int inc = 0;
86 static int lev = 0;
87 #endif
88 
89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
90 {
91   Mat            GE,GEd;
92   PetscInt       rsize,csize,esize;
93   PetscScalar    *ptr;
94 
95   PetscFunctionBegin;
96   CHKERRQ(ISGetSize(edge,&esize));
97   if (!esize) PetscFunctionReturn(0);
98   CHKERRQ(ISGetSize(extrow,&rsize));
99   CHKERRQ(ISGetSize(extcol,&csize));
100 
101   /* gradients */
102   ptr  = work + 5*esize;
103   CHKERRQ(MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE));
104   CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins));
105   CHKERRQ(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins));
106   CHKERRQ(MatDestroy(&GE));
107 
108   /* constants */
109   ptr += rsize*csize;
110   CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd));
111   CHKERRQ(MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE));
112   CHKERRQ(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd));
113   CHKERRQ(MatDestroy(&GE));
114   CHKERRQ(MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins));
115   CHKERRQ(MatDestroy(&GEd));
116 
117   if (corners) {
118     Mat               GEc;
119     const PetscScalar *vals;
120     PetscScalar       v;
121 
122     CHKERRQ(MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc));
123     CHKERRQ(MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd));
124     CHKERRQ(MatDenseGetArrayRead(GEd,&vals));
125     /* v    = PetscAbsScalar(vals[0]) */;
126     v    = 1.;
127     cvals[0] = vals[0]/v;
128     cvals[1] = vals[1]/v;
129     CHKERRQ(MatDenseRestoreArrayRead(GEd,&vals));
130     CHKERRQ(MatScale(*GKins,1./v));
131 #if defined(PRINT_GDET)
132     {
133       PetscViewer viewer;
134       char filename[256];
135       sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
136       CHKERRQ(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
137       CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
138       CHKERRQ(PetscObjectSetName((PetscObject)GEc,"GEc"));
139       CHKERRQ(MatView(GEc,viewer));
140       CHKERRQ(PetscObjectSetName((PetscObject)(*GKins),"GK"));
141       CHKERRQ(MatView(*GKins,viewer));
142       CHKERRQ(PetscObjectSetName((PetscObject)GEd,"Gproj"));
143       CHKERRQ(MatView(GEd,viewer));
144       CHKERRQ(PetscViewerDestroy(&viewer));
145     }
146 #endif
147     CHKERRQ(MatDestroy(&GEd));
148     CHKERRQ(MatDestroy(&GEc));
149   }
150 
151   PetscFunctionReturn(0);
152 }
153 
154 PetscErrorCode PCBDDCNedelecSupport(PC pc)
155 {
156   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
157   Mat_IS                 *matis = (Mat_IS*)pc->pmat->data;
158   Mat                    G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
159   Vec                    tvec;
160   PetscSF                sfv;
161   ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
162   MPI_Comm               comm;
163   IS                     lned,primals,allprimals,nedfieldlocal;
164   IS                     *eedges,*extrows,*extcols,*alleedges;
165   PetscBT                btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
166   PetscScalar            *vals,*work;
167   PetscReal              *rwork;
168   const PetscInt         *idxs,*ii,*jj,*iit,*jjt;
169   PetscInt               ne,nv,Lv,order,n,field;
170   PetscInt               n_neigh,*neigh,*n_shared,**shared;
171   PetscInt               i,j,extmem,cum,maxsize,nee;
172   PetscInt               *extrow,*extrowcum,*marks,*vmarks,*gidxs;
173   PetscInt               *sfvleaves,*sfvroots;
174   PetscInt               *corners,*cedges;
175   PetscInt               *ecount,**eneighs,*vcount,**vneighs;
176   PetscInt               *emarks;
177   PetscBool              print,eerr,done,lrc[2],conforming,global,singular,setprimal;
178   PetscErrorCode         ierr;
179 
180   PetscFunctionBegin;
181   /* If the discrete gradient is defined for a subset of dofs and global is true,
182      it assumes G is given in global ordering for all the dofs.
183      Otherwise, the ordering is global for the Nedelec field */
184   order      = pcbddc->nedorder;
185   conforming = pcbddc->conforming;
186   field      = pcbddc->nedfield;
187   global     = pcbddc->nedglobal;
188   setprimal  = PETSC_FALSE;
189   print      = PETSC_FALSE;
190   singular   = PETSC_FALSE;
191 
192   /* Command line customization */
193   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr);
194   CHKERRQ(PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL));
195   CHKERRQ(PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL));
196   CHKERRQ(PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL));
197   /* print debug info TODO: to be removed */
198   CHKERRQ(PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL));
199   ierr = PetscOptionsEnd();CHKERRQ(ierr);
200 
201   /* Return if there are no edges in the decomposition and the problem is not singular */
202   CHKERRQ(MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL));
203   CHKERRQ(ISLocalToGlobalMappingGetSize(al2g,&n));
204   CHKERRQ(PetscObjectGetComm((PetscObject)pc,&comm));
205   if (!singular) {
206     CHKERRQ(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals));
207     lrc[0] = PETSC_FALSE;
208     for (i=0;i<n;i++) {
209       if (PetscRealPart(vals[i]) > 2.) {
210         lrc[0] = PETSC_TRUE;
211         break;
212       }
213     }
214     CHKERRQ(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals));
215     CHKERRMPI(MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm));
216     if (!lrc[1]) PetscFunctionReturn(0);
217   }
218 
219   /* Get Nedelec field */
220   PetscCheckFalse(pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal,comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
221   if (pcbddc->n_ISForDofsLocal && field >= 0) {
222     CHKERRQ(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
223     nedfieldlocal = pcbddc->ISForDofsLocal[field];
224     CHKERRQ(ISGetLocalSize(nedfieldlocal,&ne));
225   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
226     ne            = n;
227     nedfieldlocal = NULL;
228     global        = PETSC_TRUE;
229   } else if (field == PETSC_DECIDE) {
230     PetscInt rst,ren,*idx;
231 
232     CHKERRQ(PetscArrayzero(matis->sf_leafdata,n));
233     CHKERRQ(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n));
234     CHKERRQ(MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren));
235     for (i=rst;i<ren;i++) {
236       PetscInt nc;
237 
238       CHKERRQ(MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL));
239       if (nc > 1) matis->sf_rootdata[i-rst] = 1;
240       CHKERRQ(MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL));
241     }
242     CHKERRQ(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE));
243     CHKERRQ(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE));
244     CHKERRQ(PetscMalloc1(n,&idx));
245     for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
246     CHKERRQ(ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal));
247   } else {
248     SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
249   }
250 
251   /* Sanity checks */
252   PetscCheckFalse(!order && !conforming,comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
253   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix,comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
254   PetscCheckFalse(order && ne%order,PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);
255 
256   /* Just set primal dofs and return */
257   if (setprimal) {
258     IS       enedfieldlocal;
259     PetscInt *eidxs;
260 
261     CHKERRQ(PetscMalloc1(ne,&eidxs));
262     CHKERRQ(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals));
263     if (nedfieldlocal) {
264       CHKERRQ(ISGetIndices(nedfieldlocal,&idxs));
265       for (i=0,cum=0;i<ne;i++) {
266         if (PetscRealPart(vals[idxs[i]]) > 2.) {
267           eidxs[cum++] = idxs[i];
268         }
269       }
270       CHKERRQ(ISRestoreIndices(nedfieldlocal,&idxs));
271     } else {
272       for (i=0,cum=0;i<ne;i++) {
273         if (PetscRealPart(vals[i]) > 2.) {
274           eidxs[cum++] = i;
275         }
276       }
277     }
278     CHKERRQ(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals));
279     CHKERRQ(ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal));
280     CHKERRQ(PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal));
281     CHKERRQ(PetscFree(eidxs));
282     CHKERRQ(ISDestroy(&nedfieldlocal));
283     CHKERRQ(ISDestroy(&enedfieldlocal));
284     PetscFunctionReturn(0);
285   }
286 
287   /* Compute some l2g maps */
288   if (nedfieldlocal) {
289     IS is;
290 
291     /* need to map from the local Nedelec field to local numbering */
292     CHKERRQ(ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g));
293     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
294     CHKERRQ(ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is));
295     CHKERRQ(ISLocalToGlobalMappingCreateIS(is,&al2g));
296     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
297     if (global) {
298       CHKERRQ(PetscObjectReference((PetscObject)al2g));
299       el2g = al2g;
300     } else {
301       IS gis;
302 
303       CHKERRQ(ISRenumber(is,NULL,NULL,&gis));
304       CHKERRQ(ISLocalToGlobalMappingCreateIS(gis,&el2g));
305       CHKERRQ(ISDestroy(&gis));
306     }
307     CHKERRQ(ISDestroy(&is));
308   } else {
309     /* restore default */
310     pcbddc->nedfield = -1;
311     /* one ref for the destruction of al2g, one for el2g */
312     CHKERRQ(PetscObjectReference((PetscObject)al2g));
313     CHKERRQ(PetscObjectReference((PetscObject)al2g));
314     el2g = al2g;
315     fl2g = NULL;
316   }
317 
318   /* Start communication to drop connections for interior edges (for cc analysis only) */
319   CHKERRQ(PetscArrayzero(matis->sf_leafdata,n));
320   CHKERRQ(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n));
321   if (nedfieldlocal) {
322     CHKERRQ(ISGetIndices(nedfieldlocal,&idxs));
323     for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
324     CHKERRQ(ISRestoreIndices(nedfieldlocal,&idxs));
325   } else {
326     for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
327   }
328   CHKERRQ(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM));
329   CHKERRQ(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM));
330 
331   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
332     CHKERRQ(MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G));
333     CHKERRQ(MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE));
334     if (global) {
335       PetscInt rst;
336 
337       CHKERRQ(MatGetOwnershipRange(G,&rst,NULL));
338       for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
339         if (matis->sf_rootdata[i] < 2) {
340           matis->sf_rootdata[cum++] = i + rst;
341         }
342       }
343       CHKERRQ(MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE));
344       CHKERRQ(MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL));
345     } else {
346       PetscInt *tbz;
347 
348       CHKERRQ(PetscMalloc1(ne,&tbz));
349       CHKERRQ(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE));
350       CHKERRQ(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE));
351       CHKERRQ(ISGetIndices(nedfieldlocal,&idxs));
352       for (i=0,cum=0;i<ne;i++)
353         if (matis->sf_leafdata[idxs[i]] == 1)
354           tbz[cum++] = i;
355       CHKERRQ(ISRestoreIndices(nedfieldlocal,&idxs));
356       CHKERRQ(ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz));
357       CHKERRQ(MatZeroRows(G,cum,tbz,0.,NULL,NULL));
358       CHKERRQ(PetscFree(tbz));
359     }
360   } else { /* we need the entire G to infer the nullspace */
361     CHKERRQ(PetscObjectReference((PetscObject)pcbddc->discretegradient));
362     G    = pcbddc->discretegradient;
363   }
364 
365   /* Extract subdomain relevant rows of G */
366   CHKERRQ(ISLocalToGlobalMappingGetIndices(el2g,&idxs));
367   CHKERRQ(ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned));
368   CHKERRQ(MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall));
369   CHKERRQ(ISLocalToGlobalMappingRestoreIndices(el2g,&idxs));
370   CHKERRQ(ISDestroy(&lned));
371   CHKERRQ(MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis));
372   CHKERRQ(MatDestroy(&lGall));
373   CHKERRQ(MatISGetLocalMat(lGis,&lG));
374 
375   /* SF for nodal dofs communications */
376   CHKERRQ(MatGetLocalSize(G,NULL,&Lv));
377   CHKERRQ(MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g));
378   CHKERRQ(PetscObjectReference((PetscObject)vl2g));
379   CHKERRQ(ISLocalToGlobalMappingGetSize(vl2g,&nv));
380   CHKERRQ(PetscSFCreate(comm,&sfv));
381   CHKERRQ(ISLocalToGlobalMappingGetIndices(vl2g,&idxs));
382   CHKERRQ(PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs));
383   CHKERRQ(ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs));
384   i    = singular ? 2 : 1;
385   CHKERRQ(PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots));
386 
387   /* Destroy temporary G created in MATIS format and modified G */
388   CHKERRQ(PetscObjectReference((PetscObject)lG));
389   CHKERRQ(MatDestroy(&lGis));
390   CHKERRQ(MatDestroy(&G));
391 
392   if (print) {
393     CHKERRQ(PetscObjectSetName((PetscObject)lG,"initial_lG"));
394     CHKERRQ(MatView(lG,NULL));
395   }
396 
397   /* Save lG for values insertion in change of basis */
398   CHKERRQ(MatDuplicate(lG,MAT_COPY_VALUES,&lGinit));
399 
400   /* Analyze the edge-nodes connections (duplicate lG) */
401   CHKERRQ(MatDuplicate(lG,MAT_COPY_VALUES,&lGe));
402   CHKERRQ(MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE));
403   CHKERRQ(PetscBTCreate(nv,&btv));
404   CHKERRQ(PetscBTCreate(ne,&bte));
405   CHKERRQ(PetscBTCreate(ne,&btb));
406   CHKERRQ(PetscBTCreate(ne,&btbd));
407   CHKERRQ(PetscBTCreate(nv,&btvcand));
408   /* need to import the boundary specification to ensure the
409      proper detection of coarse edges' endpoints */
410   if (pcbddc->DirichletBoundariesLocal) {
411     IS is;
412 
413     if (fl2g) {
414       CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is));
415     } else {
416       is = pcbddc->DirichletBoundariesLocal;
417     }
418     CHKERRQ(ISGetLocalSize(is,&cum));
419     CHKERRQ(ISGetIndices(is,&idxs));
420     for (i=0;i<cum;i++) {
421       if (idxs[i] >= 0) {
422         CHKERRQ(PetscBTSet(btb,idxs[i]));
423         CHKERRQ(PetscBTSet(btbd,idxs[i]));
424       }
425     }
426     CHKERRQ(ISRestoreIndices(is,&idxs));
427     if (fl2g) {
428       CHKERRQ(ISDestroy(&is));
429     }
430   }
431   if (pcbddc->NeumannBoundariesLocal) {
432     IS is;
433 
434     if (fl2g) {
435       CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is));
436     } else {
437       is = pcbddc->NeumannBoundariesLocal;
438     }
439     CHKERRQ(ISGetLocalSize(is,&cum));
440     CHKERRQ(ISGetIndices(is,&idxs));
441     for (i=0;i<cum;i++) {
442       if (idxs[i] >= 0) {
443         CHKERRQ(PetscBTSet(btb,idxs[i]));
444       }
445     }
446     CHKERRQ(ISRestoreIndices(is,&idxs));
447     if (fl2g) {
448       CHKERRQ(ISDestroy(&is));
449     }
450   }
451 
452   /* Count neighs per dof */
453   CHKERRQ(ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs));
454   CHKERRQ(ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs));
455 
456   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
457      for proper detection of coarse edges' endpoints */
458   CHKERRQ(PetscBTCreate(ne,&btee));
459   for (i=0;i<ne;i++) {
460     if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
461       CHKERRQ(PetscBTSet(btee,i));
462     }
463   }
464   CHKERRQ(PetscMalloc1(ne,&marks));
465   if (!conforming) {
466     CHKERRQ(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt));
467     CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done));
468   }
469   CHKERRQ(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
470   CHKERRQ(MatSeqAIJGetArray(lGe,&vals));
471   cum  = 0;
472   for (i=0;i<ne;i++) {
473     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
474     if (!PetscBTLookup(btee,i)) {
475       marks[cum++] = i;
476       continue;
477     }
478     /* set badly connected edge dofs as primal */
479     if (!conforming) {
480       if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
481         marks[cum++] = i;
482         CHKERRQ(PetscBTSet(bte,i));
483         for (j=ii[i];j<ii[i+1];j++) {
484           CHKERRQ(PetscBTSet(btv,jj[j]));
485         }
486       } else {
487         /* every edge dofs should be connected trough a certain number of nodal dofs
488            to other edge dofs belonging to coarse edges
489            - at most 2 endpoints
490            - order-1 interior nodal dofs
491            - no undefined nodal dofs (nconn < order)
492         */
493         PetscInt ends = 0,ints = 0, undef = 0;
494         for (j=ii[i];j<ii[i+1];j++) {
495           PetscInt v = jj[j],k;
496           PetscInt nconn = iit[v+1]-iit[v];
497           for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
498           if (nconn > order) ends++;
499           else if (nconn == order) ints++;
500           else undef++;
501         }
502         if (undef || ends > 2 || ints != order -1) {
503           marks[cum++] = i;
504           CHKERRQ(PetscBTSet(bte,i));
505           for (j=ii[i];j<ii[i+1];j++) {
506             CHKERRQ(PetscBTSet(btv,jj[j]));
507           }
508         }
509       }
510     }
511     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
512     if (!order && ii[i+1] != ii[i]) {
513       PetscScalar val = 1./(ii[i+1]-ii[i]-1);
514       for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
515     }
516   }
517   CHKERRQ(PetscBTDestroy(&btee));
518   CHKERRQ(MatSeqAIJRestoreArray(lGe,&vals));
519   CHKERRQ(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
520   if (!conforming) {
521     CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done));
522     CHKERRQ(MatDestroy(&lGt));
523   }
524   CHKERRQ(MatZeroRows(lGe,cum,marks,0.,NULL,NULL));
525 
526   /* identify splitpoints and corner candidates */
527   CHKERRQ(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt));
528   if (print) {
529     CHKERRQ(PetscObjectSetName((PetscObject)lGe,"edgerestr_lG"));
530     CHKERRQ(MatView(lGe,NULL));
531     CHKERRQ(PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt"));
532     CHKERRQ(MatView(lGt,NULL));
533   }
534   CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
535   CHKERRQ(MatSeqAIJGetArray(lGt,&vals));
536   for (i=0;i<nv;i++) {
537     PetscInt  ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
538     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
539     if (!order) { /* variable order */
540       PetscReal vorder = 0.;
541 
542       for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
543       test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
544       PetscCheckFalse(vorder-test > PETSC_SQRT_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
545       ord  = 1;
546     }
547     PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord);
548     for (j=ii[i];j<ii[i+1] && sneighs;j++) {
549       if (PetscBTLookup(btbd,jj[j])) {
550         bdir = PETSC_TRUE;
551         break;
552       }
553       if (vc != ecount[jj[j]]) {
554         sneighs = PETSC_FALSE;
555       } else {
556         PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
557         for (k=0;k<vc;k++) {
558           if (vn[k] != en[k]) {
559             sneighs = PETSC_FALSE;
560             break;
561           }
562         }
563       }
564     }
565     if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
566       if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
567       CHKERRQ(PetscBTSet(btv,i));
568     } else if (test == ord) {
569       if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
570         if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
571         CHKERRQ(PetscBTSet(btv,i));
572       } else {
573         if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
574         CHKERRQ(PetscBTSet(btvcand,i));
575       }
576     }
577   }
578   CHKERRQ(ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs));
579   CHKERRQ(ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs));
580   CHKERRQ(PetscBTDestroy(&btbd));
581 
582   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
583   if (order != 1) {
584     if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
585     CHKERRQ(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done));
586     for (i=0;i<nv;i++) {
587       if (PetscBTLookup(btvcand,i)) {
588         PetscBool found = PETSC_FALSE;
589         for (j=ii[i];j<ii[i+1] && !found;j++) {
590           PetscInt k,e = jj[j];
591           if (PetscBTLookup(bte,e)) continue;
592           for (k=iit[e];k<iit[e+1];k++) {
593             PetscInt v = jjt[k];
594             if (v != i && PetscBTLookup(btvcand,v)) {
595               found = PETSC_TRUE;
596               break;
597             }
598           }
599         }
600         if (!found) {
601           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D CLEARED\n",i);
602           CHKERRQ(PetscBTClear(btvcand,i));
603         } else {
604           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D ACCEPTED\n",i);
605         }
606       }
607     }
608     CHKERRQ(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done));
609   }
610   CHKERRQ(MatSeqAIJRestoreArray(lGt,&vals));
611   CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
612   CHKERRQ(MatDestroy(&lGe));
613 
614   /* Get the local G^T explicitly */
615   CHKERRQ(MatDestroy(&lGt));
616   CHKERRQ(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt));
617   CHKERRQ(MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE));
618 
619   /* Mark interior nodal dofs */
620   CHKERRQ(ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared));
621   CHKERRQ(PetscBTCreate(nv,&btvi));
622   for (i=1;i<n_neigh;i++) {
623     for (j=0;j<n_shared[i];j++) {
624       CHKERRQ(PetscBTSet(btvi,shared[i][j]));
625     }
626   }
627   CHKERRQ(ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared));
628 
629   /* communicate corners and splitpoints */
630   CHKERRQ(PetscMalloc1(nv,&vmarks));
631   CHKERRQ(PetscArrayzero(sfvleaves,nv));
632   CHKERRQ(PetscArrayzero(sfvroots,Lv));
633   for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;
634 
635   if (print) {
636     IS tbz;
637 
638     cum = 0;
639     for (i=0;i<nv;i++)
640       if (sfvleaves[i])
641         vmarks[cum++] = i;
642 
643     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz));
644     CHKERRQ(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local"));
645     CHKERRQ(ISView(tbz,NULL));
646     CHKERRQ(ISDestroy(&tbz));
647   }
648 
649   CHKERRQ(PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM));
650   CHKERRQ(PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM));
651   CHKERRQ(PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE));
652   CHKERRQ(PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE));
653 
654   /* Zero rows of lGt corresponding to identified corners
655      and interior nodal dofs */
656   cum = 0;
657   for (i=0;i<nv;i++) {
658     if (sfvleaves[i]) {
659       vmarks[cum++] = i;
660       CHKERRQ(PetscBTSet(btv,i));
661     }
662     if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
663   }
664   CHKERRQ(PetscBTDestroy(&btvi));
665   if (print) {
666     IS tbz;
667 
668     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz));
669     CHKERRQ(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior"));
670     CHKERRQ(ISView(tbz,NULL));
671     CHKERRQ(ISDestroy(&tbz));
672   }
673   CHKERRQ(MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL));
674   CHKERRQ(PetscFree(vmarks));
675   CHKERRQ(PetscSFDestroy(&sfv));
676   CHKERRQ(PetscFree2(sfvleaves,sfvroots));
677 
678   /* Recompute G */
679   CHKERRQ(MatDestroy(&lG));
680   CHKERRQ(MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG));
681   if (print) {
682     CHKERRQ(PetscObjectSetName((PetscObject)lG,"used_lG"));
683     CHKERRQ(MatView(lG,NULL));
684     CHKERRQ(PetscObjectSetName((PetscObject)lGt,"used_lGt"));
685     CHKERRQ(MatView(lGt,NULL));
686   }
687 
688   /* Get primal dofs (if any) */
689   cum = 0;
690   for (i=0;i<ne;i++) {
691     if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
692   }
693   if (fl2g) {
694     CHKERRQ(ISLocalToGlobalMappingApply(fl2g,cum,marks,marks));
695   }
696   CHKERRQ(ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals));
697   if (print) {
698     CHKERRQ(PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs"));
699     CHKERRQ(ISView(primals,NULL));
700   }
701   CHKERRQ(PetscBTDestroy(&bte));
702   /* TODO: what if the user passed in some of them ?  */
703   CHKERRQ(PCBDDCSetPrimalVerticesLocalIS(pc,primals));
704   CHKERRQ(ISDestroy(&primals));
705 
706   /* Compute edge connectivity */
707   CHKERRQ(PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_"));
708 
709   /* Symbolic conn = lG*lGt */
710   CHKERRQ(MatProductCreate(lG,lGt,NULL,&conn));
711   CHKERRQ(MatProductSetType(conn,MATPRODUCT_AB));
712   CHKERRQ(MatProductSetAlgorithm(conn,"default"));
713   CHKERRQ(MatProductSetFill(conn,PETSC_DEFAULT));
714   CHKERRQ(PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_"));
715   CHKERRQ(MatProductSetFromOptions(conn));
716   CHKERRQ(MatProductSymbolic(conn));
717 
718   CHKERRQ(MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
719   if (fl2g) {
720     PetscBT   btf;
721     PetscInt  *iia,*jja,*iiu,*jju;
722     PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;
723 
724     /* create CSR for all local dofs */
725     CHKERRQ(PetscMalloc1(n+1,&iia));
726     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
727       PetscCheckFalse(pcbddc->mat_graph->nvtxs_csr != n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
728       iiu = pcbddc->mat_graph->xadj;
729       jju = pcbddc->mat_graph->adjncy;
730     } else if (pcbddc->use_local_adj) {
731       rest = PETSC_TRUE;
732       CHKERRQ(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done));
733     } else {
734       free   = PETSC_TRUE;
735       CHKERRQ(PetscMalloc2(n+1,&iiu,n,&jju));
736       iiu[0] = 0;
737       for (i=0;i<n;i++) {
738         iiu[i+1] = i+1;
739         jju[i]   = -1;
740       }
741     }
742 
743     /* import sizes of CSR */
744     iia[0] = 0;
745     for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];
746 
747     /* overwrite entries corresponding to the Nedelec field */
748     CHKERRQ(PetscBTCreate(n,&btf));
749     CHKERRQ(ISGetIndices(nedfieldlocal,&idxs));
750     for (i=0;i<ne;i++) {
751       CHKERRQ(PetscBTSet(btf,idxs[i]));
752       iia[idxs[i]+1] = ii[i+1]-ii[i];
753     }
754 
755     /* iia in CSR */
756     for (i=0;i<n;i++) iia[i+1] += iia[i];
757 
758     /* jja in CSR */
759     CHKERRQ(PetscMalloc1(iia[n],&jja));
760     for (i=0;i<n;i++)
761       if (!PetscBTLookup(btf,i))
762         for (j=0;j<iiu[i+1]-iiu[i];j++)
763           jja[iia[i]+j] = jju[iiu[i]+j];
764 
765     /* map edge dofs connectivity */
766     if (jj) {
767       CHKERRQ(ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj));
768       for (i=0;i<ne;i++) {
769         PetscInt e = idxs[i];
770         for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
771       }
772     }
773     CHKERRQ(ISRestoreIndices(nedfieldlocal,&idxs));
774     CHKERRQ(PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER));
775     if (rest) {
776       CHKERRQ(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done));
777     }
778     if (free) {
779       CHKERRQ(PetscFree2(iiu,jju));
780     }
781     CHKERRQ(PetscBTDestroy(&btf));
782   } else {
783     CHKERRQ(PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER));
784   }
785 
786   /* Analyze interface for edge dofs */
787   CHKERRQ(PCBDDCAnalyzeInterface(pc));
788   pcbddc->mat_graph->twodim = PETSC_FALSE;
789 
790   /* Get coarse edges in the edge space */
791   CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals));
792   CHKERRQ(MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
793 
794   if (fl2g) {
795     CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals));
796     CHKERRQ(PetscMalloc1(nee,&eedges));
797     for (i=0;i<nee;i++) {
798       CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]));
799     }
800   } else {
801     eedges  = alleedges;
802     primals = allprimals;
803   }
804 
805   /* Mark fine edge dofs with their coarse edge id */
806   CHKERRQ(PetscArrayzero(marks,ne));
807   CHKERRQ(ISGetLocalSize(primals,&cum));
808   CHKERRQ(ISGetIndices(primals,&idxs));
809   for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
810   CHKERRQ(ISRestoreIndices(primals,&idxs));
811   if (print) {
812     CHKERRQ(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs"));
813     CHKERRQ(ISView(primals,NULL));
814   }
815 
816   maxsize = 0;
817   for (i=0;i<nee;i++) {
818     PetscInt size,mark = i+1;
819 
820     CHKERRQ(ISGetLocalSize(eedges[i],&size));
821     CHKERRQ(ISGetIndices(eedges[i],&idxs));
822     for (j=0;j<size;j++) marks[idxs[j]] = mark;
823     CHKERRQ(ISRestoreIndices(eedges[i],&idxs));
824     maxsize = PetscMax(maxsize,size);
825   }
826 
827   /* Find coarse edge endpoints */
828   CHKERRQ(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
829   CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done));
830   for (i=0;i<nee;i++) {
831     PetscInt mark = i+1,size;
832 
833     CHKERRQ(ISGetLocalSize(eedges[i],&size));
834     if (!size && nedfieldlocal) continue;
835     PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
836     CHKERRQ(ISGetIndices(eedges[i],&idxs));
837     if (print) {
838       CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i));
839       CHKERRQ(ISView(eedges[i],NULL));
840     }
841     for (j=0;j<size;j++) {
842       PetscInt k, ee = idxs[j];
843       if (print) PetscPrintf(PETSC_COMM_SELF,"  idx %D\n",ee);
844       for (k=ii[ee];k<ii[ee+1];k++) {
845         if (print) PetscPrintf(PETSC_COMM_SELF,"    inspect %D\n",jj[k]);
846         if (PetscBTLookup(btv,jj[k])) {
847           if (print) PetscPrintf(PETSC_COMM_SELF,"      corner found (already set) %D\n",jj[k]);
848         } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
849           PetscInt  k2;
850           PetscBool corner = PETSC_FALSE;
851           for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
852             if (print) PetscPrintf(PETSC_COMM_SELF,"        INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
853             /* it's a corner if either is connected with an edge dof belonging to a different cc or
854                if the edge dof lie on the natural part of the boundary */
855             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
856               corner = PETSC_TRUE;
857               break;
858             }
859           }
860           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
861             if (print) PetscPrintf(PETSC_COMM_SELF,"        corner found %D\n",jj[k]);
862             CHKERRQ(PetscBTSet(btv,jj[k]));
863           } else {
864             if (print) PetscPrintf(PETSC_COMM_SELF,"        no corners found\n");
865           }
866         }
867       }
868     }
869     CHKERRQ(ISRestoreIndices(eedges[i],&idxs));
870   }
871   CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done));
872   CHKERRQ(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
873   CHKERRQ(PetscBTDestroy(&btb));
874 
875   /* Reset marked primal dofs */
876   CHKERRQ(ISGetLocalSize(primals,&cum));
877   CHKERRQ(ISGetIndices(primals,&idxs));
878   for (i=0;i<cum;i++) marks[idxs[i]] = 0;
879   CHKERRQ(ISRestoreIndices(primals,&idxs));
880 
881   /* Now use the initial lG */
882   CHKERRQ(MatDestroy(&lG));
883   CHKERRQ(MatDestroy(&lGt));
884   lG   = lGinit;
885   CHKERRQ(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt));
886 
887   /* Compute extended cols indices */
888   CHKERRQ(PetscBTCreate(nv,&btvc));
889   CHKERRQ(PetscBTCreate(nee,&bter));
890   CHKERRQ(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
891   CHKERRQ(MatSeqAIJGetMaxRowNonzeros(lG,&i));
892   i   *= maxsize;
893   CHKERRQ(PetscCalloc1(nee,&extcols));
894   CHKERRQ(PetscMalloc2(i,&extrow,i,&gidxs));
895   eerr = PETSC_FALSE;
896   for (i=0;i<nee;i++) {
897     PetscInt size,found = 0;
898 
899     cum  = 0;
900     CHKERRQ(ISGetLocalSize(eedges[i],&size));
901     if (!size && nedfieldlocal) continue;
902     PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
903     CHKERRQ(ISGetIndices(eedges[i],&idxs));
904     CHKERRQ(PetscBTMemzero(nv,btvc));
905     for (j=0;j<size;j++) {
906       PetscInt k,ee = idxs[j];
907       for (k=ii[ee];k<ii[ee+1];k++) {
908         PetscInt vv = jj[k];
909         if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
910         else if (!PetscBTLookupSet(btvc,vv)) found++;
911       }
912     }
913     CHKERRQ(ISRestoreIndices(eedges[i],&idxs));
914     CHKERRQ(PetscSortRemoveDupsInt(&cum,extrow));
915     CHKERRQ(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs));
916     CHKERRQ(PetscSortIntWithArray(cum,gidxs,extrow));
917     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]));
918     /* it may happen that endpoints are not defined at this point
919        if it is the case, mark this edge for a second pass */
920     if (cum != size -1 || found != 2) {
921       CHKERRQ(PetscBTSet(bter,i));
922       if (print) {
923         CHKERRQ(PetscObjectSetName((PetscObject)eedges[i],"error_edge"));
924         CHKERRQ(ISView(eedges[i],NULL));
925         CHKERRQ(PetscObjectSetName((PetscObject)extcols[i],"error_extcol"));
926         CHKERRQ(ISView(extcols[i],NULL));
927       }
928       eerr = PETSC_TRUE;
929     }
930   }
931   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
932   CHKERRMPI(MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm));
933   if (done) {
934     PetscInt *newprimals;
935 
936     CHKERRQ(PetscMalloc1(ne,&newprimals));
937     CHKERRQ(ISGetLocalSize(primals,&cum));
938     CHKERRQ(ISGetIndices(primals,&idxs));
939     CHKERRQ(PetscArraycpy(newprimals,idxs,cum));
940     CHKERRQ(ISRestoreIndices(primals,&idxs));
941     CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done));
942     if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
943     for (i=0;i<nee;i++) {
944       PetscBool has_candidates = PETSC_FALSE;
945       if (PetscBTLookup(bter,i)) {
946         PetscInt size,mark = i+1;
947 
948         CHKERRQ(ISGetLocalSize(eedges[i],&size));
949         CHKERRQ(ISGetIndices(eedges[i],&idxs));
950         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
951         for (j=0;j<size;j++) {
952           PetscInt k,ee = idxs[j];
953           if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
954           for (k=ii[ee];k<ii[ee+1];k++) {
955             /* set all candidates located on the edge as corners */
956             if (PetscBTLookup(btvcand,jj[k])) {
957               PetscInt k2,vv = jj[k];
958               has_candidates = PETSC_TRUE;
959               if (print) PetscPrintf(PETSC_COMM_SELF,"  Candidate set to vertex %D\n",vv);
960               CHKERRQ(PetscBTSet(btv,vv));
961               /* set all edge dofs connected to candidate as primals */
962               for (k2=iit[vv];k2<iit[vv+1];k2++) {
963                 if (marks[jjt[k2]] == mark) {
964                   PetscInt k3,ee2 = jjt[k2];
965                   if (print) PetscPrintf(PETSC_COMM_SELF,"    Connected edge dof set to primal %D\n",ee2);
966                   newprimals[cum++] = ee2;
967                   /* finally set the new corners */
968                   for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
969                     if (print) PetscPrintf(PETSC_COMM_SELF,"      Connected nodal dof set to vertex %D\n",jj[k3]);
970                     CHKERRQ(PetscBTSet(btv,jj[k3]));
971                   }
972                 }
973               }
974             } else {
975               if (print) PetscPrintf(PETSC_COMM_SELF,"  Not a candidate vertex %D\n",jj[k]);
976             }
977           }
978         }
979         if (!has_candidates) { /* circular edge */
980           PetscInt k, ee = idxs[0],*tmarks;
981 
982           CHKERRQ(PetscCalloc1(ne,&tmarks));
983           if (print) PetscPrintf(PETSC_COMM_SELF,"  Circular edge %D\n",i);
984           for (k=ii[ee];k<ii[ee+1];k++) {
985             PetscInt k2;
986             if (print) PetscPrintf(PETSC_COMM_SELF,"    Set to corner %D\n",jj[k]);
987             CHKERRQ(PetscBTSet(btv,jj[k]));
988             for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
989           }
990           for (j=0;j<size;j++) {
991             if (tmarks[idxs[j]] > 1) {
992               if (print) PetscPrintf(PETSC_COMM_SELF,"  Edge dof set to primal %D\n",idxs[j]);
993               newprimals[cum++] = idxs[j];
994             }
995           }
996           CHKERRQ(PetscFree(tmarks));
997         }
998         CHKERRQ(ISRestoreIndices(eedges[i],&idxs));
999       }
1000       CHKERRQ(ISDestroy(&extcols[i]));
1001     }
1002     CHKERRQ(PetscFree(extcols));
1003     CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done));
1004     CHKERRQ(PetscSortRemoveDupsInt(&cum,newprimals));
1005     if (fl2g) {
1006       CHKERRQ(ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals));
1007       CHKERRQ(ISDestroy(&primals));
1008       for (i=0;i<nee;i++) {
1009         CHKERRQ(ISDestroy(&eedges[i]));
1010       }
1011       CHKERRQ(PetscFree(eedges));
1012     }
1013     CHKERRQ(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals));
1014     CHKERRQ(ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals));
1015     CHKERRQ(PetscFree(newprimals));
1016     CHKERRQ(PCBDDCSetPrimalVerticesLocalIS(pc,primals));
1017     CHKERRQ(ISDestroy(&primals));
1018     CHKERRQ(PCBDDCAnalyzeInterface(pc));
1019     pcbddc->mat_graph->twodim = PETSC_FALSE;
1020     CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals));
1021     if (fl2g) {
1022       CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals));
1023       CHKERRQ(PetscMalloc1(nee,&eedges));
1024       for (i=0;i<nee;i++) {
1025         CHKERRQ(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]));
1026       }
1027     } else {
1028       eedges  = alleedges;
1029       primals = allprimals;
1030     }
1031     CHKERRQ(PetscCalloc1(nee,&extcols));
1032 
1033     /* Mark again */
1034     CHKERRQ(PetscArrayzero(marks,ne));
1035     for (i=0;i<nee;i++) {
1036       PetscInt size,mark = i+1;
1037 
1038       CHKERRQ(ISGetLocalSize(eedges[i],&size));
1039       CHKERRQ(ISGetIndices(eedges[i],&idxs));
1040       for (j=0;j<size;j++) marks[idxs[j]] = mark;
1041       CHKERRQ(ISRestoreIndices(eedges[i],&idxs));
1042     }
1043     if (print) {
1044       CHKERRQ(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass"));
1045       CHKERRQ(ISView(primals,NULL));
1046     }
1047 
1048     /* Recompute extended cols */
1049     eerr = PETSC_FALSE;
1050     for (i=0;i<nee;i++) {
1051       PetscInt size;
1052 
1053       cum  = 0;
1054       CHKERRQ(ISGetLocalSize(eedges[i],&size));
1055       if (!size && nedfieldlocal) continue;
1056       PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1057       CHKERRQ(ISGetIndices(eedges[i],&idxs));
1058       for (j=0;j<size;j++) {
1059         PetscInt k,ee = idxs[j];
1060         for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1061       }
1062       CHKERRQ(ISRestoreIndices(eedges[i],&idxs));
1063       CHKERRQ(PetscSortRemoveDupsInt(&cum,extrow));
1064       CHKERRQ(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs));
1065       CHKERRQ(PetscSortIntWithArray(cum,gidxs,extrow));
1066       CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]));
1067       if (cum != size -1) {
1068         if (print) {
1069           CHKERRQ(PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass"));
1070           CHKERRQ(ISView(eedges[i],NULL));
1071           CHKERRQ(PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass"));
1072           CHKERRQ(ISView(extcols[i],NULL));
1073         }
1074         eerr = PETSC_TRUE;
1075       }
1076     }
1077   }
1078   CHKERRQ(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
1079   CHKERRQ(PetscFree2(extrow,gidxs));
1080   CHKERRQ(PetscBTDestroy(&bter));
1081   if (print) CHKERRQ(PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF));
1082   /* an error should not occur at this point */
1083   PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1084 
1085   /* Check the number of endpoints */
1086   CHKERRQ(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
1087   CHKERRQ(PetscMalloc1(2*nee,&corners));
1088   CHKERRQ(PetscMalloc1(nee,&cedges));
1089   for (i=0;i<nee;i++) {
1090     PetscInt size, found = 0, gc[2];
1091 
1092     /* init with defaults */
1093     cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1094     CHKERRQ(ISGetLocalSize(eedges[i],&size));
1095     if (!size && nedfieldlocal) continue;
1096     PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1097     CHKERRQ(ISGetIndices(eedges[i],&idxs));
1098     CHKERRQ(PetscBTMemzero(nv,btvc));
1099     for (j=0;j<size;j++) {
1100       PetscInt k,ee = idxs[j];
1101       for (k=ii[ee];k<ii[ee+1];k++) {
1102         PetscInt vv = jj[k];
1103         if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1104           PetscCheckFalse(found == 2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1105           corners[i*2+found++] = vv;
1106         }
1107       }
1108     }
1109     if (found != 2) {
1110       PetscInt e;
1111       if (fl2g) {
1112         CHKERRQ(ISLocalToGlobalMappingApply(fl2g,1,idxs,&e));
1113       } else {
1114         e = idxs[0];
1115       }
1116       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1117     }
1118 
1119     /* get primal dof index on this coarse edge */
1120     CHKERRQ(ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc));
1121     if (gc[0] > gc[1]) {
1122       PetscInt swap  = corners[2*i];
1123       corners[2*i]   = corners[2*i+1];
1124       corners[2*i+1] = swap;
1125     }
1126     cedges[i] = idxs[size-1];
1127     CHKERRQ(ISRestoreIndices(eedges[i],&idxs));
1128     if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1129   }
1130   CHKERRQ(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
1131   CHKERRQ(PetscBTDestroy(&btvc));
1132 
1133   if (PetscDefined(USE_DEBUG)) {
1134     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1135      not interfere with neighbouring coarse edges */
1136     CHKERRQ(PetscMalloc1(nee+1,&emarks));
1137     CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
1138     for (i=0;i<nv;i++) {
1139       PetscInt emax = 0,eemax = 0;
1140 
1141       if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1142       CHKERRQ(PetscArrayzero(emarks,nee+1));
1143       for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1144       for (j=1;j<nee+1;j++) {
1145         if (emax < emarks[j]) {
1146           emax = emarks[j];
1147           eemax = j;
1148         }
1149       }
1150       /* not relevant for edges */
1151       if (!eemax) continue;
1152 
1153       for (j=ii[i];j<ii[i+1];j++) {
1154         if (marks[jj[j]] && marks[jj[j]] != eemax) {
1155           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1156         }
1157       }
1158     }
1159     CHKERRQ(PetscFree(emarks));
1160     CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
1161   }
1162 
1163   /* Compute extended rows indices for edge blocks of the change of basis */
1164   CHKERRQ(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
1165   CHKERRQ(MatSeqAIJGetMaxRowNonzeros(lGt,&extmem));
1166   extmem *= maxsize;
1167   CHKERRQ(PetscMalloc1(extmem*nee,&extrow));
1168   CHKERRQ(PetscMalloc1(nee,&extrows));
1169   CHKERRQ(PetscCalloc1(nee,&extrowcum));
1170   for (i=0;i<nv;i++) {
1171     PetscInt mark = 0,size,start;
1172 
1173     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1174     for (j=ii[i];j<ii[i+1];j++)
1175       if (marks[jj[j]] && !mark)
1176         mark = marks[jj[j]];
1177 
1178     /* not relevant */
1179     if (!mark) continue;
1180 
1181     /* import extended row */
1182     mark--;
1183     start = mark*extmem+extrowcum[mark];
1184     size = ii[i+1]-ii[i];
1185     PetscCheckFalse(extrowcum[mark] + size > extmem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1186     CHKERRQ(PetscArraycpy(extrow+start,jj+ii[i],size));
1187     extrowcum[mark] += size;
1188   }
1189   CHKERRQ(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done));
1190   CHKERRQ(MatDestroy(&lGt));
1191   CHKERRQ(PetscFree(marks));
1192 
1193   /* Compress extrows */
1194   cum  = 0;
1195   for (i=0;i<nee;i++) {
1196     PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1197     CHKERRQ(PetscSortRemoveDupsInt(&size,start));
1198     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]));
1199     cum  = PetscMax(cum,size);
1200   }
1201   CHKERRQ(PetscFree(extrowcum));
1202   CHKERRQ(PetscBTDestroy(&btv));
1203   CHKERRQ(PetscBTDestroy(&btvcand));
1204 
1205   /* Workspace for lapack inner calls and VecSetValues */
1206   CHKERRQ(PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork));
1207 
1208   /* Create change of basis matrix (preallocation can be improved) */
1209   CHKERRQ(MatCreate(comm,&T));
1210   ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1211                        pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr);
1212   CHKERRQ(MatSetType(T,MATAIJ));
1213   CHKERRQ(MatSeqAIJSetPreallocation(T,10,NULL));
1214   CHKERRQ(MatMPIAIJSetPreallocation(T,10,NULL,10,NULL));
1215   CHKERRQ(MatSetLocalToGlobalMapping(T,al2g,al2g));
1216   CHKERRQ(MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
1217   CHKERRQ(MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE));
1218   CHKERRQ(ISLocalToGlobalMappingDestroy(&al2g));
1219 
1220   /* Defaults to identity */
1221   CHKERRQ(MatCreateVecs(pc->pmat,&tvec,NULL));
1222   CHKERRQ(VecSet(tvec,1.0));
1223   CHKERRQ(MatDiagonalSet(T,tvec,INSERT_VALUES));
1224   CHKERRQ(VecDestroy(&tvec));
1225 
1226   /* Create discrete gradient for the coarser level if needed */
1227   CHKERRQ(MatDestroy(&pcbddc->nedcG));
1228   CHKERRQ(ISDestroy(&pcbddc->nedclocal));
1229   if (pcbddc->current_level < pcbddc->max_levels) {
1230     ISLocalToGlobalMapping cel2g,cvl2g;
1231     IS                     wis,gwis;
1232     PetscInt               cnv,cne;
1233 
1234     CHKERRQ(ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis));
1235     if (fl2g) {
1236       CHKERRQ(ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal));
1237     } else {
1238       CHKERRQ(PetscObjectReference((PetscObject)wis));
1239       pcbddc->nedclocal = wis;
1240     }
1241     CHKERRQ(ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis));
1242     CHKERRQ(ISDestroy(&wis));
1243     CHKERRQ(ISRenumber(gwis,NULL,&cne,&wis));
1244     CHKERRQ(ISLocalToGlobalMappingCreateIS(wis,&cel2g));
1245     CHKERRQ(ISDestroy(&wis));
1246     CHKERRQ(ISDestroy(&gwis));
1247 
1248     CHKERRQ(ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis));
1249     CHKERRQ(ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis));
1250     CHKERRQ(ISDestroy(&wis));
1251     CHKERRQ(ISRenumber(gwis,NULL,&cnv,&wis));
1252     CHKERRQ(ISLocalToGlobalMappingCreateIS(wis,&cvl2g));
1253     CHKERRQ(ISDestroy(&wis));
1254     CHKERRQ(ISDestroy(&gwis));
1255 
1256     CHKERRQ(MatCreate(comm,&pcbddc->nedcG));
1257     CHKERRQ(MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv));
1258     CHKERRQ(MatSetType(pcbddc->nedcG,MATAIJ));
1259     CHKERRQ(MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL));
1260     CHKERRQ(MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL));
1261     CHKERRQ(MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g));
1262     CHKERRQ(ISLocalToGlobalMappingDestroy(&cel2g));
1263     CHKERRQ(ISLocalToGlobalMappingDestroy(&cvl2g));
1264   }
1265   CHKERRQ(ISLocalToGlobalMappingDestroy(&vl2g));
1266 
1267 #if defined(PRINT_GDET)
1268   inc = 0;
1269   lev = pcbddc->current_level;
1270 #endif
1271 
1272   /* Insert values in the change of basis matrix */
1273   for (i=0;i<nee;i++) {
1274     Mat         Gins = NULL, GKins = NULL;
1275     IS          cornersis = NULL;
1276     PetscScalar cvals[2];
1277 
1278     if (pcbddc->nedcG) {
1279       CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis));
1280     }
1281     CHKERRQ(PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork));
1282     if (Gins && GKins) {
1283       const PetscScalar *data;
1284       const PetscInt    *rows,*cols;
1285       PetscInt          nrh,nch,nrc,ncc;
1286 
1287       CHKERRQ(ISGetIndices(eedges[i],&cols));
1288       /* H1 */
1289       CHKERRQ(ISGetIndices(extrows[i],&rows));
1290       CHKERRQ(MatGetSize(Gins,&nrh,&nch));
1291       CHKERRQ(MatDenseGetArrayRead(Gins,&data));
1292       CHKERRQ(MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES));
1293       CHKERRQ(MatDenseRestoreArrayRead(Gins,&data));
1294       CHKERRQ(ISRestoreIndices(extrows[i],&rows));
1295       /* complement */
1296       CHKERRQ(MatGetSize(GKins,&nrc,&ncc));
1297       PetscCheck(ncc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1298       PetscCheckFalse(ncc + nch != nrc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1299       PetscCheckFalse(ncc != 1 && pcbddc->nedcG,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1300       CHKERRQ(MatDenseGetArrayRead(GKins,&data));
1301       CHKERRQ(MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES));
1302       CHKERRQ(MatDenseRestoreArrayRead(GKins,&data));
1303 
1304       /* coarse discrete gradient */
1305       if (pcbddc->nedcG) {
1306         PetscInt cols[2];
1307 
1308         cols[0] = 2*i;
1309         cols[1] = 2*i+1;
1310         CHKERRQ(MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES));
1311       }
1312       CHKERRQ(ISRestoreIndices(eedges[i],&cols));
1313     }
1314     CHKERRQ(ISDestroy(&extrows[i]));
1315     CHKERRQ(ISDestroy(&extcols[i]));
1316     CHKERRQ(ISDestroy(&cornersis));
1317     CHKERRQ(MatDestroy(&Gins));
1318     CHKERRQ(MatDestroy(&GKins));
1319   }
1320   CHKERRQ(ISLocalToGlobalMappingDestroy(&el2g));
1321 
1322   /* Start assembling */
1323   CHKERRQ(MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY));
1324   if (pcbddc->nedcG) {
1325     CHKERRQ(MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY));
1326   }
1327 
1328   /* Free */
1329   if (fl2g) {
1330     CHKERRQ(ISDestroy(&primals));
1331     for (i=0;i<nee;i++) {
1332       CHKERRQ(ISDestroy(&eedges[i]));
1333     }
1334     CHKERRQ(PetscFree(eedges));
1335   }
1336 
1337   /* hack mat_graph with primal dofs on the coarse edges */
1338   {
1339     PCBDDCGraph graph   = pcbddc->mat_graph;
1340     PetscInt    *oqueue = graph->queue;
1341     PetscInt    *ocptr  = graph->cptr;
1342     PetscInt    ncc,*idxs;
1343 
1344     /* find first primal edge */
1345     if (pcbddc->nedclocal) {
1346       CHKERRQ(ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs));
1347     } else {
1348       if (fl2g) {
1349         CHKERRQ(ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges));
1350       }
1351       idxs = cedges;
1352     }
1353     cum = 0;
1354     while (cum < nee && cedges[cum] < 0) cum++;
1355 
1356     /* adapt connected components */
1357     CHKERRQ(PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue));
1358     graph->cptr[0] = 0;
1359     for (i=0,ncc=0;i<graph->ncc;i++) {
1360       PetscInt lc = ocptr[i+1]-ocptr[i];
1361       if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1362         graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1363         graph->queue[graph->cptr[ncc]] = cedges[cum];
1364         ncc++;
1365         lc--;
1366         cum++;
1367         while (cum < nee && cedges[cum] < 0) cum++;
1368       }
1369       graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1370       for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1371       ncc++;
1372     }
1373     graph->ncc = ncc;
1374     if (pcbddc->nedclocal) {
1375       CHKERRQ(ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs));
1376     }
1377     CHKERRQ(PetscFree2(ocptr,oqueue));
1378   }
1379   CHKERRQ(ISLocalToGlobalMappingDestroy(&fl2g));
1380   CHKERRQ(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals));
1381   CHKERRQ(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1382   CHKERRQ(MatDestroy(&conn));
1383 
1384   CHKERRQ(ISDestroy(&nedfieldlocal));
1385   CHKERRQ(PetscFree(extrow));
1386   CHKERRQ(PetscFree2(work,rwork));
1387   CHKERRQ(PetscFree(corners));
1388   CHKERRQ(PetscFree(cedges));
1389   CHKERRQ(PetscFree(extrows));
1390   CHKERRQ(PetscFree(extcols));
1391   CHKERRQ(MatDestroy(&lG));
1392 
1393   /* Complete assembling */
1394   CHKERRQ(MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY));
1395   if (pcbddc->nedcG) {
1396     CHKERRQ(MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY));
1397 #if 0
1398     CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G"));
1399     CHKERRQ(MatView(pcbddc->nedcG,NULL));
1400 #endif
1401   }
1402 
1403   /* set change of basis */
1404   CHKERRQ(PCBDDCSetChangeOfBasisMat(pc,T,singular));
1405   CHKERRQ(MatDestroy(&T));
1406 
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 /* the near-null space of BDDC carries information on quadrature weights,
1411    and these can be collinear -> so cheat with MatNullSpaceCreate
1412    and create a suitable set of basis vectors first */
1413 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1414 {
1415   PetscInt       i;
1416 
1417   PetscFunctionBegin;
1418   for (i=0;i<nvecs;i++) {
1419     PetscInt first,last;
1420 
1421     CHKERRQ(VecGetOwnershipRange(quad_vecs[i],&first,&last));
1422     PetscCheckFalse(last-first < 2*nvecs && has_const,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
1423     if (i>=first && i < last) {
1424       PetscScalar *data;
1425       CHKERRQ(VecGetArray(quad_vecs[i],&data));
1426       if (!has_const) {
1427         data[i-first] = 1.;
1428       } else {
1429         data[2*i-first] = 1./PetscSqrtReal(2.);
1430         data[2*i-first+1] = -1./PetscSqrtReal(2.);
1431       }
1432       CHKERRQ(VecRestoreArray(quad_vecs[i],&data));
1433     }
1434     CHKERRQ(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1435   }
1436   CHKERRQ(MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp));
1437   for (i=0;i<nvecs;i++) { /* reset vectors */
1438     PetscInt first,last;
1439     CHKERRQ(VecLockReadPop(quad_vecs[i]));
1440     CHKERRQ(VecGetOwnershipRange(quad_vecs[i],&first,&last));
1441     if (i>=first && i < last) {
1442       PetscScalar *data;
1443       CHKERRQ(VecGetArray(quad_vecs[i],&data));
1444       if (!has_const) {
1445         data[i-first] = 0.;
1446       } else {
1447         data[2*i-first] = 0.;
1448         data[2*i-first+1] = 0.;
1449       }
1450       CHKERRQ(VecRestoreArray(quad_vecs[i],&data));
1451     }
1452     CHKERRQ(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1453     CHKERRQ(VecLockReadPush(quad_vecs[i]));
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1459 {
1460   Mat                    loc_divudotp;
1461   Vec                    p,v,vins,quad_vec,*quad_vecs;
1462   ISLocalToGlobalMapping map;
1463   PetscScalar            *vals;
1464   const PetscScalar      *array;
1465   PetscInt               i,maxneighs = 0,maxsize,*gidxs;
1466   PetscInt               n_neigh,*neigh,*n_shared,**shared;
1467   PetscMPIInt            rank;
1468 
1469   PetscFunctionBegin;
1470   CHKERRQ(ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared));
1471   for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs);
1472   CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A)));
1473   if (!maxneighs) {
1474     CHKERRQ(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared));
1475     *nnsp = NULL;
1476     PetscFunctionReturn(0);
1477   }
1478   maxsize = 0;
1479   for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1480   CHKERRQ(PetscMalloc2(maxsize,&gidxs,maxsize,&vals));
1481   /* create vectors to hold quadrature weights */
1482   CHKERRQ(MatCreateVecs(A,&quad_vec,NULL));
1483   if (!transpose) {
1484     CHKERRQ(MatISGetLocalToGlobalMapping(A,&map,NULL));
1485   } else {
1486     CHKERRQ(MatISGetLocalToGlobalMapping(A,NULL,&map));
1487   }
1488   CHKERRQ(VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs));
1489   CHKERRQ(VecDestroy(&quad_vec));
1490   CHKERRQ(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp));
1491   for (i=0;i<maxneighs;i++) {
1492     CHKERRQ(VecLockReadPop(quad_vecs[i]));
1493   }
1494 
1495   /* compute local quad vec */
1496   CHKERRQ(MatISGetLocalMat(divudotp,&loc_divudotp));
1497   if (!transpose) {
1498     CHKERRQ(MatCreateVecs(loc_divudotp,&v,&p));
1499   } else {
1500     CHKERRQ(MatCreateVecs(loc_divudotp,&p,&v));
1501   }
1502   CHKERRQ(VecSet(p,1.));
1503   if (!transpose) {
1504     CHKERRQ(MatMultTranspose(loc_divudotp,p,v));
1505   } else {
1506     CHKERRQ(MatMult(loc_divudotp,p,v));
1507   }
1508   if (vl2l) {
1509     Mat        lA;
1510     VecScatter sc;
1511 
1512     CHKERRQ(MatISGetLocalMat(A,&lA));
1513     CHKERRQ(MatCreateVecs(lA,&vins,NULL));
1514     CHKERRQ(VecScatterCreate(v,NULL,vins,vl2l,&sc));
1515     CHKERRQ(VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD));
1516     CHKERRQ(VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD));
1517     CHKERRQ(VecScatterDestroy(&sc));
1518   } else {
1519     vins = v;
1520   }
1521   CHKERRQ(VecGetArrayRead(vins,&array));
1522   CHKERRQ(VecDestroy(&p));
1523 
1524   /* insert in global quadrature vecs */
1525   CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
1526   for (i=1;i<n_neigh;i++) {
1527     const PetscInt    *idxs;
1528     PetscInt          idx,nn,j;
1529 
1530     idxs = shared[i];
1531     nn   = n_shared[i];
1532     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1533     CHKERRQ(PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx));
1534     idx  = -(idx+1);
1535     PetscCheckFalse(idx < 0 || idx >= maxneighs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs);
1536     CHKERRQ(ISLocalToGlobalMappingApply(map,nn,idxs,gidxs));
1537     CHKERRQ(VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES));
1538   }
1539   CHKERRQ(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared));
1540   CHKERRQ(VecRestoreArrayRead(vins,&array));
1541   if (vl2l) {
1542     CHKERRQ(VecDestroy(&vins));
1543   }
1544   CHKERRQ(VecDestroy(&v));
1545   CHKERRQ(PetscFree2(gidxs,vals));
1546 
1547   /* assemble near null space */
1548   for (i=0;i<maxneighs;i++) {
1549     CHKERRQ(VecAssemblyBegin(quad_vecs[i]));
1550   }
1551   for (i=0;i<maxneighs;i++) {
1552     CHKERRQ(VecAssemblyEnd(quad_vecs[i]));
1553     CHKERRQ(VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view"));
1554     CHKERRQ(VecLockReadPush(quad_vecs[i]));
1555   }
1556   CHKERRQ(VecDestroyVecs(maxneighs,&quad_vecs));
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1561 {
1562   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1563 
1564   PetscFunctionBegin;
1565   if (primalv) {
1566     if (pcbddc->user_primal_vertices_local) {
1567       IS list[2], newp;
1568 
1569       list[0] = primalv;
1570       list[1] = pcbddc->user_primal_vertices_local;
1571       CHKERRQ(ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp));
1572       CHKERRQ(ISSortRemoveDups(newp));
1573       CHKERRQ(ISDestroy(&list[1]));
1574       pcbddc->user_primal_vertices_local = newp;
1575     } else {
1576       CHKERRQ(PCBDDCSetPrimalVerticesLocalIS(pc,primalv));
1577     }
1578   }
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1583 {
1584   PetscInt f, *comp  = (PetscInt *)ctx;
1585 
1586   PetscFunctionBegin;
1587   for (f=0;f<Nf;f++) out[f] = X[*comp];
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1592 {
1593   PetscErrorCode ierr;
1594   Vec            local,global;
1595   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1596   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
1597   PetscBool      monolithic = PETSC_FALSE;
1598 
1599   PetscFunctionBegin;
1600   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr);
1601   CHKERRQ(PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL));
1602   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1603   /* need to convert from global to local topology information and remove references to information in global ordering */
1604   CHKERRQ(MatCreateVecs(pc->pmat,&global,NULL));
1605   CHKERRQ(MatCreateVecs(matis->A,&local,NULL));
1606   CHKERRQ(VecBindToCPU(global,PETSC_TRUE));
1607   CHKERRQ(VecBindToCPU(local,PETSC_TRUE));
1608   if (monolithic) { /* just get block size to properly compute vertices */
1609     if (pcbddc->vertex_size == 1) {
1610       CHKERRQ(MatGetBlockSize(pc->pmat,&pcbddc->vertex_size));
1611     }
1612     goto boundary;
1613   }
1614 
1615   if (pcbddc->user_provided_isfordofs) {
1616     if (pcbddc->n_ISForDofs) {
1617       PetscInt i;
1618 
1619       CHKERRQ(PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal));
1620       for (i=0;i<pcbddc->n_ISForDofs;i++) {
1621         PetscInt bs;
1622 
1623         CHKERRQ(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]));
1624         CHKERRQ(ISGetBlockSize(pcbddc->ISForDofs[i],&bs));
1625         CHKERRQ(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs));
1626         CHKERRQ(ISDestroy(&pcbddc->ISForDofs[i]));
1627       }
1628       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1629       pcbddc->n_ISForDofs = 0;
1630       CHKERRQ(PetscFree(pcbddc->ISForDofs));
1631     }
1632   } else {
1633     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1634       DM dm;
1635 
1636       CHKERRQ(MatGetDM(pc->pmat, &dm));
1637       if (!dm) {
1638         CHKERRQ(PCGetDM(pc, &dm));
1639       }
1640       if (dm) {
1641         IS      *fields;
1642         PetscInt nf,i;
1643 
1644         CHKERRQ(DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL));
1645         CHKERRQ(PetscMalloc1(nf,&pcbddc->ISForDofsLocal));
1646         for (i=0;i<nf;i++) {
1647           PetscInt bs;
1648 
1649           CHKERRQ(PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]));
1650           CHKERRQ(ISGetBlockSize(fields[i],&bs));
1651           CHKERRQ(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs));
1652           CHKERRQ(ISDestroy(&fields[i]));
1653         }
1654         CHKERRQ(PetscFree(fields));
1655         pcbddc->n_ISForDofsLocal = nf;
1656       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1657         PetscContainer   c;
1658 
1659         CHKERRQ(PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c));
1660         if (c) {
1661           MatISLocalFields lf;
1662           CHKERRQ(PetscContainerGetPointer(c,(void**)&lf));
1663           CHKERRQ(PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf));
1664         } else { /* fallback, create the default fields if bs > 1 */
1665           PetscInt i, n = matis->A->rmap->n;
1666           CHKERRQ(MatGetBlockSize(pc->pmat,&i));
1667           if (i > 1) {
1668             pcbddc->n_ISForDofsLocal = i;
1669             CHKERRQ(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal));
1670             for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1671               CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]));
1672             }
1673           }
1674         }
1675       }
1676     } else {
1677       PetscInt i;
1678       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1679         CHKERRQ(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]));
1680       }
1681     }
1682   }
1683 
1684 boundary:
1685   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1686     CHKERRQ(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal));
1687   } else if (pcbddc->DirichletBoundariesLocal) {
1688     CHKERRQ(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal));
1689   }
1690   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1691     CHKERRQ(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal));
1692   } else if (pcbddc->NeumannBoundariesLocal) {
1693     CHKERRQ(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal));
1694   }
1695   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1696     CHKERRQ(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local));
1697   }
1698   CHKERRQ(VecDestroy(&global));
1699   CHKERRQ(VecDestroy(&local));
1700   /* detect local disconnected subdomains if requested (use matis->A) */
1701   if (pcbddc->detect_disconnected) {
1702     IS        primalv = NULL;
1703     PetscInt  i;
1704     PetscBool filter = pcbddc->detect_disconnected_filter;
1705 
1706     for (i=0;i<pcbddc->n_local_subs;i++) {
1707       CHKERRQ(ISDestroy(&pcbddc->local_subs[i]));
1708     }
1709     CHKERRQ(PetscFree(pcbddc->local_subs));
1710     CHKERRQ(PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv));
1711     CHKERRQ(PCBDDCAddPrimalVerticesLocalIS(pc,primalv));
1712     CHKERRQ(ISDestroy(&primalv));
1713   }
1714   /* early stage corner detection */
1715   {
1716     DM dm;
1717 
1718     CHKERRQ(MatGetDM(pc->pmat,&dm));
1719     if (!dm) {
1720       CHKERRQ(PCGetDM(pc,&dm));
1721     }
1722     if (dm) {
1723       PetscBool isda;
1724 
1725       CHKERRQ(PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda));
1726       if (isda) {
1727         ISLocalToGlobalMapping l2l;
1728         IS                     corners;
1729         Mat                    lA;
1730         PetscBool              gl,lo;
1731 
1732         {
1733           Vec               cvec;
1734           const PetscScalar *coords;
1735           PetscInt          dof,n,cdim;
1736           PetscBool         memc = PETSC_TRUE;
1737 
1738           CHKERRQ(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL));
1739           CHKERRQ(DMGetCoordinates(dm,&cvec));
1740           CHKERRQ(VecGetLocalSize(cvec,&n));
1741           CHKERRQ(VecGetBlockSize(cvec,&cdim));
1742           n   /= cdim;
1743           CHKERRQ(PetscFree(pcbddc->mat_graph->coords));
1744           CHKERRQ(PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords));
1745           CHKERRQ(VecGetArrayRead(cvec,&coords));
1746 #if defined(PETSC_USE_COMPLEX)
1747           memc = PETSC_FALSE;
1748 #endif
1749           if (dof != 1) memc = PETSC_FALSE;
1750           if (memc) {
1751             CHKERRQ(PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof));
1752           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1753             PetscReal *bcoords = pcbddc->mat_graph->coords;
1754             PetscInt  i, b, d;
1755 
1756             for (i=0;i<n;i++) {
1757               for (b=0;b<dof;b++) {
1758                 for (d=0;d<cdim;d++) {
1759                   bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1760                 }
1761               }
1762             }
1763           }
1764           CHKERRQ(VecRestoreArrayRead(cvec,&coords));
1765           pcbddc->mat_graph->cdim  = cdim;
1766           pcbddc->mat_graph->cnloc = dof*n;
1767           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1768         }
1769         CHKERRQ(DMDAGetSubdomainCornersIS(dm,&corners));
1770         CHKERRQ(MatISGetLocalMat(pc->pmat,&lA));
1771         CHKERRQ(MatGetLocalToGlobalMapping(lA,&l2l,NULL));
1772         CHKERRQ(MatISRestoreLocalMat(pc->pmat,&lA));
1773         lo   = (PetscBool)(l2l && corners);
1774         CHKERRMPI(MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc)));
1775         if (gl) { /* From PETSc's DMDA */
1776           const PetscInt    *idx;
1777           PetscInt          dof,bs,*idxout,n;
1778 
1779           CHKERRQ(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL));
1780           CHKERRQ(ISLocalToGlobalMappingGetBlockSize(l2l,&bs));
1781           CHKERRQ(ISGetLocalSize(corners,&n));
1782           CHKERRQ(ISGetIndices(corners,&idx));
1783           if (bs == dof) {
1784             CHKERRQ(PetscMalloc1(n,&idxout));
1785             CHKERRQ(ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout));
1786           } else { /* the original DMDA local-to-local map have been modified */
1787             PetscInt i,d;
1788 
1789             CHKERRQ(PetscMalloc1(dof*n,&idxout));
1790             for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1791             CHKERRQ(ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout));
1792 
1793             bs = 1;
1794             n *= dof;
1795           }
1796           CHKERRQ(ISRestoreIndices(corners,&idx));
1797           CHKERRQ(DMDARestoreSubdomainCornersIS(dm,&corners));
1798           CHKERRQ(ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners));
1799           CHKERRQ(PCBDDCAddPrimalVerticesLocalIS(pc,corners));
1800           CHKERRQ(ISDestroy(&corners));
1801           pcbddc->corner_selected  = PETSC_TRUE;
1802           pcbddc->corner_selection = PETSC_TRUE;
1803         }
1804         if (corners) {
1805           CHKERRQ(DMDARestoreSubdomainCornersIS(dm,&corners));
1806         }
1807       }
1808     }
1809   }
1810   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1811     DM dm;
1812 
1813     CHKERRQ(MatGetDM(pc->pmat,&dm));
1814     if (!dm) {
1815       CHKERRQ(PCGetDM(pc,&dm));
1816     }
1817     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1818       Vec            vcoords;
1819       PetscSection   section;
1820       PetscReal      *coords;
1821       PetscInt       d,cdim,nl,nf,**ctxs;
1822       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1823 
1824       CHKERRQ(DMGetCoordinateDim(dm,&cdim));
1825       CHKERRQ(DMGetLocalSection(dm,&section));
1826       CHKERRQ(PetscSectionGetNumFields(section,&nf));
1827       CHKERRQ(DMCreateGlobalVector(dm,&vcoords));
1828       CHKERRQ(VecGetLocalSize(vcoords,&nl));
1829       CHKERRQ(PetscMalloc1(nl*cdim,&coords));
1830       CHKERRQ(PetscMalloc2(nf,&funcs,nf,&ctxs));
1831       CHKERRQ(PetscMalloc1(nf,&ctxs[0]));
1832       for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1833       for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1834       for (d=0;d<cdim;d++) {
1835         PetscInt          i;
1836         const PetscScalar *v;
1837 
1838         for (i=0;i<nf;i++) ctxs[i][0] = d;
1839         CHKERRQ(DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords));
1840         CHKERRQ(VecGetArrayRead(vcoords,&v));
1841         for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1842         CHKERRQ(VecRestoreArrayRead(vcoords,&v));
1843       }
1844       CHKERRQ(VecDestroy(&vcoords));
1845       CHKERRQ(PCSetCoordinates(pc,cdim,nl,coords));
1846       CHKERRQ(PetscFree(coords));
1847       CHKERRQ(PetscFree(ctxs[0]));
1848       CHKERRQ(PetscFree2(funcs,ctxs));
1849     }
1850   }
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1855 {
1856   Mat_IS          *matis = (Mat_IS*)(pc->pmat->data);
1857   IS              nis;
1858   const PetscInt  *idxs;
1859   PetscInt        i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1860 
1861   PetscFunctionBegin;
1862   PetscCheckFalse(mop != MPI_LAND && mop != MPI_LOR,PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1863   if (mop == MPI_LAND) {
1864     /* init rootdata with true */
1865     for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1;
1866   } else {
1867     CHKERRQ(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n));
1868   }
1869   CHKERRQ(PetscArrayzero(matis->sf_leafdata,n));
1870   CHKERRQ(ISGetLocalSize(*is,&nd));
1871   CHKERRQ(ISGetIndices(*is,&idxs));
1872   for (i=0;i<nd;i++)
1873     if (-1 < idxs[i] && idxs[i] < n)
1874       matis->sf_leafdata[idxs[i]] = 1;
1875   CHKERRQ(ISRestoreIndices(*is,&idxs));
1876   CHKERRQ(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop));
1877   CHKERRQ(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop));
1878   CHKERRQ(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE));
1879   CHKERRQ(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE));
1880   if (mop == MPI_LAND) {
1881     CHKERRQ(PetscMalloc1(nd,&nidxs));
1882   } else {
1883     CHKERRQ(PetscMalloc1(n,&nidxs));
1884   }
1885   for (i=0,nnd=0;i<n;i++)
1886     if (matis->sf_leafdata[i])
1887       nidxs[nnd++] = i;
1888   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis));
1889   CHKERRQ(ISDestroy(is));
1890   *is  = nis;
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1895 {
1896   PC_IS             *pcis = (PC_IS*)(pc->data);
1897   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
1898 
1899   PetscFunctionBegin;
1900   if (!pcbddc->benign_have_null) {
1901     PetscFunctionReturn(0);
1902   }
1903   if (pcbddc->ChangeOfBasisMatrix) {
1904     Vec swap;
1905 
1906     CHKERRQ(MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change));
1907     swap = pcbddc->work_change;
1908     pcbddc->work_change = r;
1909     r = swap;
1910   }
1911   CHKERRQ(VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD));
1912   CHKERRQ(VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD));
1913   CHKERRQ(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0));
1914   CHKERRQ(KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D));
1915   CHKERRQ(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0));
1916   CHKERRQ(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D));
1917   CHKERRQ(VecSet(z,0.));
1918   CHKERRQ(VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE));
1919   CHKERRQ(VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE));
1920   if (pcbddc->ChangeOfBasisMatrix) {
1921     pcbddc->work_change = r;
1922     CHKERRQ(VecCopy(z,pcbddc->work_change));
1923     CHKERRQ(MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z));
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1929 {
1930   PCBDDCBenignMatMult_ctx ctx;
1931   PetscBool               apply_right,apply_left,reset_x;
1932 
1933   PetscFunctionBegin;
1934   CHKERRQ(MatShellGetContext(A,&ctx));
1935   if (transpose) {
1936     apply_right = ctx->apply_left;
1937     apply_left = ctx->apply_right;
1938   } else {
1939     apply_right = ctx->apply_right;
1940     apply_left = ctx->apply_left;
1941   }
1942   reset_x = PETSC_FALSE;
1943   if (apply_right) {
1944     const PetscScalar *ax;
1945     PetscInt          nl,i;
1946 
1947     CHKERRQ(VecGetLocalSize(x,&nl));
1948     CHKERRQ(VecGetArrayRead(x,&ax));
1949     CHKERRQ(PetscArraycpy(ctx->work,ax,nl));
1950     CHKERRQ(VecRestoreArrayRead(x,&ax));
1951     for (i=0;i<ctx->benign_n;i++) {
1952       PetscScalar    sum,val;
1953       const PetscInt *idxs;
1954       PetscInt       nz,j;
1955       CHKERRQ(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz));
1956       CHKERRQ(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs));
1957       sum = 0.;
1958       if (ctx->apply_p0) {
1959         val = ctx->work[idxs[nz-1]];
1960         for (j=0;j<nz-1;j++) {
1961           sum += ctx->work[idxs[j]];
1962           ctx->work[idxs[j]] += val;
1963         }
1964       } else {
1965         for (j=0;j<nz-1;j++) {
1966           sum += ctx->work[idxs[j]];
1967         }
1968       }
1969       ctx->work[idxs[nz-1]] -= sum;
1970       CHKERRQ(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs));
1971     }
1972     CHKERRQ(VecPlaceArray(x,ctx->work));
1973     reset_x = PETSC_TRUE;
1974   }
1975   if (transpose) {
1976     CHKERRQ(MatMultTranspose(ctx->A,x,y));
1977   } else {
1978     CHKERRQ(MatMult(ctx->A,x,y));
1979   }
1980   if (reset_x) {
1981     CHKERRQ(VecResetArray(x));
1982   }
1983   if (apply_left) {
1984     PetscScalar *ay;
1985     PetscInt    i;
1986 
1987     CHKERRQ(VecGetArray(y,&ay));
1988     for (i=0;i<ctx->benign_n;i++) {
1989       PetscScalar    sum,val;
1990       const PetscInt *idxs;
1991       PetscInt       nz,j;
1992       CHKERRQ(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz));
1993       CHKERRQ(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs));
1994       val = -ay[idxs[nz-1]];
1995       if (ctx->apply_p0) {
1996         sum = 0.;
1997         for (j=0;j<nz-1;j++) {
1998           sum += ay[idxs[j]];
1999           ay[idxs[j]] += val;
2000         }
2001         ay[idxs[nz-1]] += sum;
2002       } else {
2003         for (j=0;j<nz-1;j++) {
2004           ay[idxs[j]] += val;
2005         }
2006         ay[idxs[nz-1]] = 0.;
2007       }
2008       CHKERRQ(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs));
2009     }
2010     CHKERRQ(VecRestoreArray(y,&ay));
2011   }
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2016 {
2017   PetscFunctionBegin;
2018   CHKERRQ(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE));
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2023 {
2024   PetscFunctionBegin;
2025   CHKERRQ(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE));
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2030 {
2031   PC_IS                   *pcis = (PC_IS*)pc->data;
2032   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
2033   PCBDDCBenignMatMult_ctx ctx;
2034 
2035   PetscFunctionBegin;
2036   if (!restore) {
2037     Mat                A_IB,A_BI;
2038     PetscScalar        *work;
2039     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2040 
2041     PetscCheck(!pcbddc->benign_original_mat,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2042     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0);
2043     CHKERRQ(PetscMalloc1(pcis->n,&work));
2044     CHKERRQ(MatCreate(PETSC_COMM_SELF,&A_IB));
2045     CHKERRQ(MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE));
2046     CHKERRQ(MatSetType(A_IB,MATSHELL));
2047     CHKERRQ(MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private));
2048     CHKERRQ(MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private));
2049     CHKERRQ(PetscNew(&ctx));
2050     CHKERRQ(MatShellSetContext(A_IB,ctx));
2051     ctx->apply_left = PETSC_TRUE;
2052     ctx->apply_right = PETSC_FALSE;
2053     ctx->apply_p0 = PETSC_FALSE;
2054     ctx->benign_n = pcbddc->benign_n;
2055     if (reuse) {
2056       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2057       ctx->free = PETSC_FALSE;
2058     } else { /* TODO: could be optimized for successive solves */
2059       ISLocalToGlobalMapping N_to_D;
2060       PetscInt               i;
2061 
2062       CHKERRQ(ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D));
2063       CHKERRQ(PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs));
2064       for (i=0;i<pcbddc->benign_n;i++) {
2065         CHKERRQ(ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]));
2066       }
2067       CHKERRQ(ISLocalToGlobalMappingDestroy(&N_to_D));
2068       ctx->free = PETSC_TRUE;
2069     }
2070     ctx->A = pcis->A_IB;
2071     ctx->work = work;
2072     CHKERRQ(MatSetUp(A_IB));
2073     CHKERRQ(MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY));
2074     CHKERRQ(MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY));
2075     pcis->A_IB = A_IB;
2076 
2077     /* A_BI as A_IB^T */
2078     CHKERRQ(MatCreateTranspose(A_IB,&A_BI));
2079     pcbddc->benign_original_mat = pcis->A_BI;
2080     pcis->A_BI = A_BI;
2081   } else {
2082     if (!pcbddc->benign_original_mat) {
2083       PetscFunctionReturn(0);
2084     }
2085     CHKERRQ(MatShellGetContext(pcis->A_IB,&ctx));
2086     CHKERRQ(MatDestroy(&pcis->A_IB));
2087     pcis->A_IB = ctx->A;
2088     ctx->A = NULL;
2089     CHKERRQ(MatDestroy(&pcis->A_BI));
2090     pcis->A_BI = pcbddc->benign_original_mat;
2091     pcbddc->benign_original_mat = NULL;
2092     if (ctx->free) {
2093       PetscInt i;
2094       for (i=0;i<ctx->benign_n;i++) {
2095         CHKERRQ(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2096       }
2097       CHKERRQ(PetscFree(ctx->benign_zerodiag_subs));
2098     }
2099     CHKERRQ(PetscFree(ctx->work));
2100     CHKERRQ(PetscFree(ctx));
2101   }
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 /* used just in bddc debug mode */
2106 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2107 {
2108   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2109   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
2110   Mat            An;
2111 
2112   PetscFunctionBegin;
2113   CHKERRQ(MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An));
2114   CHKERRQ(MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL));
2115   if (is1) {
2116     CHKERRQ(MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B));
2117     CHKERRQ(MatDestroy(&An));
2118   } else {
2119     *B = An;
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 /* TODO: add reuse flag */
2125 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2126 {
2127   Mat            Bt;
2128   PetscScalar    *a,*bdata;
2129   const PetscInt *ii,*ij;
2130   PetscInt       m,n,i,nnz,*bii,*bij;
2131   PetscBool      flg_row;
2132 
2133   PetscFunctionBegin;
2134   CHKERRQ(MatGetSize(A,&n,&m));
2135   CHKERRQ(MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row));
2136   CHKERRQ(MatSeqAIJGetArray(A,&a));
2137   nnz = n;
2138   for (i=0;i<ii[n];i++) {
2139     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2140   }
2141   CHKERRQ(PetscMalloc1(n+1,&bii));
2142   CHKERRQ(PetscMalloc1(nnz,&bij));
2143   CHKERRQ(PetscMalloc1(nnz,&bdata));
2144   nnz = 0;
2145   bii[0] = 0;
2146   for (i=0;i<n;i++) {
2147     PetscInt j;
2148     for (j=ii[i];j<ii[i+1];j++) {
2149       PetscScalar entry = a[j];
2150       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2151         bij[nnz] = ij[j];
2152         bdata[nnz] = entry;
2153         nnz++;
2154       }
2155     }
2156     bii[i+1] = nnz;
2157   }
2158   CHKERRQ(MatSeqAIJRestoreArray(A,&a));
2159   CHKERRQ(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt));
2160   CHKERRQ(MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row));
2161   {
2162     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2163     b->free_a = PETSC_TRUE;
2164     b->free_ij = PETSC_TRUE;
2165   }
2166   if (*B == A) {
2167     CHKERRQ(MatDestroy(&A));
2168   }
2169   *B = Bt;
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2174 {
2175   Mat                    B = NULL;
2176   DM                     dm;
2177   IS                     is_dummy,*cc_n;
2178   ISLocalToGlobalMapping l2gmap_dummy;
2179   PCBDDCGraph            graph;
2180   PetscInt               *xadj_filtered = NULL,*adjncy_filtered = NULL;
2181   PetscInt               i,n;
2182   PetscInt               *xadj,*adjncy;
2183   PetscBool              isplex = PETSC_FALSE;
2184 
2185   PetscFunctionBegin;
2186   if (ncc) *ncc = 0;
2187   if (cc) *cc = NULL;
2188   if (primalv) *primalv = NULL;
2189   CHKERRQ(PCBDDCGraphCreate(&graph));
2190   CHKERRQ(MatGetDM(pc->pmat,&dm));
2191   if (!dm) {
2192     CHKERRQ(PCGetDM(pc,&dm));
2193   }
2194   if (dm) {
2195     CHKERRQ(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex));
2196   }
2197   if (filter) isplex = PETSC_FALSE;
2198 
2199   if (isplex) { /* this code has been modified from plexpartition.c */
2200     PetscInt       p, pStart, pEnd, a, adjSize, idx, size, nroots;
2201     PetscInt      *adj = NULL;
2202     IS             cellNumbering;
2203     const PetscInt *cellNum;
2204     PetscBool      useCone, useClosure;
2205     PetscSection   section;
2206     PetscSegBuffer adjBuffer;
2207     PetscSF        sfPoint;
2208 
2209     CHKERRQ(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2210     CHKERRQ(DMGetPointSF(dm, &sfPoint));
2211     CHKERRQ(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2212     /* Build adjacency graph via a section/segbuffer */
2213     CHKERRQ(PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section));
2214     CHKERRQ(PetscSectionSetChart(section, pStart, pEnd));
2215     CHKERRQ(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer));
2216     /* Always use FVM adjacency to create partitioner graph */
2217     CHKERRQ(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2218     CHKERRQ(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2219     CHKERRQ(DMPlexGetCellNumbering(dm, &cellNumbering));
2220     CHKERRQ(ISGetIndices(cellNumbering, &cellNum));
2221     for (n = 0, p = pStart; p < pEnd; p++) {
2222       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2223       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2224       adjSize = PETSC_DETERMINE;
2225       CHKERRQ(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2226       for (a = 0; a < adjSize; ++a) {
2227         const PetscInt point = adj[a];
2228         if (pStart <= point && point < pEnd) {
2229           PetscInt *PETSC_RESTRICT pBuf;
2230           CHKERRQ(PetscSectionAddDof(section, p, 1));
2231           CHKERRQ(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2232           *pBuf = point;
2233         }
2234       }
2235       n++;
2236     }
2237     CHKERRQ(DMSetBasicAdjacency(dm, useCone, useClosure));
2238     /* Derive CSR graph from section/segbuffer */
2239     CHKERRQ(PetscSectionSetUp(section));
2240     CHKERRQ(PetscSectionGetStorageSize(section, &size));
2241     CHKERRQ(PetscMalloc1(n+1, &xadj));
2242     for (idx = 0, p = pStart; p < pEnd; p++) {
2243       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2244       CHKERRQ(PetscSectionGetOffset(section, p, &(xadj[idx++])));
2245     }
2246     xadj[n] = size;
2247     CHKERRQ(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2248     /* Clean up */
2249     CHKERRQ(PetscSegBufferDestroy(&adjBuffer));
2250     CHKERRQ(PetscSectionDestroy(&section));
2251     CHKERRQ(PetscFree(adj));
2252     graph->xadj = xadj;
2253     graph->adjncy = adjncy;
2254   } else {
2255     Mat       A;
2256     PetscBool isseqaij, flg_row;
2257 
2258     CHKERRQ(MatISGetLocalMat(pc->pmat,&A));
2259     if (!A->rmap->N || !A->cmap->N) {
2260       CHKERRQ(PCBDDCGraphDestroy(&graph));
2261       PetscFunctionReturn(0);
2262     }
2263     CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij));
2264     if (!isseqaij && filter) {
2265       PetscBool isseqdense;
2266 
2267       CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense));
2268       if (!isseqdense) {
2269         CHKERRQ(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
2270       } else { /* TODO: rectangular case and LDA */
2271         PetscScalar *array;
2272         PetscReal   chop=1.e-6;
2273 
2274         CHKERRQ(MatDuplicate(A,MAT_COPY_VALUES,&B));
2275         CHKERRQ(MatDenseGetArray(B,&array));
2276         CHKERRQ(MatGetSize(B,&n,NULL));
2277         for (i=0;i<n;i++) {
2278           PetscInt j;
2279           for (j=i+1;j<n;j++) {
2280             PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2281             if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2282             if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2283           }
2284         }
2285         CHKERRQ(MatDenseRestoreArray(B,&array));
2286         CHKERRQ(MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B));
2287       }
2288     } else {
2289       CHKERRQ(PetscObjectReference((PetscObject)A));
2290       B = A;
2291     }
2292     CHKERRQ(MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row));
2293 
2294     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2295     if (filter) {
2296       PetscScalar *data;
2297       PetscInt    j,cum;
2298 
2299       CHKERRQ(PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered));
2300       CHKERRQ(MatSeqAIJGetArray(B,&data));
2301       cum = 0;
2302       for (i=0;i<n;i++) {
2303         PetscInt t;
2304 
2305         for (j=xadj[i];j<xadj[i+1];j++) {
2306           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2307             continue;
2308           }
2309           adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2310         }
2311         t = xadj_filtered[i];
2312         xadj_filtered[i] = cum;
2313         cum += t;
2314       }
2315       CHKERRQ(MatSeqAIJRestoreArray(B,&data));
2316       graph->xadj = xadj_filtered;
2317       graph->adjncy = adjncy_filtered;
2318     } else {
2319       graph->xadj = xadj;
2320       graph->adjncy = adjncy;
2321     }
2322   }
2323   /* compute local connected components using PCBDDCGraph */
2324   CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy));
2325   CHKERRQ(ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy));
2326   CHKERRQ(ISDestroy(&is_dummy));
2327   CHKERRQ(PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT));
2328   CHKERRQ(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2329   CHKERRQ(PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL));
2330   CHKERRQ(PCBDDCGraphComputeConnectedComponents(graph));
2331 
2332   /* partial clean up */
2333   CHKERRQ(PetscFree2(xadj_filtered,adjncy_filtered));
2334   if (B) {
2335     PetscBool flg_row;
2336     CHKERRQ(MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row));
2337     CHKERRQ(MatDestroy(&B));
2338   }
2339   if (isplex) {
2340     CHKERRQ(PetscFree(xadj));
2341     CHKERRQ(PetscFree(adjncy));
2342   }
2343 
2344   /* get back data */
2345   if (isplex) {
2346     if (ncc) *ncc = graph->ncc;
2347     if (cc || primalv) {
2348       Mat          A;
2349       PetscBT      btv,btvt;
2350       PetscSection subSection;
2351       PetscInt     *ids,cum,cump,*cids,*pids;
2352 
2353       CHKERRQ(DMPlexGetSubdomainSection(dm,&subSection));
2354       CHKERRQ(MatISGetLocalMat(pc->pmat,&A));
2355       CHKERRQ(PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids));
2356       CHKERRQ(PetscBTCreate(A->rmap->n,&btv));
2357       CHKERRQ(PetscBTCreate(A->rmap->n,&btvt));
2358 
2359       cids[0] = 0;
2360       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2361         PetscInt j;
2362 
2363         CHKERRQ(PetscBTMemzero(A->rmap->n,btvt));
2364         for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2365           PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2366 
2367           CHKERRQ(DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure));
2368           for (k = 0; k < 2*size; k += 2) {
2369             PetscInt s, pp, p = closure[k], off, dof, cdof;
2370 
2371             CHKERRQ(PetscSectionGetConstraintDof(subSection,p,&cdof));
2372             CHKERRQ(PetscSectionGetOffset(subSection,p,&off));
2373             CHKERRQ(PetscSectionGetDof(subSection,p,&dof));
2374             for (s = 0; s < dof-cdof; s++) {
2375               if (PetscBTLookupSet(btvt,off+s)) continue;
2376               if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s;
2377               else pids[cump++] = off+s; /* cross-vertex */
2378             }
2379             CHKERRQ(DMPlexGetTreeParent(dm,p,&pp,NULL));
2380             if (pp != p) {
2381               CHKERRQ(PetscSectionGetConstraintDof(subSection,pp,&cdof));
2382               CHKERRQ(PetscSectionGetOffset(subSection,pp,&off));
2383               CHKERRQ(PetscSectionGetDof(subSection,pp,&dof));
2384               for (s = 0; s < dof-cdof; s++) {
2385                 if (PetscBTLookupSet(btvt,off+s)) continue;
2386                 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s;
2387                 else pids[cump++] = off+s; /* cross-vertex */
2388               }
2389             }
2390           }
2391           CHKERRQ(DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure));
2392         }
2393         cids[i+1] = cum;
2394         /* mark dofs as already assigned */
2395         for (j = cids[i]; j < cids[i+1]; j++) {
2396           CHKERRQ(PetscBTSet(btv,ids[j]));
2397         }
2398       }
2399       if (cc) {
2400         CHKERRQ(PetscMalloc1(graph->ncc,&cc_n));
2401         for (i = 0; i < graph->ncc; i++) {
2402           CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]));
2403         }
2404         *cc = cc_n;
2405       }
2406       if (primalv) {
2407         CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv));
2408       }
2409       CHKERRQ(PetscFree3(ids,cids,pids));
2410       CHKERRQ(PetscBTDestroy(&btv));
2411       CHKERRQ(PetscBTDestroy(&btvt));
2412     }
2413   } else {
2414     if (ncc) *ncc = graph->ncc;
2415     if (cc) {
2416       CHKERRQ(PetscMalloc1(graph->ncc,&cc_n));
2417       for (i=0;i<graph->ncc;i++) {
2418         CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]));
2419       }
2420       *cc = cc_n;
2421     }
2422   }
2423   /* clean up graph */
2424   graph->xadj = NULL;
2425   graph->adjncy = NULL;
2426   CHKERRQ(PCBDDCGraphDestroy(&graph));
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2431 {
2432   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2433   PC_IS*         pcis = (PC_IS*)(pc->data);
2434   IS             dirIS = NULL;
2435   PetscInt       i;
2436 
2437   PetscFunctionBegin;
2438   CHKERRQ(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS));
2439   if (zerodiag) {
2440     Mat            A;
2441     Vec            vec3_N;
2442     PetscScalar    *vals;
2443     const PetscInt *idxs;
2444     PetscInt       nz,*count;
2445 
2446     /* p0 */
2447     CHKERRQ(VecSet(pcis->vec1_N,0.));
2448     CHKERRQ(PetscMalloc1(pcis->n,&vals));
2449     CHKERRQ(ISGetLocalSize(zerodiag,&nz));
2450     CHKERRQ(ISGetIndices(zerodiag,&idxs));
2451     for (i=0;i<nz;i++) vals[i] = 1.;
2452     CHKERRQ(VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES));
2453     CHKERRQ(VecAssemblyBegin(pcis->vec1_N));
2454     CHKERRQ(VecAssemblyEnd(pcis->vec1_N));
2455     /* v_I */
2456     CHKERRQ(VecSetRandom(pcis->vec2_N,NULL));
2457     for (i=0;i<nz;i++) vals[i] = 0.;
2458     CHKERRQ(VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES));
2459     CHKERRQ(ISRestoreIndices(zerodiag,&idxs));
2460     CHKERRQ(ISGetIndices(pcis->is_B_local,&idxs));
2461     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2462     CHKERRQ(VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES));
2463     CHKERRQ(ISRestoreIndices(pcis->is_B_local,&idxs));
2464     if (dirIS) {
2465       PetscInt n;
2466 
2467       CHKERRQ(ISGetLocalSize(dirIS,&n));
2468       CHKERRQ(ISGetIndices(dirIS,&idxs));
2469       for (i=0;i<n;i++) vals[i] = 0.;
2470       CHKERRQ(VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES));
2471       CHKERRQ(ISRestoreIndices(dirIS,&idxs));
2472     }
2473     CHKERRQ(VecAssemblyBegin(pcis->vec2_N));
2474     CHKERRQ(VecAssemblyEnd(pcis->vec2_N));
2475     CHKERRQ(VecDuplicate(pcis->vec1_N,&vec3_N));
2476     CHKERRQ(VecSet(vec3_N,0.));
2477     CHKERRQ(MatISGetLocalMat(pc->pmat,&A));
2478     CHKERRQ(MatMult(A,pcis->vec1_N,vec3_N));
2479     CHKERRQ(VecDot(vec3_N,pcis->vec2_N,&vals[0]));
2480     PetscCheckFalse(PetscAbsScalar(vals[0]) > 1.e-1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2481     CHKERRQ(PetscFree(vals));
2482     CHKERRQ(VecDestroy(&vec3_N));
2483 
2484     /* there should not be any pressure dofs lying on the interface */
2485     CHKERRQ(PetscCalloc1(pcis->n,&count));
2486     CHKERRQ(ISGetIndices(pcis->is_B_local,&idxs));
2487     for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2488     CHKERRQ(ISRestoreIndices(pcis->is_B_local,&idxs));
2489     CHKERRQ(ISGetIndices(zerodiag,&idxs));
2490     for (i=0;i<nz;i++) PetscCheckFalse(count[idxs[i]],PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2491     CHKERRQ(ISRestoreIndices(zerodiag,&idxs));
2492     CHKERRQ(PetscFree(count));
2493   }
2494   CHKERRQ(ISDestroy(&dirIS));
2495 
2496   /* check PCBDDCBenignGetOrSetP0 */
2497   CHKERRQ(VecSetRandom(pcis->vec1_global,NULL));
2498   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2499   CHKERRQ(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE));
2500   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2501   CHKERRQ(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE));
2502   for (i=0;i<pcbddc->benign_n;i++) {
2503     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2504     PetscCheckFalse(val != -PetscGlobalRank-i,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2505   }
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2510 {
2511   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2512   Mat_IS*        matis = (Mat_IS*)(pc->pmat->data);
2513   IS             pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2514   PetscInt       nz,n,benign_n,bsp = 1;
2515   PetscInt       *interior_dofs,n_interior_dofs,nneu;
2516   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;
2517   PetscErrorCode ierr;
2518 
2519   PetscFunctionBegin;
2520   if (reuse) goto project_b0;
2521   CHKERRQ(PetscSFDestroy(&pcbddc->benign_sf));
2522   CHKERRQ(MatDestroy(&pcbddc->benign_B0));
2523   for (n=0;n<pcbddc->benign_n;n++) {
2524     CHKERRQ(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2525   }
2526   CHKERRQ(PetscFree(pcbddc->benign_zerodiag_subs));
2527   has_null_pressures = PETSC_TRUE;
2528   have_null = PETSC_TRUE;
2529   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2530      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2531      Checks if all the pressure dofs in each subdomain have a zero diagonal
2532      If not, a change of basis on pressures is not needed
2533      since the local Schur complements are already SPD
2534   */
2535   if (pcbddc->n_ISForDofsLocal) {
2536     IS        iP = NULL;
2537     PetscInt  p,*pp;
2538     PetscBool flg;
2539 
2540     CHKERRQ(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp));
2541     n    = pcbddc->n_ISForDofsLocal;
2542     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr);
2543     CHKERRQ(PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg));
2544     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2545     if (!flg) {
2546       n = 1;
2547       pp[0] = pcbddc->n_ISForDofsLocal-1;
2548     }
2549 
2550     bsp = 0;
2551     for (p=0;p<n;p++) {
2552       PetscInt bs;
2553 
2554       PetscCheckFalse(pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1,PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]);
2555       CHKERRQ(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs));
2556       bsp += bs;
2557     }
2558     CHKERRQ(PetscMalloc1(bsp,&bzerodiag));
2559     bsp  = 0;
2560     for (p=0;p<n;p++) {
2561       const PetscInt *idxs;
2562       PetscInt       b,bs,npl,*bidxs;
2563 
2564       CHKERRQ(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs));
2565       CHKERRQ(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl));
2566       CHKERRQ(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs));
2567       CHKERRQ(PetscMalloc1(npl/bs,&bidxs));
2568       for (b=0;b<bs;b++) {
2569         PetscInt i;
2570 
2571         for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2572         CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]));
2573         bsp++;
2574       }
2575       CHKERRQ(PetscFree(bidxs));
2576       CHKERRQ(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs));
2577     }
2578     CHKERRQ(ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures));
2579 
2580     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2581     CHKERRQ(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP));
2582     if (iP) {
2583       IS newpressures;
2584 
2585       CHKERRQ(ISDifference(pressures,iP,&newpressures));
2586       CHKERRQ(ISDestroy(&pressures));
2587       pressures = newpressures;
2588     }
2589     CHKERRQ(ISSorted(pressures,&sorted));
2590     if (!sorted) {
2591       CHKERRQ(ISSort(pressures));
2592     }
2593     CHKERRQ(PetscFree(pp));
2594   }
2595 
2596   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2597   CHKERRQ(MatGetLocalSize(pcbddc->local_mat,&n,NULL));
2598   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2599   CHKERRQ(MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag));
2600   CHKERRQ(ISSorted(zerodiag,&sorted));
2601   if (!sorted) {
2602     CHKERRQ(ISSort(zerodiag));
2603   }
2604   CHKERRQ(PetscObjectReference((PetscObject)zerodiag));
2605   zerodiag_save = zerodiag;
2606   CHKERRQ(ISGetLocalSize(zerodiag,&nz));
2607   if (!nz) {
2608     if (n) have_null = PETSC_FALSE;
2609     has_null_pressures = PETSC_FALSE;
2610     CHKERRQ(ISDestroy(&zerodiag));
2611   }
2612   recompute_zerodiag = PETSC_FALSE;
2613 
2614   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2615   zerodiag_subs    = NULL;
2616   benign_n         = 0;
2617   n_interior_dofs  = 0;
2618   interior_dofs    = NULL;
2619   nneu             = 0;
2620   if (pcbddc->NeumannBoundariesLocal) {
2621     CHKERRQ(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu));
2622   }
2623   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2624   if (checkb) { /* need to compute interior nodes */
2625     PetscInt n,i,j;
2626     PetscInt n_neigh,*neigh,*n_shared,**shared;
2627     PetscInt *iwork;
2628 
2629     CHKERRQ(ISLocalToGlobalMappingGetSize(matis->rmapping,&n));
2630     CHKERRQ(ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared));
2631     CHKERRQ(PetscCalloc1(n,&iwork));
2632     CHKERRQ(PetscMalloc1(n,&interior_dofs));
2633     for (i=1;i<n_neigh;i++)
2634       for (j=0;j<n_shared[i];j++)
2635           iwork[shared[i][j]] += 1;
2636     for (i=0;i<n;i++)
2637       if (!iwork[i])
2638         interior_dofs[n_interior_dofs++] = i;
2639     CHKERRQ(PetscFree(iwork));
2640     CHKERRQ(ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared));
2641   }
2642   if (has_null_pressures) {
2643     IS             *subs;
2644     PetscInt       nsubs,i,j,nl;
2645     const PetscInt *idxs;
2646     PetscScalar    *array;
2647     Vec            *work;
2648 
2649     subs  = pcbddc->local_subs;
2650     nsubs = pcbddc->n_local_subs;
2651     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2652     if (checkb) {
2653       CHKERRQ(VecDuplicateVecs(matis->y,2,&work));
2654       CHKERRQ(ISGetLocalSize(zerodiag,&nl));
2655       CHKERRQ(ISGetIndices(zerodiag,&idxs));
2656       /* work[0] = 1_p */
2657       CHKERRQ(VecSet(work[0],0.));
2658       CHKERRQ(VecGetArray(work[0],&array));
2659       for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2660       CHKERRQ(VecRestoreArray(work[0],&array));
2661       /* work[0] = 1_v */
2662       CHKERRQ(VecSet(work[1],1.));
2663       CHKERRQ(VecGetArray(work[1],&array));
2664       for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2665       CHKERRQ(VecRestoreArray(work[1],&array));
2666       CHKERRQ(ISRestoreIndices(zerodiag,&idxs));
2667     }
2668 
2669     if (nsubs > 1 || bsp > 1) {
2670       IS       *is;
2671       PetscInt b,totb;
2672 
2673       totb  = bsp;
2674       is    = bsp > 1 ? bzerodiag : &zerodiag;
2675       nsubs = PetscMax(nsubs,1);
2676       CHKERRQ(PetscCalloc1(nsubs*totb,&zerodiag_subs));
2677       for (b=0;b<totb;b++) {
2678         for (i=0;i<nsubs;i++) {
2679           ISLocalToGlobalMapping l2g;
2680           IS                     t_zerodiag_subs;
2681           PetscInt               nl;
2682 
2683           if (subs) {
2684             CHKERRQ(ISLocalToGlobalMappingCreateIS(subs[i],&l2g));
2685           } else {
2686             IS tis;
2687 
2688             CHKERRQ(MatGetLocalSize(pcbddc->local_mat,&nl,NULL));
2689             CHKERRQ(ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis));
2690             CHKERRQ(ISLocalToGlobalMappingCreateIS(tis,&l2g));
2691             CHKERRQ(ISDestroy(&tis));
2692           }
2693           CHKERRQ(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs));
2694           CHKERRQ(ISGetLocalSize(t_zerodiag_subs,&nl));
2695           if (nl) {
2696             PetscBool valid = PETSC_TRUE;
2697 
2698             if (checkb) {
2699               CHKERRQ(VecSet(matis->x,0));
2700               CHKERRQ(ISGetLocalSize(subs[i],&nl));
2701               CHKERRQ(ISGetIndices(subs[i],&idxs));
2702               CHKERRQ(VecGetArray(matis->x,&array));
2703               for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2704               CHKERRQ(VecRestoreArray(matis->x,&array));
2705               CHKERRQ(ISRestoreIndices(subs[i],&idxs));
2706               CHKERRQ(VecPointwiseMult(matis->x,work[0],matis->x));
2707               CHKERRQ(MatMult(matis->A,matis->x,matis->y));
2708               CHKERRQ(VecPointwiseMult(matis->y,work[1],matis->y));
2709               CHKERRQ(VecGetArray(matis->y,&array));
2710               for (j=0;j<n_interior_dofs;j++) {
2711                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2712                   valid = PETSC_FALSE;
2713                   break;
2714                 }
2715               }
2716               CHKERRQ(VecRestoreArray(matis->y,&array));
2717             }
2718             if (valid && nneu) {
2719               const PetscInt *idxs;
2720               PetscInt       nzb;
2721 
2722               CHKERRQ(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs));
2723               CHKERRQ(ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL));
2724               CHKERRQ(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs));
2725               if (nzb) valid = PETSC_FALSE;
2726             }
2727             if (valid && pressures) {
2728               IS       t_pressure_subs,tmp;
2729               PetscInt i1,i2;
2730 
2731               CHKERRQ(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs));
2732               CHKERRQ(ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp));
2733               CHKERRQ(ISGetLocalSize(tmp,&i1));
2734               CHKERRQ(ISGetLocalSize(t_zerodiag_subs,&i2));
2735               if (i2 != i1) valid = PETSC_FALSE;
2736               CHKERRQ(ISDestroy(&t_pressure_subs));
2737               CHKERRQ(ISDestroy(&tmp));
2738             }
2739             if (valid) {
2740               CHKERRQ(ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]));
2741               benign_n++;
2742             } else recompute_zerodiag = PETSC_TRUE;
2743           }
2744           CHKERRQ(ISDestroy(&t_zerodiag_subs));
2745           CHKERRQ(ISLocalToGlobalMappingDestroy(&l2g));
2746         }
2747       }
2748     } else { /* there's just one subdomain (or zero if they have not been detected */
2749       PetscBool valid = PETSC_TRUE;
2750 
2751       if (nneu) valid = PETSC_FALSE;
2752       if (valid && pressures) {
2753         CHKERRQ(ISEqual(pressures,zerodiag,&valid));
2754       }
2755       if (valid && checkb) {
2756         CHKERRQ(MatMult(matis->A,work[0],matis->x));
2757         CHKERRQ(VecPointwiseMult(matis->x,work[1],matis->x));
2758         CHKERRQ(VecGetArray(matis->x,&array));
2759         for (j=0;j<n_interior_dofs;j++) {
2760           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2761             valid = PETSC_FALSE;
2762             break;
2763           }
2764         }
2765         CHKERRQ(VecRestoreArray(matis->x,&array));
2766       }
2767       if (valid) {
2768         benign_n = 1;
2769         CHKERRQ(PetscMalloc1(benign_n,&zerodiag_subs));
2770         CHKERRQ(PetscObjectReference((PetscObject)zerodiag));
2771         zerodiag_subs[0] = zerodiag;
2772       }
2773     }
2774     if (checkb) {
2775       CHKERRQ(VecDestroyVecs(2,&work));
2776     }
2777   }
2778   CHKERRQ(PetscFree(interior_dofs));
2779 
2780   if (!benign_n) {
2781     PetscInt n;
2782 
2783     CHKERRQ(ISDestroy(&zerodiag));
2784     recompute_zerodiag = PETSC_FALSE;
2785     CHKERRQ(MatGetLocalSize(pcbddc->local_mat,&n,NULL));
2786     if (n) have_null = PETSC_FALSE;
2787   }
2788 
2789   /* final check for null pressures */
2790   if (zerodiag && pressures) {
2791     CHKERRQ(ISEqual(pressures,zerodiag,&have_null));
2792   }
2793 
2794   if (recompute_zerodiag) {
2795     CHKERRQ(ISDestroy(&zerodiag));
2796     if (benign_n == 1) {
2797       CHKERRQ(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2798       zerodiag = zerodiag_subs[0];
2799     } else {
2800       PetscInt i,nzn,*new_idxs;
2801 
2802       nzn = 0;
2803       for (i=0;i<benign_n;i++) {
2804         PetscInt ns;
2805         CHKERRQ(ISGetLocalSize(zerodiag_subs[i],&ns));
2806         nzn += ns;
2807       }
2808       CHKERRQ(PetscMalloc1(nzn,&new_idxs));
2809       nzn = 0;
2810       for (i=0;i<benign_n;i++) {
2811         PetscInt ns,*idxs;
2812         CHKERRQ(ISGetLocalSize(zerodiag_subs[i],&ns));
2813         CHKERRQ(ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs));
2814         CHKERRQ(PetscArraycpy(new_idxs+nzn,idxs,ns));
2815         CHKERRQ(ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs));
2816         nzn += ns;
2817       }
2818       CHKERRQ(PetscSortInt(nzn,new_idxs));
2819       CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag));
2820     }
2821     have_null = PETSC_FALSE;
2822   }
2823 
2824   /* determines if the coarse solver will be singular or not */
2825   CHKERRMPI(MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc)));
2826 
2827   /* Prepare matrix to compute no-net-flux */
2828   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2829     Mat                    A,loc_divudotp;
2830     ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2831     IS                     row,col,isused = NULL;
2832     PetscInt               M,N,n,st,n_isused;
2833 
2834     if (pressures) {
2835       isused = pressures;
2836     } else {
2837       isused = zerodiag_save;
2838     }
2839     CHKERRQ(MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL));
2840     CHKERRQ(MatISGetLocalMat(pc->pmat,&A));
2841     CHKERRQ(MatGetLocalSize(A,&n,NULL));
2842     PetscCheckFalse(!isused && n,PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2843     n_isused = 0;
2844     if (isused) {
2845       CHKERRQ(ISGetLocalSize(isused,&n_isused));
2846     }
2847     CHKERRMPI(MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc)));
2848     st = st-n_isused;
2849     if (n) {
2850       const PetscInt *gidxs;
2851 
2852       CHKERRQ(MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp));
2853       CHKERRQ(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs));
2854       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2855       CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row));
2856       CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col));
2857       CHKERRQ(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs));
2858     } else {
2859       CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp));
2860       CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row));
2861       CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col));
2862     }
2863     CHKERRQ(MatGetSize(pc->pmat,NULL,&N));
2864     CHKERRQ(ISGetSize(row,&M));
2865     CHKERRQ(ISLocalToGlobalMappingCreateIS(row,&rl2g));
2866     CHKERRQ(ISLocalToGlobalMappingCreateIS(col,&cl2g));
2867     CHKERRQ(ISDestroy(&row));
2868     CHKERRQ(ISDestroy(&col));
2869     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp));
2870     CHKERRQ(MatSetType(pcbddc->divudotp,MATIS));
2871     CHKERRQ(MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N));
2872     CHKERRQ(MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g));
2873     CHKERRQ(ISLocalToGlobalMappingDestroy(&rl2g));
2874     CHKERRQ(ISLocalToGlobalMappingDestroy(&cl2g));
2875     CHKERRQ(MatISSetLocalMat(pcbddc->divudotp,loc_divudotp));
2876     CHKERRQ(MatDestroy(&loc_divudotp));
2877     CHKERRQ(MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY));
2878     CHKERRQ(MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY));
2879   }
2880   CHKERRQ(ISDestroy(&zerodiag_save));
2881   CHKERRQ(ISDestroy(&pressures));
2882   if (bzerodiag) {
2883     PetscInt i;
2884 
2885     for (i=0;i<bsp;i++) {
2886       CHKERRQ(ISDestroy(&bzerodiag[i]));
2887     }
2888     CHKERRQ(PetscFree(bzerodiag));
2889   }
2890   pcbddc->benign_n = benign_n;
2891   pcbddc->benign_zerodiag_subs = zerodiag_subs;
2892 
2893   /* determines if the problem has subdomains with 0 pressure block */
2894   have_null = (PetscBool)(!!pcbddc->benign_n);
2895   CHKERRMPI(MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc)));
2896 
2897 project_b0:
2898   CHKERRQ(MatGetLocalSize(pcbddc->local_mat,&n,NULL));
2899   /* change of basis and p0 dofs */
2900   if (pcbddc->benign_n) {
2901     PetscInt i,s,*nnz;
2902 
2903     /* local change of basis for pressures */
2904     CHKERRQ(MatDestroy(&pcbddc->benign_change));
2905     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change));
2906     CHKERRQ(MatSetType(pcbddc->benign_change,MATAIJ));
2907     CHKERRQ(MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE));
2908     CHKERRQ(PetscMalloc1(n,&nnz));
2909     for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2910     for (i=0;i<pcbddc->benign_n;i++) {
2911       const PetscInt *idxs;
2912       PetscInt       nzs,j;
2913 
2914       CHKERRQ(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs));
2915       CHKERRQ(ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs));
2916       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2917       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2918       CHKERRQ(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs));
2919     }
2920     CHKERRQ(MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz));
2921     CHKERRQ(MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
2922     CHKERRQ(PetscFree(nnz));
2923     /* set identity by default */
2924     for (i=0;i<n;i++) {
2925       CHKERRQ(MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES));
2926     }
2927     CHKERRQ(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0));
2928     CHKERRQ(PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0));
2929     /* set change on pressures */
2930     for (s=0;s<pcbddc->benign_n;s++) {
2931       PetscScalar    *array;
2932       const PetscInt *idxs;
2933       PetscInt       nzs;
2934 
2935       CHKERRQ(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs));
2936       CHKERRQ(ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs));
2937       for (i=0;i<nzs-1;i++) {
2938         PetscScalar vals[2];
2939         PetscInt    cols[2];
2940 
2941         cols[0] = idxs[i];
2942         cols[1] = idxs[nzs-1];
2943         vals[0] = 1.;
2944         vals[1] = 1.;
2945         CHKERRQ(MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES));
2946       }
2947       CHKERRQ(PetscMalloc1(nzs,&array));
2948       for (i=0;i<nzs-1;i++) array[i] = -1.;
2949       array[nzs-1] = 1.;
2950       CHKERRQ(MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES));
2951       /* store local idxs for p0 */
2952       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2953       CHKERRQ(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs));
2954       CHKERRQ(PetscFree(array));
2955     }
2956     CHKERRQ(MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY));
2957     CHKERRQ(MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY));
2958 
2959     /* project if needed */
2960     if (pcbddc->benign_change_explicit) {
2961       Mat M;
2962 
2963       CHKERRQ(MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M));
2964       CHKERRQ(MatDestroy(&pcbddc->local_mat));
2965       CHKERRQ(MatSeqAIJCompress(M,&pcbddc->local_mat));
2966       CHKERRQ(MatDestroy(&M));
2967     }
2968     /* store global idxs for p0 */
2969     CHKERRQ(ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx));
2970   }
2971   *zerodiaglocal = zerodiag;
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2976 {
2977   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2978   PetscScalar    *array;
2979 
2980   PetscFunctionBegin;
2981   if (!pcbddc->benign_sf) {
2982     CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf));
2983     CHKERRQ(PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx));
2984   }
2985   if (get) {
2986     CHKERRQ(VecGetArrayRead(v,(const PetscScalar**)&array));
2987     CHKERRQ(PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE));
2988     CHKERRQ(PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE));
2989     CHKERRQ(VecRestoreArrayRead(v,(const PetscScalar**)&array));
2990   } else {
2991     CHKERRQ(VecGetArray(v,&array));
2992     CHKERRQ(PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE));
2993     CHKERRQ(PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE));
2994     CHKERRQ(VecRestoreArray(v,&array));
2995   }
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3000 {
3001   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3002 
3003   PetscFunctionBegin;
3004   /* TODO: add error checking
3005     - avoid nested pop (or push) calls.
3006     - cannot push before pop.
3007     - cannot call this if pcbddc->local_mat is NULL
3008   */
3009   if (!pcbddc->benign_n) {
3010     PetscFunctionReturn(0);
3011   }
3012   if (pop) {
3013     if (pcbddc->benign_change_explicit) {
3014       IS       is_p0;
3015       MatReuse reuse;
3016 
3017       /* extract B_0 */
3018       reuse = MAT_INITIAL_MATRIX;
3019       if (pcbddc->benign_B0) {
3020         reuse = MAT_REUSE_MATRIX;
3021       }
3022       CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0));
3023       CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0));
3024       /* remove rows and cols from local problem */
3025       CHKERRQ(MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE));
3026       CHKERRQ(MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
3027       CHKERRQ(MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL));
3028       CHKERRQ(ISDestroy(&is_p0));
3029     } else {
3030       Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
3031       PetscScalar *vals;
3032       PetscInt    i,n,*idxs_ins;
3033 
3034       CHKERRQ(VecGetLocalSize(matis->y,&n));
3035       CHKERRQ(PetscMalloc2(n,&idxs_ins,n,&vals));
3036       if (!pcbddc->benign_B0) {
3037         PetscInt *nnz;
3038         CHKERRQ(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0));
3039         CHKERRQ(MatSetType(pcbddc->benign_B0,MATAIJ));
3040         CHKERRQ(MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE));
3041         CHKERRQ(PetscMalloc1(pcbddc->benign_n,&nnz));
3042         for (i=0;i<pcbddc->benign_n;i++) {
3043           CHKERRQ(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]));
3044           nnz[i] = n - nnz[i];
3045         }
3046         CHKERRQ(MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz));
3047         CHKERRQ(MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
3048         CHKERRQ(PetscFree(nnz));
3049       }
3050 
3051       for (i=0;i<pcbddc->benign_n;i++) {
3052         PetscScalar *array;
3053         PetscInt    *idxs,j,nz,cum;
3054 
3055         CHKERRQ(VecSet(matis->x,0.));
3056         CHKERRQ(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz));
3057         CHKERRQ(ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs));
3058         for (j=0;j<nz;j++) vals[j] = 1.;
3059         CHKERRQ(VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES));
3060         CHKERRQ(VecAssemblyBegin(matis->x));
3061         CHKERRQ(VecAssemblyEnd(matis->x));
3062         CHKERRQ(VecSet(matis->y,0.));
3063         CHKERRQ(MatMult(matis->A,matis->x,matis->y));
3064         CHKERRQ(VecGetArray(matis->y,&array));
3065         cum = 0;
3066         for (j=0;j<n;j++) {
3067           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3068             vals[cum] = array[j];
3069             idxs_ins[cum] = j;
3070             cum++;
3071           }
3072         }
3073         CHKERRQ(MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES));
3074         CHKERRQ(VecRestoreArray(matis->y,&array));
3075         CHKERRQ(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs));
3076       }
3077       CHKERRQ(MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY));
3078       CHKERRQ(MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY));
3079       CHKERRQ(PetscFree2(idxs_ins,vals));
3080     }
3081   } else { /* push */
3082     if (pcbddc->benign_change_explicit) {
3083       PetscInt i;
3084 
3085       for (i=0;i<pcbddc->benign_n;i++) {
3086         PetscScalar *B0_vals;
3087         PetscInt    *B0_cols,B0_ncol;
3088 
3089         CHKERRQ(MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals));
3090         CHKERRQ(MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES));
3091         CHKERRQ(MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES));
3092         CHKERRQ(MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES));
3093         CHKERRQ(MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals));
3094       }
3095       CHKERRQ(MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY));
3096       CHKERRQ(MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY));
3097     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3098   }
3099   PetscFunctionReturn(0);
3100 }
3101 
3102 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3103 {
3104   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3105   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3106   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
3107   PetscBLASInt    *B_iwork,*B_ifail;
3108   PetscScalar     *work,lwork;
3109   PetscScalar     *St,*S,*eigv;
3110   PetscScalar     *Sarray,*Starray;
3111   PetscReal       *eigs,thresh,lthresh,uthresh;
3112   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3113   PetscBool       allocated_S_St;
3114 #if defined(PETSC_USE_COMPLEX)
3115   PetscReal       *rwork;
3116 #endif
3117   PetscErrorCode  ierr;
3118 
3119   PetscFunctionBegin;
3120   PetscCheck(sub_schurs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3121   PetscCheck(sub_schurs->schur_explicit,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3122   PetscCheckFalse(sub_schurs->n_subs && (!sub_schurs->is_symmetric),PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3123   CHKERRQ(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0));
3124 
3125   if (pcbddc->dbg_flag) {
3126     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
3127     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n"));
3128     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n"));
3129     CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3130   }
3131 
3132   if (pcbddc->dbg_flag) {
3133     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef));
3134   }
3135 
3136   /* max size of subsets */
3137   mss = 0;
3138   for (i=0;i<sub_schurs->n_subs;i++) {
3139     PetscInt subset_size;
3140 
3141     CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
3142     mss = PetscMax(mss,subset_size);
3143   }
3144 
3145   /* min/max and threshold */
3146   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3147   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3148   nmax = PetscMax(nmin,nmax);
3149   allocated_S_St = PETSC_FALSE;
3150   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3151     allocated_S_St = PETSC_TRUE;
3152   }
3153 
3154   /* allocate lapack workspace */
3155   cum = cum2 = 0;
3156   maxneigs = 0;
3157   for (i=0;i<sub_schurs->n_subs;i++) {
3158     PetscInt n,subset_size;
3159 
3160     CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
3161     n = PetscMin(subset_size,nmax);
3162     cum += subset_size;
3163     cum2 += subset_size*n;
3164     maxneigs = PetscMax(maxneigs,n);
3165   }
3166   lwork = 0;
3167   if (mss) {
3168     if (sub_schurs->is_symmetric) {
3169       PetscScalar  sdummy = 0.;
3170       PetscBLASInt B_itype = 1;
3171       PetscBLASInt B_N = mss, idummy = 0;
3172       PetscReal    rdummy = 0.,zero = 0.0;
3173       PetscReal    eps = 0.0; /* dlamch? */
3174 
3175       B_lwork = -1;
3176       /* some implementations may complain about NULL pointers, even if we are querying */
3177       S = &sdummy;
3178       St = &sdummy;
3179       eigs = &rdummy;
3180       eigv = &sdummy;
3181       B_iwork = &idummy;
3182       B_ifail = &idummy;
3183 #if defined(PETSC_USE_COMPLEX)
3184       rwork = &rdummy;
3185 #endif
3186       thresh = 1.0;
3187       CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3188 #if defined(PETSC_USE_COMPLEX)
3189       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3190 #else
3191       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3192 #endif
3193       PetscCheckFalse(B_ierr != 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3194       CHKERRQ(PetscFPTrapPop());
3195     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3196   }
3197 
3198   nv = 0;
3199   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3200     CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&nv));
3201   }
3202   CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork));
3203   if (allocated_S_St) {
3204     CHKERRQ(PetscMalloc2(mss*mss,&S,mss*mss,&St));
3205   }
3206   CHKERRQ(PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail));
3207 #if defined(PETSC_USE_COMPLEX)
3208   CHKERRQ(PetscMalloc1(7*mss,&rwork));
3209 #endif
3210   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3211                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3212                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3213                       nv+cum,&pcbddc->adaptive_constraints_idxs,
3214                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3215   CHKERRQ(PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs));
3216 
3217   maxneigs = 0;
3218   cum = cumarray = 0;
3219   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3220   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3221   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3222     const PetscInt *idxs;
3223 
3224     CHKERRQ(ISGetIndices(sub_schurs->is_vertices,&idxs));
3225     for (cum=0;cum<nv;cum++) {
3226       pcbddc->adaptive_constraints_n[cum] = 1;
3227       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3228       pcbddc->adaptive_constraints_data[cum] = 1.0;
3229       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3230       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3231     }
3232     CHKERRQ(ISRestoreIndices(sub_schurs->is_vertices,&idxs));
3233   }
3234 
3235   if (mss) { /* multilevel */
3236     CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray));
3237     CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray));
3238   }
3239 
3240   lthresh = pcbddc->adaptive_threshold[0];
3241   uthresh = pcbddc->adaptive_threshold[1];
3242   for (i=0;i<sub_schurs->n_subs;i++) {
3243     const PetscInt *idxs;
3244     PetscReal      upper,lower;
3245     PetscInt       j,subset_size,eigs_start = 0;
3246     PetscBLASInt   B_N;
3247     PetscBool      same_data = PETSC_FALSE;
3248     PetscBool      scal = PETSC_FALSE;
3249 
3250     if (pcbddc->use_deluxe_scaling) {
3251       upper = PETSC_MAX_REAL;
3252       lower = uthresh;
3253     } else {
3254       PetscCheck(sub_schurs->is_posdef,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3255       upper = 1./uthresh;
3256       lower = 0.;
3257     }
3258     CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
3259     CHKERRQ(ISGetIndices(sub_schurs->is_subs[i],&idxs));
3260     CHKERRQ(PetscBLASIntCast(subset_size,&B_N));
3261     /* this is experimental: we assume the dofs have been properly grouped to have
3262        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3263     if (!sub_schurs->is_posdef) {
3264       Mat T;
3265 
3266       for (j=0;j<subset_size;j++) {
3267         if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3268           CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T));
3269           CHKERRQ(MatScale(T,-1.0));
3270           CHKERRQ(MatDestroy(&T));
3271           CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T));
3272           CHKERRQ(MatScale(T,-1.0));
3273           CHKERRQ(MatDestroy(&T));
3274           if (sub_schurs->change_primal_sub) {
3275             PetscInt       nz,k;
3276             const PetscInt *idxs;
3277 
3278             CHKERRQ(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz));
3279             CHKERRQ(ISGetIndices(sub_schurs->change_primal_sub[i],&idxs));
3280             for (k=0;k<nz;k++) {
3281               *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3282               *(Starray + cumarray + idxs[k]*(subset_size+1))  = 0.0;
3283             }
3284             CHKERRQ(ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs));
3285           }
3286           scal = PETSC_TRUE;
3287           break;
3288         }
3289       }
3290     }
3291 
3292     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3293       if (sub_schurs->is_symmetric) {
3294         PetscInt j,k;
3295         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3296           CHKERRQ(PetscArrayzero(S,subset_size*subset_size));
3297           CHKERRQ(PetscArrayzero(St,subset_size*subset_size));
3298         }
3299         for (j=0;j<subset_size;j++) {
3300           for (k=j;k<subset_size;k++) {
3301             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3302             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3303           }
3304         }
3305       } else {
3306         CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size));
3307         CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size));
3308       }
3309     } else {
3310       S = Sarray + cumarray;
3311       St = Starray + cumarray;
3312     }
3313     /* see if we can save some work */
3314     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3315       CHKERRQ(PetscArraycmp(S,St,subset_size*subset_size,&same_data));
3316     }
3317 
3318     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3319       B_neigs = 0;
3320     } else {
3321       if (sub_schurs->is_symmetric) {
3322         PetscBLASInt B_itype = 1;
3323         PetscBLASInt B_IL, B_IU;
3324         PetscReal    eps = -1.0; /* dlamch? */
3325         PetscInt     nmin_s;
3326         PetscBool    compute_range;
3327 
3328         B_neigs = 0;
3329         compute_range = (PetscBool)!same_data;
3330         if (nmin >= subset_size) compute_range = PETSC_FALSE;
3331 
3332         if (pcbddc->dbg_flag) {
3333           PetscInt nc = 0;
3334 
3335           if (sub_schurs->change_primal_sub) {
3336             CHKERRQ(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc));
3337           }
3338           CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc));
3339         }
3340 
3341         CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3342         if (compute_range) {
3343 
3344           /* ask for eigenvalues larger than thresh */
3345           if (sub_schurs->is_posdef) {
3346 #if defined(PETSC_USE_COMPLEX)
3347             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3348 #else
3349             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3350 #endif
3351             CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3352           } else { /* no theory so far, but it works nicely */
3353             PetscInt  recipe = 0,recipe_m = 1;
3354             PetscReal bb[2];
3355 
3356             CHKERRQ(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL));
3357             switch (recipe) {
3358             case 0:
3359               if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3360               else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3361 #if defined(PETSC_USE_COMPLEX)
3362               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3363 #else
3364               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3365 #endif
3366               CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3367               break;
3368             case 1:
3369               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3370 #if defined(PETSC_USE_COMPLEX)
3371               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3372 #else
3373               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3374 #endif
3375               CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3376               if (!scal) {
3377                 PetscBLASInt B_neigs2 = 0;
3378 
3379                 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3380                 CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size));
3381                 CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size));
3382 #if defined(PETSC_USE_COMPLEX)
3383                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3384 #else
3385                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3386 #endif
3387                 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3388                 B_neigs += B_neigs2;
3389               }
3390               break;
3391             case 2:
3392               if (scal) {
3393                 bb[0] = PETSC_MIN_REAL;
3394                 bb[1] = 0;
3395 #if defined(PETSC_USE_COMPLEX)
3396                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3397 #else
3398                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3399 #endif
3400                 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3401               } else {
3402                 PetscBLASInt B_neigs2 = 0;
3403                 PetscBool    import = PETSC_FALSE;
3404 
3405                 lthresh = PetscMax(lthresh,0.0);
3406                 if (lthresh > 0.0) {
3407                   bb[0] = PETSC_MIN_REAL;
3408                   bb[1] = lthresh*lthresh;
3409 
3410                   import = PETSC_TRUE;
3411 #if defined(PETSC_USE_COMPLEX)
3412                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3413 #else
3414                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3415 #endif
3416                   CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3417                 }
3418                 bb[0] = PetscMax(lthresh*lthresh,uthresh);
3419                 bb[1] = PETSC_MAX_REAL;
3420                 if (import) {
3421                   CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size));
3422                   CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size));
3423                 }
3424 #if defined(PETSC_USE_COMPLEX)
3425                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3426 #else
3427                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3428 #endif
3429                 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3430                 B_neigs += B_neigs2;
3431               }
3432               break;
3433             case 3:
3434               if (scal) {
3435                 CHKERRQ(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL));
3436               } else {
3437                 CHKERRQ(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL));
3438               }
3439               if (!scal) {
3440                 bb[0] = uthresh;
3441                 bb[1] = PETSC_MAX_REAL;
3442 #if defined(PETSC_USE_COMPLEX)
3443                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3444 #else
3445                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3446 #endif
3447                 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3448               }
3449               if (recipe_m > 0 && B_N - B_neigs > 0) {
3450                 PetscBLASInt B_neigs2 = 0;
3451 
3452                 B_IL = 1;
3453                 CHKERRQ(PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU));
3454                 CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size));
3455                 CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size));
3456 #if defined(PETSC_USE_COMPLEX)
3457                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3458 #else
3459                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3460 #endif
3461                 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3462                 B_neigs += B_neigs2;
3463               }
3464               break;
3465             case 4:
3466               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3467 #if defined(PETSC_USE_COMPLEX)
3468               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3469 #else
3470               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3471 #endif
3472               CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3473               {
3474                 PetscBLASInt B_neigs2 = 0;
3475 
3476                 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3477                 CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size));
3478                 CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size));
3479 #if defined(PETSC_USE_COMPLEX)
3480                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3481 #else
3482                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3483 #endif
3484                 CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3485                 B_neigs += B_neigs2;
3486               }
3487               break;
3488             case 5: /* same as before: first compute all eigenvalues, then filter */
3489 #if defined(PETSC_USE_COMPLEX)
3490               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3491 #else
3492               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3493 #endif
3494               CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3495               {
3496                 PetscInt e,k,ne;
3497                 for (e=0,ne=0;e<B_neigs;e++) {
3498                   if (eigs[e] < lthresh || eigs[e] > uthresh) {
3499                     for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3500                     eigs[ne] = eigs[e];
3501                     ne++;
3502                   }
3503                 }
3504                 CHKERRQ(PetscArraycpy(eigv,S,B_N*ne));
3505                 B_neigs = ne;
3506               }
3507               break;
3508             default:
3509               SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3510             }
3511           }
3512         } else if (!same_data) { /* this is just to see all the eigenvalues */
3513           B_IU = PetscMax(1,PetscMin(B_N,nmax));
3514           B_IL = 1;
3515 #if defined(PETSC_USE_COMPLEX)
3516           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3517 #else
3518           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3519 #endif
3520           CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3521         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3522           PetscInt k;
3523           PetscCheck(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3524           CHKERRQ(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax));
3525           CHKERRQ(PetscBLASIntCast(nmax,&B_neigs));
3526           nmin = nmax;
3527           CHKERRQ(PetscArrayzero(eigv,subset_size*nmax));
3528           for (k=0;k<nmax;k++) {
3529             eigs[k] = 1./PETSC_SMALL;
3530             eigv[k*(subset_size+1)] = 1.0;
3531           }
3532         }
3533         CHKERRQ(PetscFPTrapPop());
3534         if (B_ierr) {
3535           PetscCheckFalse(B_ierr < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3536           else PetscCheckFalse(B_ierr <= B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3537           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3538         }
3539 
3540         if (B_neigs > nmax) {
3541           if (pcbddc->dbg_flag) {
3542             CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %D.\n",B_neigs,nmax));
3543           }
3544           if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3545           B_neigs = nmax;
3546         }
3547 
3548         nmin_s = PetscMin(nmin,B_N);
3549         if (B_neigs < nmin_s) {
3550           PetscBLASInt B_neigs2 = 0;
3551 
3552           if (pcbddc->use_deluxe_scaling) {
3553             if (scal) {
3554               B_IU = nmin_s;
3555               B_IL = B_neigs + 1;
3556             } else {
3557               B_IL = B_N - nmin_s + 1;
3558               B_IU = B_N - B_neigs;
3559             }
3560           } else {
3561             B_IL = B_neigs + 1;
3562             B_IU = nmin_s;
3563           }
3564           if (pcbddc->dbg_flag) {
3565             CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU));
3566           }
3567           if (sub_schurs->is_symmetric) {
3568             PetscInt j,k;
3569             for (j=0;j<subset_size;j++) {
3570               for (k=j;k<subset_size;k++) {
3571                 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3572                 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3573               }
3574             }
3575           } else {
3576             CHKERRQ(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size));
3577             CHKERRQ(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size));
3578           }
3579           CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3580 #if defined(PETSC_USE_COMPLEX)
3581           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3582 #else
3583           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3584 #endif
3585           CHKERRQ(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0));
3586           CHKERRQ(PetscFPTrapPop());
3587           B_neigs += B_neigs2;
3588         }
3589         if (B_ierr) {
3590           PetscCheckFalse(B_ierr < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3591           else PetscCheckFalse(B_ierr <= B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3592           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3593         }
3594         if (pcbddc->dbg_flag) {
3595           CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs));
3596           for (j=0;j<B_neigs;j++) {
3597             if (eigs[j] == 0.0) {
3598               CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n"));
3599             } else {
3600               if (pcbddc->use_deluxe_scaling) {
3601                 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]));
3602               } else {
3603                 CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1./eigs[j+eigs_start]));
3604               }
3605             }
3606           }
3607         }
3608       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3609     }
3610     /* change the basis back to the original one */
3611     if (sub_schurs->change) {
3612       Mat change,phi,phit;
3613 
3614       if (pcbddc->dbg_flag > 2) {
3615         PetscInt ii;
3616         for (ii=0;ii<B_neigs;ii++) {
3617           CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N));
3618           for (j=0;j<B_N;j++) {
3619 #if defined(PETSC_USE_COMPLEX)
3620             PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3621             PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3622             CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c));
3623 #else
3624             CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]));
3625 #endif
3626           }
3627         }
3628       }
3629       CHKERRQ(KSPGetOperators(sub_schurs->change[i],&change,NULL));
3630       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit));
3631       CHKERRQ(MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi));
3632       CHKERRQ(MatCopy(phi,phit,SAME_NONZERO_PATTERN));
3633       CHKERRQ(MatDestroy(&phit));
3634       CHKERRQ(MatDestroy(&phi));
3635     }
3636     maxneigs = PetscMax(B_neigs,maxneigs);
3637     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3638     if (B_neigs) {
3639       CHKERRQ(PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size));
3640 
3641       if (pcbddc->dbg_flag > 1) {
3642         PetscInt ii;
3643         for (ii=0;ii<B_neigs;ii++) {
3644           CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N));
3645           for (j=0;j<B_N;j++) {
3646 #if defined(PETSC_USE_COMPLEX)
3647             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3648             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3649             CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c));
3650 #else
3651             CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]));
3652 #endif
3653           }
3654         }
3655       }
3656       CHKERRQ(PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size));
3657       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3658       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3659       cum++;
3660     }
3661     CHKERRQ(ISRestoreIndices(sub_schurs->is_subs[i],&idxs));
3662     /* shift for next computation */
3663     cumarray += subset_size*subset_size;
3664   }
3665   if (pcbddc->dbg_flag) {
3666     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
3667   }
3668 
3669   if (mss) {
3670     CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray));
3671     CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray));
3672     /* destroy matrices (junk) */
3673     CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3674     CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3675   }
3676   if (allocated_S_St) {
3677     CHKERRQ(PetscFree2(S,St));
3678   }
3679   CHKERRQ(PetscFree5(eigv,eigs,work,B_iwork,B_ifail));
3680 #if defined(PETSC_USE_COMPLEX)
3681   CHKERRQ(PetscFree(rwork));
3682 #endif
3683   if (pcbddc->dbg_flag) {
3684     PetscInt maxneigs_r;
3685     CHKERRMPI(MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc)));
3686     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r));
3687   }
3688   CHKERRQ(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0));
3689   PetscFunctionReturn(0);
3690 }
3691 
3692 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3693 {
3694   PetscScalar    *coarse_submat_vals;
3695 
3696   PetscFunctionBegin;
3697   /* Setup local scatters R_to_B and (optionally) R_to_D */
3698   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3699   CHKERRQ(PCBDDCSetUpLocalScatters(pc));
3700 
3701   /* Setup local neumann solver ksp_R */
3702   /* PCBDDCSetUpLocalScatters should be called first! */
3703   CHKERRQ(PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE));
3704 
3705   /*
3706      Setup local correction and local part of coarse basis.
3707      Gives back the dense local part of the coarse matrix in column major ordering
3708   */
3709   CHKERRQ(PCBDDCSetUpCorrection(pc,&coarse_submat_vals));
3710 
3711   /* Compute total number of coarse nodes and setup coarse solver */
3712   CHKERRQ(PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals));
3713 
3714   /* free */
3715   CHKERRQ(PetscFree(coarse_submat_vals));
3716   PetscFunctionReturn(0);
3717 }
3718 
3719 PetscErrorCode PCBDDCResetCustomization(PC pc)
3720 {
3721   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3722 
3723   PetscFunctionBegin;
3724   CHKERRQ(ISDestroy(&pcbddc->user_primal_vertices));
3725   CHKERRQ(ISDestroy(&pcbddc->user_primal_vertices_local));
3726   CHKERRQ(ISDestroy(&pcbddc->NeumannBoundaries));
3727   CHKERRQ(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3728   CHKERRQ(ISDestroy(&pcbddc->DirichletBoundaries));
3729   CHKERRQ(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3730   CHKERRQ(PetscFree(pcbddc->onearnullvecs_state));
3731   CHKERRQ(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3732   CHKERRQ(PCBDDCSetDofsSplitting(pc,0,NULL));
3733   CHKERRQ(PCBDDCSetDofsSplittingLocal(pc,0,NULL));
3734   PetscFunctionReturn(0);
3735 }
3736 
3737 PetscErrorCode PCBDDCResetTopography(PC pc)
3738 {
3739   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3740   PetscInt       i;
3741 
3742   PetscFunctionBegin;
3743   CHKERRQ(MatDestroy(&pcbddc->nedcG));
3744   CHKERRQ(ISDestroy(&pcbddc->nedclocal));
3745   CHKERRQ(MatDestroy(&pcbddc->discretegradient));
3746   CHKERRQ(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3747   CHKERRQ(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3748   CHKERRQ(MatDestroy(&pcbddc->switch_static_change));
3749   CHKERRQ(VecDestroy(&pcbddc->work_change));
3750   CHKERRQ(MatDestroy(&pcbddc->ConstraintMatrix));
3751   CHKERRQ(MatDestroy(&pcbddc->divudotp));
3752   CHKERRQ(ISDestroy(&pcbddc->divudotp_vl2l));
3753   CHKERRQ(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3754   for (i=0;i<pcbddc->n_local_subs;i++) {
3755     CHKERRQ(ISDestroy(&pcbddc->local_subs[i]));
3756   }
3757   pcbddc->n_local_subs = 0;
3758   CHKERRQ(PetscFree(pcbddc->local_subs));
3759   CHKERRQ(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3760   pcbddc->graphanalyzed        = PETSC_FALSE;
3761   pcbddc->recompute_topography = PETSC_TRUE;
3762   pcbddc->corner_selected      = PETSC_FALSE;
3763   PetscFunctionReturn(0);
3764 }
3765 
3766 PetscErrorCode PCBDDCResetSolvers(PC pc)
3767 {
3768   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3769 
3770   PetscFunctionBegin;
3771   CHKERRQ(VecDestroy(&pcbddc->coarse_vec));
3772   if (pcbddc->coarse_phi_B) {
3773     PetscScalar *array;
3774     CHKERRQ(MatDenseGetArray(pcbddc->coarse_phi_B,&array));
3775     CHKERRQ(PetscFree(array));
3776   }
3777   CHKERRQ(MatDestroy(&pcbddc->coarse_phi_B));
3778   CHKERRQ(MatDestroy(&pcbddc->coarse_phi_D));
3779   CHKERRQ(MatDestroy(&pcbddc->coarse_psi_B));
3780   CHKERRQ(MatDestroy(&pcbddc->coarse_psi_D));
3781   CHKERRQ(VecDestroy(&pcbddc->vec1_P));
3782   CHKERRQ(VecDestroy(&pcbddc->vec1_C));
3783   CHKERRQ(MatDestroy(&pcbddc->local_auxmat2));
3784   CHKERRQ(MatDestroy(&pcbddc->local_auxmat1));
3785   CHKERRQ(VecDestroy(&pcbddc->vec1_R));
3786   CHKERRQ(VecDestroy(&pcbddc->vec2_R));
3787   CHKERRQ(ISDestroy(&pcbddc->is_R_local));
3788   CHKERRQ(VecScatterDestroy(&pcbddc->R_to_B));
3789   CHKERRQ(VecScatterDestroy(&pcbddc->R_to_D));
3790   CHKERRQ(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3791   CHKERRQ(KSPReset(pcbddc->ksp_D));
3792   CHKERRQ(KSPReset(pcbddc->ksp_R));
3793   CHKERRQ(KSPReset(pcbddc->coarse_ksp));
3794   CHKERRQ(MatDestroy(&pcbddc->local_mat));
3795   CHKERRQ(PetscFree(pcbddc->primal_indices_local_idxs));
3796   CHKERRQ(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult));
3797   CHKERRQ(PetscFree(pcbddc->global_primal_indices));
3798   CHKERRQ(ISDestroy(&pcbddc->coarse_subassembling));
3799   CHKERRQ(MatDestroy(&pcbddc->benign_change));
3800   CHKERRQ(VecDestroy(&pcbddc->benign_vec));
3801   CHKERRQ(PCBDDCBenignShellMat(pc,PETSC_TRUE));
3802   CHKERRQ(MatDestroy(&pcbddc->benign_B0));
3803   CHKERRQ(PetscSFDestroy(&pcbddc->benign_sf));
3804   if (pcbddc->benign_zerodiag_subs) {
3805     PetscInt i;
3806     for (i=0;i<pcbddc->benign_n;i++) {
3807       CHKERRQ(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3808     }
3809     CHKERRQ(PetscFree(pcbddc->benign_zerodiag_subs));
3810   }
3811   CHKERRQ(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0));
3812   PetscFunctionReturn(0);
3813 }
3814 
3815 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3816 {
3817   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3818   PC_IS          *pcis = (PC_IS*)pc->data;
3819   VecType        impVecType;
3820   PetscInt       n_constraints,n_R,old_size;
3821 
3822   PetscFunctionBegin;
3823   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3824   n_R = pcis->n - pcbddc->n_vertices;
3825   CHKERRQ(VecGetType(pcis->vec1_N,&impVecType));
3826   /* local work vectors (try to avoid unneeded work)*/
3827   /* R nodes */
3828   old_size = -1;
3829   if (pcbddc->vec1_R) {
3830     CHKERRQ(VecGetSize(pcbddc->vec1_R,&old_size));
3831   }
3832   if (n_R != old_size) {
3833     CHKERRQ(VecDestroy(&pcbddc->vec1_R));
3834     CHKERRQ(VecDestroy(&pcbddc->vec2_R));
3835     CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R));
3836     CHKERRQ(VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R));
3837     CHKERRQ(VecSetType(pcbddc->vec1_R,impVecType));
3838     CHKERRQ(VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R));
3839   }
3840   /* local primal dofs */
3841   old_size = -1;
3842   if (pcbddc->vec1_P) {
3843     CHKERRQ(VecGetSize(pcbddc->vec1_P,&old_size));
3844   }
3845   if (pcbddc->local_primal_size != old_size) {
3846     CHKERRQ(VecDestroy(&pcbddc->vec1_P));
3847     CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P));
3848     CHKERRQ(VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size));
3849     CHKERRQ(VecSetType(pcbddc->vec1_P,impVecType));
3850   }
3851   /* local explicit constraints */
3852   old_size = -1;
3853   if (pcbddc->vec1_C) {
3854     CHKERRQ(VecGetSize(pcbddc->vec1_C,&old_size));
3855   }
3856   if (n_constraints && n_constraints != old_size) {
3857     CHKERRQ(VecDestroy(&pcbddc->vec1_C));
3858     CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C));
3859     CHKERRQ(VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints));
3860     CHKERRQ(VecSetType(pcbddc->vec1_C,impVecType));
3861   }
3862   PetscFunctionReturn(0);
3863 }
3864 
3865 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3866 {
3867   /* pointers to pcis and pcbddc */
3868   PC_IS*          pcis = (PC_IS*)pc->data;
3869   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3870   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3871   /* submatrices of local problem */
3872   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
3873   /* submatrices of local coarse problem */
3874   Mat             S_VV,S_CV,S_VC,S_CC;
3875   /* working matrices */
3876   Mat             C_CR;
3877   /* additional working stuff */
3878   PC              pc_R;
3879   Mat             F,Brhs = NULL;
3880   Vec             dummy_vec;
3881   PetscBool       isLU,isCHOL,need_benign_correction,sparserhs;
3882   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
3883   PetscScalar     *work;
3884   PetscInt        *idx_V_B;
3885   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3886   PetscInt        i,n_R,n_D,n_B;
3887   PetscScalar     one=1.0,m_one=-1.0;
3888 
3889   PetscFunctionBegin;
3890   PetscCheckFalse(!pcbddc->symmetric_primal && pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3891   CHKERRQ(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0));
3892 
3893   /* Set Non-overlapping dimensions */
3894   n_vertices = pcbddc->n_vertices;
3895   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3896   n_B = pcis->n_B;
3897   n_D = pcis->n - n_B;
3898   n_R = pcis->n - n_vertices;
3899 
3900   /* vertices in boundary numbering */
3901   CHKERRQ(PetscMalloc1(n_vertices,&idx_V_B));
3902   CHKERRQ(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B));
3903   PetscCheckFalse(i != n_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);
3904 
3905   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3906   CHKERRQ(PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals));
3907   CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV));
3908   CHKERRQ(MatDenseSetLDA(S_VV,pcbddc->local_primal_size));
3909   CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV));
3910   CHKERRQ(MatDenseSetLDA(S_CV,pcbddc->local_primal_size));
3911   CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC));
3912   CHKERRQ(MatDenseSetLDA(S_VC,pcbddc->local_primal_size));
3913   CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC));
3914   CHKERRQ(MatDenseSetLDA(S_CC,pcbddc->local_primal_size));
3915 
3916   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3917   CHKERRQ(KSPGetPC(pcbddc->ksp_R,&pc_R));
3918   CHKERRQ(PCSetUp(pc_R));
3919   CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU));
3920   CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL));
3921   lda_rhs = n_R;
3922   need_benign_correction = PETSC_FALSE;
3923   if (isLU || isCHOL) {
3924     CHKERRQ(PCFactorGetMatrix(pc_R,&F));
3925   } else if (sub_schurs && sub_schurs->reuse_solver) {
3926     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3927     MatFactorType      type;
3928 
3929     F = reuse_solver->F;
3930     CHKERRQ(MatGetFactorType(F,&type));
3931     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3932     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3933     CHKERRQ(MatGetSize(F,&lda_rhs,NULL));
3934     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3935   } else F = NULL;
3936 
3937   /* determine if we can use a sparse right-hand side */
3938   sparserhs = PETSC_FALSE;
3939   if (F) {
3940     MatSolverType solver;
3941 
3942     CHKERRQ(MatFactorGetSolverType(F,&solver));
3943     CHKERRQ(PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs));
3944   }
3945 
3946   /* allocate workspace */
3947   n = 0;
3948   if (n_constraints) {
3949     n += lda_rhs*n_constraints;
3950   }
3951   if (n_vertices) {
3952     n = PetscMax(2*lda_rhs*n_vertices,n);
3953     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3954   }
3955   if (!pcbddc->symmetric_primal) {
3956     n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3957   }
3958   CHKERRQ(PetscMalloc1(n,&work));
3959 
3960   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3961   dummy_vec = NULL;
3962   if (need_benign_correction && lda_rhs != n_R && F) {
3963     CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec));
3964     CHKERRQ(VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE));
3965     CHKERRQ(VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name));
3966   }
3967 
3968   CHKERRQ(MatDestroy(&pcbddc->local_auxmat1));
3969   CHKERRQ(MatDestroy(&pcbddc->local_auxmat2));
3970 
3971   /* Precompute stuffs needed for preprocessing and application of BDDC*/
3972   if (n_constraints) {
3973     Mat         M3,C_B;
3974     IS          is_aux;
3975 
3976     /* Extract constraints on R nodes: C_{CR}  */
3977     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux));
3978     CHKERRQ(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR));
3979     CHKERRQ(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B));
3980 
3981     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3982     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3983     if (!sparserhs) {
3984       CHKERRQ(PetscArrayzero(work,lda_rhs*n_constraints));
3985       for (i=0;i<n_constraints;i++) {
3986         const PetscScalar *row_cmat_values;
3987         const PetscInt    *row_cmat_indices;
3988         PetscInt          size_of_constraint,j;
3989 
3990         CHKERRQ(MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values));
3991         for (j=0;j<size_of_constraint;j++) {
3992           work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3993         }
3994         CHKERRQ(MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values));
3995       }
3996       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs));
3997     } else {
3998       Mat tC_CR;
3999 
4000       CHKERRQ(MatScale(C_CR,-1.0));
4001       if (lda_rhs != n_R) {
4002         PetscScalar *aa;
4003         PetscInt    r,*ii,*jj;
4004         PetscBool   done;
4005 
4006         CHKERRQ(MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done));
4007         PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4008         CHKERRQ(MatSeqAIJGetArray(C_CR,&aa));
4009         CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR));
4010         CHKERRQ(MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done));
4011         PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4012       } else {
4013         CHKERRQ(PetscObjectReference((PetscObject)C_CR));
4014         tC_CR = C_CR;
4015       }
4016       CHKERRQ(MatCreateTranspose(tC_CR,&Brhs));
4017       CHKERRQ(MatDestroy(&tC_CR));
4018     }
4019     CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R));
4020     if (F) {
4021       if (need_benign_correction) {
4022         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4023 
4024         /* rhs is already zero on interior dofs, no need to change the rhs */
4025         CHKERRQ(PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n));
4026       }
4027       CHKERRQ(MatMatSolve(F,Brhs,local_auxmat2_R));
4028       if (need_benign_correction) {
4029         PetscScalar        *marr;
4030         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4031 
4032         CHKERRQ(MatDenseGetArray(local_auxmat2_R,&marr));
4033         if (lda_rhs != n_R) {
4034           for (i=0;i<n_constraints;i++) {
4035             CHKERRQ(VecPlaceArray(dummy_vec,marr+i*lda_rhs));
4036             CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE));
4037             CHKERRQ(VecResetArray(dummy_vec));
4038           }
4039         } else {
4040           for (i=0;i<n_constraints;i++) {
4041             CHKERRQ(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs));
4042             CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE));
4043             CHKERRQ(VecResetArray(pcbddc->vec1_R));
4044           }
4045         }
4046         CHKERRQ(MatDenseRestoreArray(local_auxmat2_R,&marr));
4047       }
4048     } else {
4049       PetscScalar *marr;
4050 
4051       CHKERRQ(MatDenseGetArray(local_auxmat2_R,&marr));
4052       for (i=0;i<n_constraints;i++) {
4053         CHKERRQ(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs));
4054         CHKERRQ(VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs));
4055         CHKERRQ(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R));
4056         CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R));
4057         CHKERRQ(VecResetArray(pcbddc->vec1_R));
4058         CHKERRQ(VecResetArray(pcbddc->vec2_R));
4059       }
4060       CHKERRQ(MatDenseRestoreArray(local_auxmat2_R,&marr));
4061     }
4062     if (sparserhs) {
4063       CHKERRQ(MatScale(C_CR,-1.0));
4064     }
4065     CHKERRQ(MatDestroy(&Brhs));
4066     if (!pcbddc->switch_static) {
4067       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2));
4068       for (i=0;i<n_constraints;i++) {
4069         Vec r, b;
4070         CHKERRQ(MatDenseGetColumnVecRead(local_auxmat2_R,i,&r));
4071         CHKERRQ(MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b));
4072         CHKERRQ(VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD));
4073         CHKERRQ(VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD));
4074         CHKERRQ(MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b));
4075         CHKERRQ(MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r));
4076       }
4077       CHKERRQ(MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3));
4078     } else {
4079       if (lda_rhs != n_R) {
4080         IS dummy;
4081 
4082         CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy));
4083         CHKERRQ(MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2));
4084         CHKERRQ(ISDestroy(&dummy));
4085       } else {
4086         CHKERRQ(PetscObjectReference((PetscObject)local_auxmat2_R));
4087         pcbddc->local_auxmat2 = local_auxmat2_R;
4088       }
4089       CHKERRQ(MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3));
4090     }
4091     CHKERRQ(ISDestroy(&is_aux));
4092     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4093     CHKERRQ(MatScale(M3,m_one));
4094     if (isCHOL) {
4095       CHKERRQ(MatCholeskyFactor(M3,NULL,NULL));
4096     } else {
4097       CHKERRQ(MatLUFactor(M3,NULL,NULL,NULL));
4098     }
4099     CHKERRQ(MatSeqDenseInvertFactors_Private(M3));
4100     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4101     CHKERRQ(MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1));
4102     CHKERRQ(MatDestroy(&C_B));
4103     CHKERRQ(MatCopy(M3,S_CC,SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4104     CHKERRQ(MatDestroy(&M3));
4105   }
4106 
4107   /* Get submatrices from subdomain matrix */
4108   if (n_vertices) {
4109 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4110     PetscBool oldpin;
4111 #endif
4112     PetscBool isaij;
4113     IS        is_aux;
4114 
4115     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4116       IS tis;
4117 
4118       CHKERRQ(ISDuplicate(pcbddc->is_R_local,&tis));
4119       CHKERRQ(ISSort(tis));
4120       CHKERRQ(ISComplement(tis,0,pcis->n,&is_aux));
4121       CHKERRQ(ISDestroy(&tis));
4122     } else {
4123       CHKERRQ(ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux));
4124     }
4125 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4126     oldpin = pcbddc->local_mat->boundtocpu;
4127 #endif
4128     CHKERRQ(MatBindToCPU(pcbddc->local_mat,PETSC_TRUE));
4129     CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV));
4130     CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR));
4131     CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij));
4132     if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4133       CHKERRQ(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR));
4134     }
4135     CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV));
4136 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4137     CHKERRQ(MatBindToCPU(pcbddc->local_mat,oldpin));
4138 #endif
4139     CHKERRQ(ISDestroy(&is_aux));
4140   }
4141 
4142   /* Matrix of coarse basis functions (local) */
4143   if (pcbddc->coarse_phi_B) {
4144     PetscInt on_B,on_primal,on_D=n_D;
4145     if (pcbddc->coarse_phi_D) {
4146       CHKERRQ(MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL));
4147     }
4148     CHKERRQ(MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal));
4149     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4150       PetscScalar *marray;
4151 
4152       CHKERRQ(MatDenseGetArray(pcbddc->coarse_phi_B,&marray));
4153       CHKERRQ(PetscFree(marray));
4154       CHKERRQ(MatDestroy(&pcbddc->coarse_phi_B));
4155       CHKERRQ(MatDestroy(&pcbddc->coarse_psi_B));
4156       CHKERRQ(MatDestroy(&pcbddc->coarse_phi_D));
4157       CHKERRQ(MatDestroy(&pcbddc->coarse_psi_D));
4158     }
4159   }
4160 
4161   if (!pcbddc->coarse_phi_B) {
4162     PetscScalar *marr;
4163 
4164     /* memory size */
4165     n = n_B*pcbddc->local_primal_size;
4166     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4167     if (!pcbddc->symmetric_primal) n *= 2;
4168     CHKERRQ(PetscCalloc1(n,&marr));
4169     CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B));
4170     marr += n_B*pcbddc->local_primal_size;
4171     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4172       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D));
4173       marr += n_D*pcbddc->local_primal_size;
4174     }
4175     if (!pcbddc->symmetric_primal) {
4176       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B));
4177       marr += n_B*pcbddc->local_primal_size;
4178       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4179         CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D));
4180       }
4181     } else {
4182       CHKERRQ(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
4183       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4184       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4185         CHKERRQ(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
4186         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4187       }
4188     }
4189   }
4190 
4191   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4192   p0_lidx_I = NULL;
4193   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4194     const PetscInt *idxs;
4195 
4196     CHKERRQ(ISGetIndices(pcis->is_I_local,&idxs));
4197     CHKERRQ(PetscMalloc1(pcbddc->benign_n,&p0_lidx_I));
4198     for (i=0;i<pcbddc->benign_n;i++) {
4199       CHKERRQ(PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]));
4200     }
4201     CHKERRQ(ISRestoreIndices(pcis->is_I_local,&idxs));
4202   }
4203 
4204   /* vertices */
4205   if (n_vertices) {
4206     PetscBool restoreavr = PETSC_FALSE;
4207 
4208     CHKERRQ(MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV));
4209 
4210     if (n_R) {
4211       Mat               A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4212       PetscBLASInt      B_N,B_one = 1;
4213       const PetscScalar *x;
4214       PetscScalar       *y;
4215 
4216       CHKERRQ(MatScale(A_RV,m_one));
4217       if (need_benign_correction) {
4218         ISLocalToGlobalMapping RtoN;
4219         IS                     is_p0;
4220         PetscInt               *idxs_p0,n;
4221 
4222         CHKERRQ(PetscMalloc1(pcbddc->benign_n,&idxs_p0));
4223         CHKERRQ(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN));
4224         CHKERRQ(ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0));
4225         PetscCheckFalse(n != pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4226         CHKERRQ(ISLocalToGlobalMappingDestroy(&RtoN));
4227         CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0));
4228         CHKERRQ(MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr));
4229         CHKERRQ(ISDestroy(&is_p0));
4230       }
4231 
4232       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV));
4233       if (!sparserhs || need_benign_correction) {
4234         if (lda_rhs == n_R) {
4235           CHKERRQ(MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV));
4236         } else {
4237           PetscScalar    *av,*array;
4238           const PetscInt *xadj,*adjncy;
4239           PetscInt       n;
4240           PetscBool      flg_row;
4241 
4242           array = work+lda_rhs*n_vertices;
4243           CHKERRQ(PetscArrayzero(array,lda_rhs*n_vertices));
4244           CHKERRQ(MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV));
4245           CHKERRQ(MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row));
4246           CHKERRQ(MatSeqAIJGetArray(A_RV,&av));
4247           for (i=0;i<n;i++) {
4248             PetscInt j;
4249             for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4250           }
4251           CHKERRQ(MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row));
4252           CHKERRQ(MatDestroy(&A_RV));
4253           CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV));
4254         }
4255         if (need_benign_correction) {
4256           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4257           PetscScalar        *marr;
4258 
4259           CHKERRQ(MatDenseGetArray(A_RV,&marr));
4260           /* need \Phi^T A_RV = (I+L)A_RV, L given by
4261 
4262                  | 0 0  0 | (V)
4263              L = | 0 0 -1 | (P-p0)
4264                  | 0 0 -1 | (p0)
4265 
4266           */
4267           for (i=0;i<reuse_solver->benign_n;i++) {
4268             const PetscScalar *vals;
4269             const PetscInt    *idxs,*idxs_zero;
4270             PetscInt          n,j,nz;
4271 
4272             CHKERRQ(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz));
4273             CHKERRQ(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero));
4274             CHKERRQ(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals));
4275             for (j=0;j<n;j++) {
4276               PetscScalar val = vals[j];
4277               PetscInt    k,col = idxs[j];
4278               for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4279             }
4280             CHKERRQ(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals));
4281             CHKERRQ(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero));
4282           }
4283           CHKERRQ(MatDenseRestoreArray(A_RV,&marr));
4284         }
4285         CHKERRQ(PetscObjectReference((PetscObject)A_RV));
4286         Brhs = A_RV;
4287       } else {
4288         Mat tA_RVT,A_RVT;
4289 
4290         if (!pcbddc->symmetric_primal) {
4291           /* A_RV already scaled by -1 */
4292           CHKERRQ(MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT));
4293         } else {
4294           restoreavr = PETSC_TRUE;
4295           CHKERRQ(MatScale(A_VR,-1.0));
4296           CHKERRQ(PetscObjectReference((PetscObject)A_VR));
4297           A_RVT = A_VR;
4298         }
4299         if (lda_rhs != n_R) {
4300           PetscScalar *aa;
4301           PetscInt    r,*ii,*jj;
4302           PetscBool   done;
4303 
4304           CHKERRQ(MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done));
4305           PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4306           CHKERRQ(MatSeqAIJGetArray(A_RVT,&aa));
4307           CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT));
4308           CHKERRQ(MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done));
4309           PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4310         } else {
4311           CHKERRQ(PetscObjectReference((PetscObject)A_RVT));
4312           tA_RVT = A_RVT;
4313         }
4314         CHKERRQ(MatCreateTranspose(tA_RVT,&Brhs));
4315         CHKERRQ(MatDestroy(&tA_RVT));
4316         CHKERRQ(MatDestroy(&A_RVT));
4317       }
4318       if (F) {
4319         /* need to correct the rhs */
4320         if (need_benign_correction) {
4321           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4322           PetscScalar        *marr;
4323 
4324           CHKERRQ(MatDenseGetArray(Brhs,&marr));
4325           if (lda_rhs != n_R) {
4326             for (i=0;i<n_vertices;i++) {
4327               CHKERRQ(VecPlaceArray(dummy_vec,marr+i*lda_rhs));
4328               CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE));
4329               CHKERRQ(VecResetArray(dummy_vec));
4330             }
4331           } else {
4332             for (i=0;i<n_vertices;i++) {
4333               CHKERRQ(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs));
4334               CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE));
4335               CHKERRQ(VecResetArray(pcbddc->vec1_R));
4336             }
4337           }
4338           CHKERRQ(MatDenseRestoreArray(Brhs,&marr));
4339         }
4340         CHKERRQ(MatMatSolve(F,Brhs,A_RRmA_RV));
4341         if (restoreavr) {
4342           CHKERRQ(MatScale(A_VR,-1.0));
4343         }
4344         /* need to correct the solution */
4345         if (need_benign_correction) {
4346           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4347           PetscScalar        *marr;
4348 
4349           CHKERRQ(MatDenseGetArray(A_RRmA_RV,&marr));
4350           if (lda_rhs != n_R) {
4351             for (i=0;i<n_vertices;i++) {
4352               CHKERRQ(VecPlaceArray(dummy_vec,marr+i*lda_rhs));
4353               CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE));
4354               CHKERRQ(VecResetArray(dummy_vec));
4355             }
4356           } else {
4357             for (i=0;i<n_vertices;i++) {
4358               CHKERRQ(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs));
4359               CHKERRQ(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE));
4360               CHKERRQ(VecResetArray(pcbddc->vec1_R));
4361             }
4362           }
4363           CHKERRQ(MatDenseRestoreArray(A_RRmA_RV,&marr));
4364         }
4365       } else {
4366         CHKERRQ(MatDenseGetArray(Brhs,&y));
4367         for (i=0;i<n_vertices;i++) {
4368           CHKERRQ(VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs));
4369           CHKERRQ(VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs));
4370           CHKERRQ(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R));
4371           CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R));
4372           CHKERRQ(VecResetArray(pcbddc->vec1_R));
4373           CHKERRQ(VecResetArray(pcbddc->vec2_R));
4374         }
4375         CHKERRQ(MatDenseRestoreArray(Brhs,&y));
4376       }
4377       CHKERRQ(MatDestroy(&A_RV));
4378       CHKERRQ(MatDestroy(&Brhs));
4379       /* S_VV and S_CV */
4380       if (n_constraints) {
4381         Mat B;
4382 
4383         CHKERRQ(PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices));
4384         for (i=0;i<n_vertices;i++) {
4385           CHKERRQ(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs));
4386           CHKERRQ(VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B));
4387           CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD));
4388           CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD));
4389           CHKERRQ(VecResetArray(pcis->vec1_B));
4390           CHKERRQ(VecResetArray(pcbddc->vec1_R));
4391         }
4392         CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B));
4393         /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4394         CHKERRQ(MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV));
4395         CHKERRQ(MatProductSetType(S_CV,MATPRODUCT_AB));
4396         CHKERRQ(MatProductSetFromOptions(S_CV));
4397         CHKERRQ(MatProductSymbolic(S_CV));
4398         CHKERRQ(MatProductNumeric(S_CV));
4399         CHKERRQ(MatProductClear(S_CV));
4400 
4401         CHKERRQ(MatDestroy(&B));
4402         CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B));
4403         /* Reuse B = local_auxmat2_R * S_CV */
4404         CHKERRQ(MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B));
4405         CHKERRQ(MatProductSetType(B,MATPRODUCT_AB));
4406         CHKERRQ(MatProductSetFromOptions(B));
4407         CHKERRQ(MatProductSymbolic(B));
4408         CHKERRQ(MatProductNumeric(B));
4409 
4410         CHKERRQ(MatScale(S_CV,m_one));
4411         CHKERRQ(PetscBLASIntCast(lda_rhs*n_vertices,&B_N));
4412         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4413         CHKERRQ(MatDestroy(&B));
4414       }
4415       if (lda_rhs != n_R) {
4416         CHKERRQ(MatDestroy(&A_RRmA_RV));
4417         CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV));
4418         CHKERRQ(MatDenseSetLDA(A_RRmA_RV,lda_rhs));
4419       }
4420       CHKERRQ(MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt));
4421       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4422       if (need_benign_correction) {
4423         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4424         PetscScalar        *marr,*sums;
4425 
4426         CHKERRQ(PetscMalloc1(n_vertices,&sums));
4427         CHKERRQ(MatDenseGetArray(S_VVt,&marr));
4428         for (i=0;i<reuse_solver->benign_n;i++) {
4429           const PetscScalar *vals;
4430           const PetscInt    *idxs,*idxs_zero;
4431           PetscInt          n,j,nz;
4432 
4433           CHKERRQ(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz));
4434           CHKERRQ(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero));
4435           for (j=0;j<n_vertices;j++) {
4436             PetscInt k;
4437             sums[j] = 0.;
4438             for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4439           }
4440           CHKERRQ(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals));
4441           for (j=0;j<n;j++) {
4442             PetscScalar val = vals[j];
4443             PetscInt k;
4444             for (k=0;k<n_vertices;k++) {
4445               marr[idxs[j]+k*n_vertices] += val*sums[k];
4446             }
4447           }
4448           CHKERRQ(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals));
4449           CHKERRQ(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero));
4450         }
4451         CHKERRQ(PetscFree(sums));
4452         CHKERRQ(MatDenseRestoreArray(S_VVt,&marr));
4453         CHKERRQ(MatDestroy(&A_RV_bcorr));
4454       }
4455       CHKERRQ(MatDestroy(&A_RRmA_RV));
4456       CHKERRQ(PetscBLASIntCast(n_vertices*n_vertices,&B_N));
4457       CHKERRQ(MatDenseGetArrayRead(A_VV,&x));
4458       CHKERRQ(MatDenseGetArray(S_VVt,&y));
4459       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4460       CHKERRQ(MatDenseRestoreArrayRead(A_VV,&x));
4461       CHKERRQ(MatDenseRestoreArray(S_VVt,&y));
4462       CHKERRQ(MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN));
4463       CHKERRQ(MatDestroy(&S_VVt));
4464     } else {
4465       CHKERRQ(MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN));
4466     }
4467     CHKERRQ(MatDestroy(&A_VV));
4468 
4469     /* coarse basis functions */
4470     for (i=0;i<n_vertices;i++) {
4471       Vec         v;
4472       PetscScalar one = 1.0,zero = 0.0;
4473 
4474       CHKERRQ(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i));
4475       CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v));
4476       CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4477       CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4478       if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4479         PetscMPIInt rank;
4480         CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank));
4481         PetscCheckFalse(rank > 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),PETSC_ERR_PLIB,"Expected a sequential dense matrix");
4482       }
4483       CHKERRQ(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES));
4484       CHKERRQ(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */
4485       CHKERRQ(VecAssemblyEnd(v));
4486       CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v));
4487 
4488       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4489         PetscInt j;
4490 
4491         CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v));
4492         CHKERRQ(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4493         CHKERRQ(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4494         if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4495           PetscMPIInt rank;
4496           CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank));
4497           PetscCheckFalse(rank > 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),PETSC_ERR_PLIB,"Expected a sequential dense matrix");
4498         }
4499         for (j=0;j<pcbddc->benign_n;j++) CHKERRQ(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES));
4500         CHKERRQ(VecAssemblyBegin(v));
4501         CHKERRQ(VecAssemblyEnd(v));
4502         CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v));
4503       }
4504       CHKERRQ(VecResetArray(pcbddc->vec1_R));
4505     }
4506     /* if n_R == 0 the object is not destroyed */
4507     CHKERRQ(MatDestroy(&A_RV));
4508   }
4509   CHKERRQ(VecDestroy(&dummy_vec));
4510 
4511   if (n_constraints) {
4512     Mat B;
4513 
4514     CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B));
4515     CHKERRQ(MatScale(S_CC,m_one));
4516     CHKERRQ(MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B));
4517     CHKERRQ(MatProductSetType(B,MATPRODUCT_AB));
4518     CHKERRQ(MatProductSetFromOptions(B));
4519     CHKERRQ(MatProductSymbolic(B));
4520     CHKERRQ(MatProductNumeric(B));
4521 
4522     CHKERRQ(MatScale(S_CC,m_one));
4523     if (n_vertices) {
4524       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4525         CHKERRQ(MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC));
4526       } else {
4527         Mat S_VCt;
4528 
4529         if (lda_rhs != n_R) {
4530           CHKERRQ(MatDestroy(&B));
4531           CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B));
4532           CHKERRQ(MatDenseSetLDA(B,lda_rhs));
4533         }
4534         CHKERRQ(MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt));
4535         CHKERRQ(MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN));
4536         CHKERRQ(MatDestroy(&S_VCt));
4537       }
4538     }
4539     CHKERRQ(MatDestroy(&B));
4540     /* coarse basis functions */
4541     for (i=0;i<n_constraints;i++) {
4542       Vec v;
4543 
4544       CHKERRQ(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i));
4545       CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v));
4546       CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4547       CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4548       CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v));
4549       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4550         PetscInt    j;
4551         PetscScalar zero = 0.0;
4552         CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v));
4553         CHKERRQ(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4554         CHKERRQ(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4555         for (j=0;j<pcbddc->benign_n;j++) CHKERRQ(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES));
4556         CHKERRQ(VecAssemblyBegin(v));
4557         CHKERRQ(VecAssemblyEnd(v));
4558         CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v));
4559       }
4560       CHKERRQ(VecResetArray(pcbddc->vec1_R));
4561     }
4562   }
4563   if (n_constraints) {
4564     CHKERRQ(MatDestroy(&local_auxmat2_R));
4565   }
4566   CHKERRQ(PetscFree(p0_lidx_I));
4567 
4568   /* coarse matrix entries relative to B_0 */
4569   if (pcbddc->benign_n) {
4570     Mat               B0_B,B0_BPHI;
4571     IS                is_dummy;
4572     const PetscScalar *data;
4573     PetscInt          j;
4574 
4575     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy));
4576     CHKERRQ(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B));
4577     CHKERRQ(ISDestroy(&is_dummy));
4578     CHKERRQ(MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI));
4579     CHKERRQ(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI));
4580     CHKERRQ(MatDenseGetArrayRead(B0_BPHI,&data));
4581     for (j=0;j<pcbddc->benign_n;j++) {
4582       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4583       for (i=0;i<pcbddc->local_primal_size;i++) {
4584         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4585         coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4586       }
4587     }
4588     CHKERRQ(MatDenseRestoreArrayRead(B0_BPHI,&data));
4589     CHKERRQ(MatDestroy(&B0_B));
4590     CHKERRQ(MatDestroy(&B0_BPHI));
4591   }
4592 
4593   /* compute other basis functions for non-symmetric problems */
4594   if (!pcbddc->symmetric_primal) {
4595     Mat         B_V=NULL,B_C=NULL;
4596     PetscScalar *marray;
4597 
4598     if (n_constraints) {
4599       Mat S_CCT,C_CRT;
4600 
4601       CHKERRQ(MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT));
4602       CHKERRQ(MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT));
4603       CHKERRQ(MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C));
4604       CHKERRQ(MatDestroy(&S_CCT));
4605       if (n_vertices) {
4606         Mat S_VCT;
4607 
4608         CHKERRQ(MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT));
4609         CHKERRQ(MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V));
4610         CHKERRQ(MatDestroy(&S_VCT));
4611       }
4612       CHKERRQ(MatDestroy(&C_CRT));
4613     } else {
4614       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V));
4615     }
4616     if (n_vertices && n_R) {
4617       PetscScalar    *av,*marray;
4618       const PetscInt *xadj,*adjncy;
4619       PetscInt       n;
4620       PetscBool      flg_row;
4621 
4622       /* B_V = B_V - A_VR^T */
4623       CHKERRQ(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR));
4624       CHKERRQ(MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row));
4625       CHKERRQ(MatSeqAIJGetArray(A_VR,&av));
4626       CHKERRQ(MatDenseGetArray(B_V,&marray));
4627       for (i=0;i<n;i++) {
4628         PetscInt j;
4629         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4630       }
4631       CHKERRQ(MatDenseRestoreArray(B_V,&marray));
4632       CHKERRQ(MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row));
4633       CHKERRQ(MatDestroy(&A_VR));
4634     }
4635 
4636     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4637     if (n_vertices) {
4638       CHKERRQ(MatDenseGetArray(B_V,&marray));
4639       for (i=0;i<n_vertices;i++) {
4640         CHKERRQ(VecPlaceArray(pcbddc->vec1_R,marray+i*n_R));
4641         CHKERRQ(VecPlaceArray(pcbddc->vec2_R,work+i*n_R));
4642         CHKERRQ(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R));
4643         CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R));
4644         CHKERRQ(VecResetArray(pcbddc->vec1_R));
4645         CHKERRQ(VecResetArray(pcbddc->vec2_R));
4646       }
4647       CHKERRQ(MatDenseRestoreArray(B_V,&marray));
4648     }
4649     if (B_C) {
4650       CHKERRQ(MatDenseGetArray(B_C,&marray));
4651       for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4652         CHKERRQ(VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R));
4653         CHKERRQ(VecPlaceArray(pcbddc->vec2_R,work+i*n_R));
4654         CHKERRQ(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R));
4655         CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R));
4656         CHKERRQ(VecResetArray(pcbddc->vec1_R));
4657         CHKERRQ(VecResetArray(pcbddc->vec2_R));
4658       }
4659       CHKERRQ(MatDenseRestoreArray(B_C,&marray));
4660     }
4661     /* coarse basis functions */
4662     for (i=0;i<pcbddc->local_primal_size;i++) {
4663       Vec  v;
4664 
4665       CHKERRQ(VecPlaceArray(pcbddc->vec1_R,work+i*n_R));
4666       CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v));
4667       CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4668       CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4669       if (i<n_vertices) {
4670         PetscScalar one = 1.0;
4671         CHKERRQ(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES));
4672         CHKERRQ(VecAssemblyBegin(v));
4673         CHKERRQ(VecAssemblyEnd(v));
4674       }
4675       CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v));
4676 
4677       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4678         CHKERRQ(MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v));
4679         CHKERRQ(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4680         CHKERRQ(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD));
4681         CHKERRQ(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v));
4682       }
4683       CHKERRQ(VecResetArray(pcbddc->vec1_R));
4684     }
4685     CHKERRQ(MatDestroy(&B_V));
4686     CHKERRQ(MatDestroy(&B_C));
4687   }
4688 
4689   /* free memory */
4690   CHKERRQ(PetscFree(idx_V_B));
4691   CHKERRQ(MatDestroy(&S_VV));
4692   CHKERRQ(MatDestroy(&S_CV));
4693   CHKERRQ(MatDestroy(&S_VC));
4694   CHKERRQ(MatDestroy(&S_CC));
4695   CHKERRQ(PetscFree(work));
4696   if (n_vertices) {
4697     CHKERRQ(MatDestroy(&A_VR));
4698   }
4699   if (n_constraints) {
4700     CHKERRQ(MatDestroy(&C_CR));
4701   }
4702   CHKERRQ(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0));
4703 
4704   /* Checking coarse_sub_mat and coarse basis functios */
4705   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4706   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4707   if (pcbddc->dbg_flag) {
4708     Mat         coarse_sub_mat;
4709     Mat         AUXMAT,TM1,TM2,TM3,TM4;
4710     Mat         coarse_phi_D,coarse_phi_B;
4711     Mat         coarse_psi_D,coarse_psi_B;
4712     Mat         A_II,A_BB,A_IB,A_BI;
4713     Mat         C_B,CPHI;
4714     IS          is_dummy;
4715     Vec         mones;
4716     MatType     checkmattype=MATSEQAIJ;
4717     PetscReal   real_value;
4718 
4719     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4720       Mat A;
4721       CHKERRQ(PCBDDCBenignProject(pc,NULL,NULL,&A));
4722       CHKERRQ(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II));
4723       CHKERRQ(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB));
4724       CHKERRQ(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI));
4725       CHKERRQ(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB));
4726       CHKERRQ(MatDestroy(&A));
4727     } else {
4728       CHKERRQ(MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II));
4729       CHKERRQ(MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB));
4730       CHKERRQ(MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI));
4731       CHKERRQ(MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB));
4732     }
4733     CHKERRQ(MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D));
4734     CHKERRQ(MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B));
4735     if (!pcbddc->symmetric_primal) {
4736       CHKERRQ(MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D));
4737       CHKERRQ(MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B));
4738     }
4739     CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat));
4740 
4741     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n"));
4742     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal));
4743     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
4744     if (!pcbddc->symmetric_primal) {
4745       CHKERRQ(MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT));
4746       CHKERRQ(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1));
4747       CHKERRQ(MatDestroy(&AUXMAT));
4748       CHKERRQ(MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT));
4749       CHKERRQ(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2));
4750       CHKERRQ(MatDestroy(&AUXMAT));
4751       CHKERRQ(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT));
4752       CHKERRQ(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3));
4753       CHKERRQ(MatDestroy(&AUXMAT));
4754       CHKERRQ(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT));
4755       CHKERRQ(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4));
4756       CHKERRQ(MatDestroy(&AUXMAT));
4757     } else {
4758       CHKERRQ(MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1));
4759       CHKERRQ(MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2));
4760       CHKERRQ(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT));
4761       CHKERRQ(MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3));
4762       CHKERRQ(MatDestroy(&AUXMAT));
4763       CHKERRQ(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT));
4764       CHKERRQ(MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4));
4765       CHKERRQ(MatDestroy(&AUXMAT));
4766     }
4767     CHKERRQ(MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN));
4768     CHKERRQ(MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN));
4769     CHKERRQ(MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN));
4770     CHKERRQ(MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1));
4771     if (pcbddc->benign_n) {
4772       Mat               B0_B,B0_BPHI;
4773       const PetscScalar *data2;
4774       PetscScalar       *data;
4775       PetscInt          j;
4776 
4777       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy));
4778       CHKERRQ(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B));
4779       CHKERRQ(MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI));
4780       CHKERRQ(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI));
4781       CHKERRQ(MatDenseGetArray(TM1,&data));
4782       CHKERRQ(MatDenseGetArrayRead(B0_BPHI,&data2));
4783       for (j=0;j<pcbddc->benign_n;j++) {
4784         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4785         for (i=0;i<pcbddc->local_primal_size;i++) {
4786           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4787           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4788         }
4789       }
4790       CHKERRQ(MatDenseRestoreArray(TM1,&data));
4791       CHKERRQ(MatDenseRestoreArrayRead(B0_BPHI,&data2));
4792       CHKERRQ(MatDestroy(&B0_B));
4793       CHKERRQ(ISDestroy(&is_dummy));
4794       CHKERRQ(MatDestroy(&B0_BPHI));
4795     }
4796 #if 0
4797   {
4798     PetscViewer viewer;
4799     char filename[256];
4800     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4801     CHKERRQ(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
4802     CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
4803     CHKERRQ(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed"));
4804     CHKERRQ(MatView(coarse_sub_mat,viewer));
4805     CHKERRQ(PetscObjectSetName((PetscObject)TM1,"projected"));
4806     CHKERRQ(MatView(TM1,viewer));
4807     if (pcbddc->coarse_phi_B) {
4808       CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
4809       CHKERRQ(MatView(pcbddc->coarse_phi_B,viewer));
4810     }
4811     if (pcbddc->coarse_phi_D) {
4812       CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
4813       CHKERRQ(MatView(pcbddc->coarse_phi_D,viewer));
4814     }
4815     if (pcbddc->coarse_psi_B) {
4816       CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
4817       CHKERRQ(MatView(pcbddc->coarse_psi_B,viewer));
4818     }
4819     if (pcbddc->coarse_psi_D) {
4820       CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
4821       CHKERRQ(MatView(pcbddc->coarse_psi_D,viewer));
4822     }
4823     CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
4824     CHKERRQ(MatView(pcbddc->local_mat,viewer));
4825     CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
4826     CHKERRQ(MatView(pcbddc->ConstraintMatrix,viewer));
4827     CHKERRQ(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
4828     CHKERRQ(ISView(pcis->is_I_local,viewer));
4829     CHKERRQ(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
4830     CHKERRQ(ISView(pcis->is_B_local,viewer));
4831     CHKERRQ(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
4832     CHKERRQ(ISView(pcbddc->is_R_local,viewer));
4833     CHKERRQ(PetscViewerDestroy(&viewer));
4834   }
4835 #endif
4836     CHKERRQ(MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN));
4837     CHKERRQ(MatNorm(TM1,NORM_FROBENIUS,&real_value));
4838     CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
4839     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value));
4840 
4841     /* check constraints */
4842     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy));
4843     CHKERRQ(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B));
4844     if (!pcbddc->benign_n) { /* TODO: add benign case */
4845       CHKERRQ(MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI));
4846     } else {
4847       PetscScalar *data;
4848       Mat         tmat;
4849       CHKERRQ(MatDenseGetArray(pcbddc->coarse_phi_B,&data));
4850       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat));
4851       CHKERRQ(MatDenseRestoreArray(pcbddc->coarse_phi_B,&data));
4852       CHKERRQ(MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI));
4853       CHKERRQ(MatDestroy(&tmat));
4854     }
4855     CHKERRQ(MatCreateVecs(CPHI,&mones,NULL));
4856     CHKERRQ(VecSet(mones,-1.0));
4857     CHKERRQ(MatDiagonalSet(CPHI,mones,ADD_VALUES));
4858     CHKERRQ(MatNorm(CPHI,NORM_FROBENIUS,&real_value));
4859     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value));
4860     if (!pcbddc->symmetric_primal) {
4861       CHKERRQ(MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI));
4862       CHKERRQ(VecSet(mones,-1.0));
4863       CHKERRQ(MatDiagonalSet(CPHI,mones,ADD_VALUES));
4864       CHKERRQ(MatNorm(CPHI,NORM_FROBENIUS,&real_value));
4865       CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value));
4866     }
4867     CHKERRQ(MatDestroy(&C_B));
4868     CHKERRQ(MatDestroy(&CPHI));
4869     CHKERRQ(ISDestroy(&is_dummy));
4870     CHKERRQ(VecDestroy(&mones));
4871     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
4872     CHKERRQ(MatDestroy(&A_II));
4873     CHKERRQ(MatDestroy(&A_BB));
4874     CHKERRQ(MatDestroy(&A_IB));
4875     CHKERRQ(MatDestroy(&A_BI));
4876     CHKERRQ(MatDestroy(&TM1));
4877     CHKERRQ(MatDestroy(&TM2));
4878     CHKERRQ(MatDestroy(&TM3));
4879     CHKERRQ(MatDestroy(&TM4));
4880     CHKERRQ(MatDestroy(&coarse_phi_D));
4881     CHKERRQ(MatDestroy(&coarse_phi_B));
4882     if (!pcbddc->symmetric_primal) {
4883       CHKERRQ(MatDestroy(&coarse_psi_D));
4884       CHKERRQ(MatDestroy(&coarse_psi_B));
4885     }
4886     CHKERRQ(MatDestroy(&coarse_sub_mat));
4887   }
4888   /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */
4889   {
4890     PetscBool gpu;
4891 
4892     CHKERRQ(PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu));
4893     if (gpu) {
4894       if (pcbddc->local_auxmat1) {
4895         CHKERRQ(MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1));
4896       }
4897       if (pcbddc->local_auxmat2) {
4898         CHKERRQ(MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2));
4899       }
4900       if (pcbddc->coarse_phi_B) {
4901         CHKERRQ(MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B));
4902       }
4903       if (pcbddc->coarse_phi_D) {
4904         CHKERRQ(MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D));
4905       }
4906       if (pcbddc->coarse_psi_B) {
4907         CHKERRQ(MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B));
4908       }
4909       if (pcbddc->coarse_psi_D) {
4910         CHKERRQ(MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D));
4911       }
4912     }
4913   }
4914   /* get back data */
4915   *coarse_submat_vals_n = coarse_submat_vals;
4916   PetscFunctionReturn(0);
4917 }
4918 
4919 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4920 {
4921   Mat            *work_mat;
4922   IS             isrow_s,iscol_s;
4923   PetscBool      rsorted,csorted;
4924   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
4925 
4926   PetscFunctionBegin;
4927   CHKERRQ(ISSorted(isrow,&rsorted));
4928   CHKERRQ(ISSorted(iscol,&csorted));
4929   CHKERRQ(ISGetLocalSize(isrow,&rsize));
4930   CHKERRQ(ISGetLocalSize(iscol,&csize));
4931 
4932   if (!rsorted) {
4933     const PetscInt *idxs;
4934     PetscInt *idxs_sorted,i;
4935 
4936     CHKERRQ(PetscMalloc1(rsize,&idxs_perm_r));
4937     CHKERRQ(PetscMalloc1(rsize,&idxs_sorted));
4938     for (i=0;i<rsize;i++) {
4939       idxs_perm_r[i] = i;
4940     }
4941     CHKERRQ(ISGetIndices(isrow,&idxs));
4942     CHKERRQ(PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r));
4943     for (i=0;i<rsize;i++) {
4944       idxs_sorted[i] = idxs[idxs_perm_r[i]];
4945     }
4946     CHKERRQ(ISRestoreIndices(isrow,&idxs));
4947     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s));
4948   } else {
4949     CHKERRQ(PetscObjectReference((PetscObject)isrow));
4950     isrow_s = isrow;
4951   }
4952 
4953   if (!csorted) {
4954     if (isrow == iscol) {
4955       CHKERRQ(PetscObjectReference((PetscObject)isrow_s));
4956       iscol_s = isrow_s;
4957     } else {
4958       const PetscInt *idxs;
4959       PetscInt       *idxs_sorted,i;
4960 
4961       CHKERRQ(PetscMalloc1(csize,&idxs_perm_c));
4962       CHKERRQ(PetscMalloc1(csize,&idxs_sorted));
4963       for (i=0;i<csize;i++) {
4964         idxs_perm_c[i] = i;
4965       }
4966       CHKERRQ(ISGetIndices(iscol,&idxs));
4967       CHKERRQ(PetscSortIntWithPermutation(csize,idxs,idxs_perm_c));
4968       for (i=0;i<csize;i++) {
4969         idxs_sorted[i] = idxs[idxs_perm_c[i]];
4970       }
4971       CHKERRQ(ISRestoreIndices(iscol,&idxs));
4972       CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s));
4973     }
4974   } else {
4975     CHKERRQ(PetscObjectReference((PetscObject)iscol));
4976     iscol_s = iscol;
4977   }
4978 
4979   CHKERRQ(MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat));
4980 
4981   if (!rsorted || !csorted) {
4982     Mat      new_mat;
4983     IS       is_perm_r,is_perm_c;
4984 
4985     if (!rsorted) {
4986       PetscInt *idxs_r,i;
4987       CHKERRQ(PetscMalloc1(rsize,&idxs_r));
4988       for (i=0;i<rsize;i++) {
4989         idxs_r[idxs_perm_r[i]] = i;
4990       }
4991       CHKERRQ(PetscFree(idxs_perm_r));
4992       CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r));
4993     } else {
4994       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r));
4995     }
4996     CHKERRQ(ISSetPermutation(is_perm_r));
4997 
4998     if (!csorted) {
4999       if (isrow_s == iscol_s) {
5000         CHKERRQ(PetscObjectReference((PetscObject)is_perm_r));
5001         is_perm_c = is_perm_r;
5002       } else {
5003         PetscInt *idxs_c,i;
5004         PetscCheck(idxs_perm_c,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
5005         CHKERRQ(PetscMalloc1(csize,&idxs_c));
5006         for (i=0;i<csize;i++) {
5007           idxs_c[idxs_perm_c[i]] = i;
5008         }
5009         CHKERRQ(PetscFree(idxs_perm_c));
5010         CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c));
5011       }
5012     } else {
5013       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c));
5014     }
5015     CHKERRQ(ISSetPermutation(is_perm_c));
5016 
5017     CHKERRQ(MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat));
5018     CHKERRQ(MatDestroy(&work_mat[0]));
5019     work_mat[0] = new_mat;
5020     CHKERRQ(ISDestroy(&is_perm_r));
5021     CHKERRQ(ISDestroy(&is_perm_c));
5022   }
5023 
5024   CHKERRQ(PetscObjectReference((PetscObject)work_mat[0]));
5025   *B = work_mat[0];
5026   CHKERRQ(MatDestroyMatrices(1,&work_mat));
5027   CHKERRQ(ISDestroy(&isrow_s));
5028   CHKERRQ(ISDestroy(&iscol_s));
5029   PetscFunctionReturn(0);
5030 }
5031 
5032 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5033 {
5034   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5035   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5036   Mat            new_mat,lA;
5037   IS             is_local,is_global;
5038   PetscInt       local_size;
5039   PetscBool      isseqaij;
5040 
5041   PetscFunctionBegin;
5042   CHKERRQ(MatDestroy(&pcbddc->local_mat));
5043   CHKERRQ(MatGetSize(matis->A,&local_size,NULL));
5044   CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local));
5045   CHKERRQ(ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global));
5046   CHKERRQ(ISDestroy(&is_local));
5047   CHKERRQ(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat));
5048   CHKERRQ(ISDestroy(&is_global));
5049 
5050   if (pcbddc->dbg_flag) {
5051     Vec       x,x_change;
5052     PetscReal error;
5053 
5054     CHKERRQ(MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change));
5055     CHKERRQ(VecSetRandom(x,NULL));
5056     CHKERRQ(MatMult(ChangeOfBasisMatrix,x,x_change));
5057     CHKERRQ(VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD));
5058     CHKERRQ(VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD));
5059     CHKERRQ(MatMult(new_mat,matis->x,matis->y));
5060     if (!pcbddc->change_interior) {
5061       const PetscScalar *x,*y,*v;
5062       PetscReal         lerror = 0.;
5063       PetscInt          i;
5064 
5065       CHKERRQ(VecGetArrayRead(matis->x,&x));
5066       CHKERRQ(VecGetArrayRead(matis->y,&y));
5067       CHKERRQ(VecGetArrayRead(matis->counter,&v));
5068       for (i=0;i<local_size;i++)
5069         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5070           lerror = PetscAbsScalar(x[i]-y[i]);
5071       CHKERRQ(VecRestoreArrayRead(matis->x,&x));
5072       CHKERRQ(VecRestoreArrayRead(matis->y,&y));
5073       CHKERRQ(VecRestoreArrayRead(matis->counter,&v));
5074       CHKERRMPI(MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc)));
5075       if (error > PETSC_SMALL) {
5076         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5077           SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5078         } else {
5079           SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5080         }
5081       }
5082     }
5083     CHKERRQ(VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE));
5084     CHKERRQ(VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE));
5085     CHKERRQ(VecAXPY(x,-1.0,x_change));
5086     CHKERRQ(VecNorm(x,NORM_INFINITY,&error));
5087     if (error > PETSC_SMALL) {
5088       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5089         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5090       } else {
5091         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5092       }
5093     }
5094     CHKERRQ(VecDestroy(&x));
5095     CHKERRQ(VecDestroy(&x_change));
5096   }
5097 
5098   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5099   CHKERRQ(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA));
5100 
5101   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5102   CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij));
5103   if (isseqaij) {
5104     CHKERRQ(MatDestroy(&pcbddc->local_mat));
5105     CHKERRQ(MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat));
5106     if (lA) {
5107       Mat work;
5108       CHKERRQ(MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work));
5109       CHKERRQ(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work));
5110       CHKERRQ(MatDestroy(&work));
5111     }
5112   } else {
5113     Mat work_mat;
5114 
5115     CHKERRQ(MatDestroy(&pcbddc->local_mat));
5116     CHKERRQ(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat));
5117     CHKERRQ(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat));
5118     CHKERRQ(MatDestroy(&work_mat));
5119     if (lA) {
5120       Mat work;
5121       CHKERRQ(MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat));
5122       CHKERRQ(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work));
5123       CHKERRQ(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work));
5124       CHKERRQ(MatDestroy(&work));
5125     }
5126   }
5127   if (matis->A->symmetric_set) {
5128     CHKERRQ(MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric));
5129 #if !defined(PETSC_USE_COMPLEX)
5130     CHKERRQ(MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric));
5131 #endif
5132   }
5133   CHKERRQ(MatDestroy(&new_mat));
5134   PetscFunctionReturn(0);
5135 }
5136 
5137 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5138 {
5139   PC_IS*          pcis = (PC_IS*)(pc->data);
5140   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
5141   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5142   PetscInt        *idx_R_local=NULL;
5143   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
5144   PetscInt        vbs,bs;
5145   PetscBT         bitmask=NULL;
5146 
5147   PetscFunctionBegin;
5148   /*
5149     No need to setup local scatters if
5150       - primal space is unchanged
5151         AND
5152       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5153         AND
5154       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5155   */
5156   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5157     PetscFunctionReturn(0);
5158   }
5159   /* destroy old objects */
5160   CHKERRQ(ISDestroy(&pcbddc->is_R_local));
5161   CHKERRQ(VecScatterDestroy(&pcbddc->R_to_B));
5162   CHKERRQ(VecScatterDestroy(&pcbddc->R_to_D));
5163   /* Set Non-overlapping dimensions */
5164   n_B = pcis->n_B;
5165   n_D = pcis->n - n_B;
5166   n_vertices = pcbddc->n_vertices;
5167 
5168   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
5169 
5170   /* create auxiliary bitmask and allocate workspace */
5171   if (!sub_schurs || !sub_schurs->reuse_solver) {
5172     CHKERRQ(PetscMalloc1(pcis->n-n_vertices,&idx_R_local));
5173     CHKERRQ(PetscBTCreate(pcis->n,&bitmask));
5174     for (i=0;i<n_vertices;i++) {
5175       CHKERRQ(PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]));
5176     }
5177 
5178     for (i=0, n_R=0; i<pcis->n; i++) {
5179       if (!PetscBTLookup(bitmask,i)) {
5180         idx_R_local[n_R++] = i;
5181       }
5182     }
5183   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5184     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5185 
5186     CHKERRQ(ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local));
5187     CHKERRQ(ISGetLocalSize(reuse_solver->is_R,&n_R));
5188   }
5189 
5190   /* Block code */
5191   vbs = 1;
5192   CHKERRQ(MatGetBlockSize(pcbddc->local_mat,&bs));
5193   if (bs>1 && !(n_vertices%bs)) {
5194     PetscBool is_blocked = PETSC_TRUE;
5195     PetscInt  *vary;
5196     if (!sub_schurs || !sub_schurs->reuse_solver) {
5197       CHKERRQ(PetscMalloc1(pcis->n/bs,&vary));
5198       CHKERRQ(PetscArrayzero(vary,pcis->n/bs));
5199       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5200       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5201       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5202       for (i=0; i<pcis->n/bs; i++) {
5203         if (vary[i]!=0 && vary[i]!=bs) {
5204           is_blocked = PETSC_FALSE;
5205           break;
5206         }
5207       }
5208       CHKERRQ(PetscFree(vary));
5209     } else {
5210       /* Verify directly the R set */
5211       for (i=0; i<n_R/bs; i++) {
5212         PetscInt j,node=idx_R_local[bs*i];
5213         for (j=1; j<bs; j++) {
5214           if (node != idx_R_local[bs*i+j]-j) {
5215             is_blocked = PETSC_FALSE;
5216             break;
5217           }
5218         }
5219       }
5220     }
5221     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5222       vbs = bs;
5223       for (i=0;i<n_R/vbs;i++) {
5224         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5225       }
5226     }
5227   }
5228   CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local));
5229   if (sub_schurs && sub_schurs->reuse_solver) {
5230     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5231 
5232     CHKERRQ(ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local));
5233     CHKERRQ(ISDestroy(&reuse_solver->is_R));
5234     CHKERRQ(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5235     reuse_solver->is_R = pcbddc->is_R_local;
5236   } else {
5237     CHKERRQ(PetscFree(idx_R_local));
5238   }
5239 
5240   /* print some info if requested */
5241   if (pcbddc->dbg_flag) {
5242     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n"));
5243     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
5244     CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5245     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank));
5246     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B));
5247     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size));
5248     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
5249   }
5250 
5251   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5252   if (!sub_schurs || !sub_schurs->reuse_solver) {
5253     IS       is_aux1,is_aux2;
5254     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
5255 
5256     CHKERRQ(ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local));
5257     CHKERRQ(PetscMalloc1(pcis->n_B-n_vertices,&aux_array1));
5258     CHKERRQ(PetscMalloc1(pcis->n_B-n_vertices,&aux_array2));
5259     CHKERRQ(ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices));
5260     for (i=0; i<n_D; i++) {
5261       CHKERRQ(PetscBTSet(bitmask,is_indices[i]));
5262     }
5263     CHKERRQ(ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices));
5264     for (i=0, j=0; i<n_R; i++) {
5265       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5266         aux_array1[j++] = i;
5267       }
5268     }
5269     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1));
5270     CHKERRQ(ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices));
5271     for (i=0, j=0; i<n_B; i++) {
5272       if (!PetscBTLookup(bitmask,is_indices[i])) {
5273         aux_array2[j++] = i;
5274       }
5275     }
5276     CHKERRQ(ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices));
5277     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2));
5278     CHKERRQ(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B));
5279     CHKERRQ(ISDestroy(&is_aux1));
5280     CHKERRQ(ISDestroy(&is_aux2));
5281 
5282     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5283       CHKERRQ(PetscMalloc1(n_D,&aux_array1));
5284       for (i=0, j=0; i<n_R; i++) {
5285         if (PetscBTLookup(bitmask,idx_R_local[i])) {
5286           aux_array1[j++] = i;
5287         }
5288       }
5289       CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1));
5290       CHKERRQ(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D));
5291       CHKERRQ(ISDestroy(&is_aux1));
5292     }
5293     CHKERRQ(PetscBTDestroy(&bitmask));
5294     CHKERRQ(ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local));
5295   } else {
5296     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5297     IS                 tis;
5298     PetscInt           schur_size;
5299 
5300     CHKERRQ(ISGetLocalSize(reuse_solver->is_B,&schur_size));
5301     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis));
5302     CHKERRQ(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B));
5303     CHKERRQ(ISDestroy(&tis));
5304     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5305       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis));
5306       CHKERRQ(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D));
5307       CHKERRQ(ISDestroy(&tis));
5308     }
5309   }
5310   PetscFunctionReturn(0);
5311 }
5312 
5313 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5314 {
5315   MatNullSpace   NullSpace;
5316   Mat            dmat;
5317   const Vec      *nullvecs;
5318   Vec            v,v2,*nullvecs2;
5319   VecScatter     sct = NULL;
5320   PetscContainer c;
5321   PetscScalar    *ddata;
5322   PetscInt       k,nnsp_size,bsiz,bsiz2,n,N,bs;
5323   PetscBool      nnsp_has_cnst;
5324 
5325   PetscFunctionBegin;
5326   if (!is && !B) { /* MATIS */
5327     Mat_IS* matis = (Mat_IS*)A->data;
5328 
5329     if (!B) {
5330       CHKERRQ(MatISGetLocalMat(A,&B));
5331     }
5332     sct  = matis->cctx;
5333     CHKERRQ(PetscObjectReference((PetscObject)sct));
5334   } else {
5335     CHKERRQ(MatGetNullSpace(B,&NullSpace));
5336     if (!NullSpace) {
5337       CHKERRQ(MatGetNearNullSpace(B,&NullSpace));
5338     }
5339     if (NullSpace) PetscFunctionReturn(0);
5340   }
5341   CHKERRQ(MatGetNullSpace(A,&NullSpace));
5342   if (!NullSpace) {
5343     CHKERRQ(MatGetNearNullSpace(A,&NullSpace));
5344   }
5345   if (!NullSpace) PetscFunctionReturn(0);
5346 
5347   CHKERRQ(MatCreateVecs(A,&v,NULL));
5348   CHKERRQ(MatCreateVecs(B,&v2,NULL));
5349   if (!sct) {
5350     CHKERRQ(VecScatterCreate(v,is,v2,NULL,&sct));
5351   }
5352   CHKERRQ(MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs));
5353   bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst;
5354   CHKERRQ(PetscMalloc1(bsiz,&nullvecs2));
5355   CHKERRQ(VecGetBlockSize(v2,&bs));
5356   CHKERRQ(VecGetSize(v2,&N));
5357   CHKERRQ(VecGetLocalSize(v2,&n));
5358   CHKERRQ(PetscMalloc1(n*bsiz,&ddata));
5359   for (k=0;k<nnsp_size;k++) {
5360     CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]));
5361     CHKERRQ(VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD));
5362     CHKERRQ(VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD));
5363   }
5364   if (nnsp_has_cnst) {
5365     CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]));
5366     CHKERRQ(VecSet(nullvecs2[nnsp_size],1.0));
5367   }
5368   CHKERRQ(PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2));
5369   CHKERRQ(MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace));
5370 
5371   CHKERRQ(MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat));
5372   CHKERRQ(PetscContainerCreate(PetscObjectComm((PetscObject)B),&c));
5373   CHKERRQ(PetscContainerSetPointer(c,ddata));
5374   CHKERRQ(PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault));
5375   CHKERRQ(PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c));
5376   CHKERRQ(PetscContainerDestroy(&c));
5377   CHKERRQ(PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat));
5378   CHKERRQ(MatDestroy(&dmat));
5379 
5380   for (k=0;k<bsiz;k++) {
5381     CHKERRQ(VecDestroy(&nullvecs2[k]));
5382   }
5383   CHKERRQ(PetscFree(nullvecs2));
5384   CHKERRQ(MatSetNearNullSpace(B,NullSpace));
5385   CHKERRQ(MatNullSpaceDestroy(&NullSpace));
5386   CHKERRQ(VecDestroy(&v));
5387   CHKERRQ(VecDestroy(&v2));
5388   CHKERRQ(VecScatterDestroy(&sct));
5389   PetscFunctionReturn(0);
5390 }
5391 
5392 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5393 {
5394   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
5395   PC_IS          *pcis = (PC_IS*)pc->data;
5396   PC             pc_temp;
5397   Mat            A_RR;
5398   MatNullSpace   nnsp;
5399   MatReuse       reuse;
5400   PetscScalar    m_one = -1.0;
5401   PetscReal      value;
5402   PetscInt       n_D,n_R;
5403   PetscBool      issbaij,opts;
5404   void           (*f)(void) = NULL;
5405   char           dir_prefix[256],neu_prefix[256],str_level[16];
5406   size_t         len;
5407 
5408   PetscFunctionBegin;
5409   CHKERRQ(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0));
5410   /* approximate solver, propagate NearNullSpace if needed */
5411   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5412     MatNullSpace gnnsp1,gnnsp2;
5413     PetscBool    lhas,ghas;
5414 
5415     CHKERRQ(MatGetNearNullSpace(pcbddc->local_mat,&nnsp));
5416     CHKERRQ(MatGetNearNullSpace(pc->pmat,&gnnsp1));
5417     CHKERRQ(MatGetNullSpace(pc->pmat,&gnnsp2));
5418     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5419     CHKERRMPI(MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc)));
5420     if (!ghas && (gnnsp1 || gnnsp2)) {
5421       CHKERRQ(MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL));
5422     }
5423   }
5424 
5425   /* compute prefixes */
5426   CHKERRQ(PetscStrcpy(dir_prefix,""));
5427   CHKERRQ(PetscStrcpy(neu_prefix,""));
5428   if (!pcbddc->current_level) {
5429     CHKERRQ(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix)));
5430     CHKERRQ(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix)));
5431     CHKERRQ(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix)));
5432     CHKERRQ(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix)));
5433   } else {
5434     CHKERRQ(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level)));
5435     CHKERRQ(PetscStrlen(((PetscObject)pc)->prefix,&len));
5436     len -= 15; /* remove "pc_bddc_coarse_" */
5437     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5438     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5439     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5440     CHKERRQ(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1));
5441     CHKERRQ(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1));
5442     CHKERRQ(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix)));
5443     CHKERRQ(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix)));
5444     CHKERRQ(PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix)));
5445     CHKERRQ(PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix)));
5446   }
5447 
5448   /* DIRICHLET PROBLEM */
5449   if (dirichlet) {
5450     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5451     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5452       PetscCheckFalse(!sub_schurs || !sub_schurs->reuse_solver,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
5453       if (pcbddc->dbg_flag) {
5454         Mat    A_IIn;
5455 
5456         CHKERRQ(PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn));
5457         CHKERRQ(MatDestroy(&pcis->A_II));
5458         pcis->A_II = A_IIn;
5459       }
5460     }
5461     if (pcbddc->local_mat->symmetric_set) {
5462       CHKERRQ(MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric));
5463     }
5464     /* Matrix for Dirichlet problem is pcis->A_II */
5465     n_D  = pcis->n - pcis->n_B;
5466     opts = PETSC_FALSE;
5467     if (!pcbddc->ksp_D) { /* create object if not yet build */
5468       opts = PETSC_TRUE;
5469       CHKERRQ(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D));
5470       CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1));
5471       /* default */
5472       CHKERRQ(KSPSetType(pcbddc->ksp_D,KSPPREONLY));
5473       CHKERRQ(KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix));
5474       CHKERRQ(PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij));
5475       CHKERRQ(KSPGetPC(pcbddc->ksp_D,&pc_temp));
5476       if (issbaij) {
5477         CHKERRQ(PCSetType(pc_temp,PCCHOLESKY));
5478       } else {
5479         CHKERRQ(PCSetType(pc_temp,PCLU));
5480       }
5481       CHKERRQ(KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure));
5482     }
5483     CHKERRQ(MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix));
5484     CHKERRQ(KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II));
5485     /* Allow user's customization */
5486     if (opts) {
5487       CHKERRQ(KSPSetFromOptions(pcbddc->ksp_D));
5488     }
5489     CHKERRQ(MatGetNearNullSpace(pcis->pA_II,&nnsp));
5490     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5491       CHKERRQ(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II));
5492     }
5493     CHKERRQ(MatGetNearNullSpace(pcis->pA_II,&nnsp));
5494     CHKERRQ(KSPGetPC(pcbddc->ksp_D,&pc_temp));
5495     CHKERRQ(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f));
5496     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5497       PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5498       const PetscInt *idxs;
5499       PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;
5500 
5501       CHKERRQ(ISGetLocalSize(pcis->is_I_local,&nl));
5502       CHKERRQ(ISGetIndices(pcis->is_I_local,&idxs));
5503       CHKERRQ(PetscMalloc1(nl*cdim,&scoords));
5504       for (i=0;i<nl;i++) {
5505         for (d=0;d<cdim;d++) {
5506           scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5507         }
5508       }
5509       CHKERRQ(ISRestoreIndices(pcis->is_I_local,&idxs));
5510       CHKERRQ(PCSetCoordinates(pc_temp,cdim,nl,scoords));
5511       CHKERRQ(PetscFree(scoords));
5512     }
5513     if (sub_schurs && sub_schurs->reuse_solver) {
5514       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5515 
5516       CHKERRQ(KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver));
5517     }
5518 
5519     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5520     if (!n_D) {
5521       CHKERRQ(KSPGetPC(pcbddc->ksp_D,&pc_temp));
5522       CHKERRQ(PCSetType(pc_temp,PCNONE));
5523     }
5524     CHKERRQ(KSPSetUp(pcbddc->ksp_D));
5525     /* set ksp_D into pcis data */
5526     CHKERRQ(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5527     CHKERRQ(KSPDestroy(&pcis->ksp_D));
5528     pcis->ksp_D = pcbddc->ksp_D;
5529   }
5530 
5531   /* NEUMANN PROBLEM */
5532   A_RR = NULL;
5533   if (neumann) {
5534     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5535     PetscInt        ibs,mbs;
5536     PetscBool       issbaij, reuse_neumann_solver;
5537     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
5538 
5539     reuse_neumann_solver = PETSC_FALSE;
5540     if (sub_schurs && sub_schurs->reuse_solver) {
5541       IS iP;
5542 
5543       reuse_neumann_solver = PETSC_TRUE;
5544       CHKERRQ(PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP));
5545       if (iP) reuse_neumann_solver = PETSC_FALSE;
5546     }
5547     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5548     CHKERRQ(ISGetSize(pcbddc->is_R_local,&n_R));
5549     if (pcbddc->ksp_R) { /* already created ksp */
5550       PetscInt nn_R;
5551       CHKERRQ(KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR));
5552       CHKERRQ(PetscObjectReference((PetscObject)A_RR));
5553       CHKERRQ(MatGetSize(A_RR,&nn_R,NULL));
5554       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5555         CHKERRQ(KSPReset(pcbddc->ksp_R));
5556         CHKERRQ(MatDestroy(&A_RR));
5557         reuse = MAT_INITIAL_MATRIX;
5558       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5559         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5560           CHKERRQ(MatDestroy(&A_RR));
5561           reuse = MAT_INITIAL_MATRIX;
5562         } else { /* safe to reuse the matrix */
5563           reuse = MAT_REUSE_MATRIX;
5564         }
5565       }
5566       /* last check */
5567       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5568         CHKERRQ(MatDestroy(&A_RR));
5569         reuse = MAT_INITIAL_MATRIX;
5570       }
5571     } else { /* first time, so we need to create the matrix */
5572       reuse = MAT_INITIAL_MATRIX;
5573     }
5574     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5575        TODO: Get Rid of these conversions */
5576     CHKERRQ(MatGetBlockSize(pcbddc->local_mat,&mbs));
5577     CHKERRQ(ISGetBlockSize(pcbddc->is_R_local,&ibs));
5578     CHKERRQ(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij));
5579     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5580       if (matis->A == pcbddc->local_mat) {
5581         CHKERRQ(MatDestroy(&pcbddc->local_mat));
5582         CHKERRQ(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat));
5583       } else {
5584         CHKERRQ(MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat));
5585       }
5586     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5587       if (matis->A == pcbddc->local_mat) {
5588         CHKERRQ(MatDestroy(&pcbddc->local_mat));
5589         CHKERRQ(MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat));
5590       } else {
5591         CHKERRQ(MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat));
5592       }
5593     }
5594     /* extract A_RR */
5595     if (reuse_neumann_solver) {
5596       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5597 
5598       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5599         CHKERRQ(MatDestroy(&A_RR));
5600         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5601           CHKERRQ(PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR));
5602         } else {
5603           CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR));
5604         }
5605       } else {
5606         CHKERRQ(MatDestroy(&A_RR));
5607         CHKERRQ(PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL));
5608         CHKERRQ(PetscObjectReference((PetscObject)A_RR));
5609       }
5610     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5611       CHKERRQ(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR));
5612     }
5613     if (pcbddc->local_mat->symmetric_set) {
5614       CHKERRQ(MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric));
5615     }
5616     opts = PETSC_FALSE;
5617     if (!pcbddc->ksp_R) { /* create object if not present */
5618       opts = PETSC_TRUE;
5619       CHKERRQ(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R));
5620       CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1));
5621       /* default */
5622       CHKERRQ(KSPSetType(pcbddc->ksp_R,KSPPREONLY));
5623       CHKERRQ(KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix));
5624       CHKERRQ(KSPGetPC(pcbddc->ksp_R,&pc_temp));
5625       CHKERRQ(PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij));
5626       if (issbaij) {
5627         CHKERRQ(PCSetType(pc_temp,PCCHOLESKY));
5628       } else {
5629         CHKERRQ(PCSetType(pc_temp,PCLU));
5630       }
5631       CHKERRQ(KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure));
5632     }
5633     CHKERRQ(KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR));
5634     CHKERRQ(MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix));
5635     if (opts) { /* Allow user's customization once */
5636       CHKERRQ(KSPSetFromOptions(pcbddc->ksp_R));
5637     }
5638     CHKERRQ(MatGetNearNullSpace(A_RR,&nnsp));
5639     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5640       CHKERRQ(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR));
5641     }
5642     CHKERRQ(MatGetNearNullSpace(A_RR,&nnsp));
5643     CHKERRQ(KSPGetPC(pcbddc->ksp_R,&pc_temp));
5644     CHKERRQ(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f));
5645     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5646       PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5647       const PetscInt *idxs;
5648       PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;
5649 
5650       CHKERRQ(ISGetLocalSize(pcbddc->is_R_local,&nl));
5651       CHKERRQ(ISGetIndices(pcbddc->is_R_local,&idxs));
5652       CHKERRQ(PetscMalloc1(nl*cdim,&scoords));
5653       for (i=0;i<nl;i++) {
5654         for (d=0;d<cdim;d++) {
5655           scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5656         }
5657       }
5658       CHKERRQ(ISRestoreIndices(pcbddc->is_R_local,&idxs));
5659       CHKERRQ(PCSetCoordinates(pc_temp,cdim,nl,scoords));
5660       CHKERRQ(PetscFree(scoords));
5661     }
5662 
5663     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5664     if (!n_R) {
5665       CHKERRQ(KSPGetPC(pcbddc->ksp_R,&pc_temp));
5666       CHKERRQ(PCSetType(pc_temp,PCNONE));
5667     }
5668     /* Reuse solver if it is present */
5669     if (reuse_neumann_solver) {
5670       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5671 
5672       CHKERRQ(KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver));
5673     }
5674     CHKERRQ(KSPSetUp(pcbddc->ksp_R));
5675   }
5676 
5677   if (pcbddc->dbg_flag) {
5678     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
5679     CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5680     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n"));
5681   }
5682   CHKERRQ(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0));
5683 
5684   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5685   if (pcbddc->NullSpace_corr[0]) {
5686     CHKERRQ(PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE));
5687   }
5688   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5689     CHKERRQ(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]));
5690   }
5691   if (neumann && pcbddc->NullSpace_corr[2]) {
5692     CHKERRQ(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]));
5693   }
5694   /* check Dirichlet and Neumann solvers */
5695   if (pcbddc->dbg_flag) {
5696     if (dirichlet) { /* Dirichlet */
5697       CHKERRQ(VecSetRandom(pcis->vec1_D,NULL));
5698       CHKERRQ(MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D));
5699       CHKERRQ(KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D));
5700       CHKERRQ(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D));
5701       CHKERRQ(VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D));
5702       CHKERRQ(VecNorm(pcis->vec1_D,NORM_INFINITY,&value));
5703       CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value));
5704       CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
5705     }
5706     if (neumann) { /* Neumann */
5707       CHKERRQ(VecSetRandom(pcbddc->vec1_R,NULL));
5708       CHKERRQ(MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R));
5709       CHKERRQ(KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R));
5710       CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R));
5711       CHKERRQ(VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R));
5712       CHKERRQ(VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value));
5713       CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value));
5714       CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
5715     }
5716   }
5717   /* free Neumann problem's matrix */
5718   CHKERRQ(MatDestroy(&A_RR));
5719   PetscFunctionReturn(0);
5720 }
5721 
5722 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5723 {
5724   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5725   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5726   PetscBool       reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
5727 
5728   PetscFunctionBegin;
5729   if (!reuse_solver) {
5730     CHKERRQ(VecSet(pcbddc->vec1_R,0.));
5731   }
5732   if (!pcbddc->switch_static) {
5733     if (applytranspose && pcbddc->local_auxmat1) {
5734       CHKERRQ(MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C));
5735       CHKERRQ(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B));
5736     }
5737     if (!reuse_solver) {
5738       CHKERRQ(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE));
5739       CHKERRQ(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE));
5740     } else {
5741       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5742 
5743       CHKERRQ(VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD));
5744       CHKERRQ(VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD));
5745     }
5746   } else {
5747     CHKERRQ(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE));
5748     CHKERRQ(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE));
5749     CHKERRQ(VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE));
5750     CHKERRQ(VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE));
5751     if (applytranspose && pcbddc->local_auxmat1) {
5752       CHKERRQ(MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C));
5753       CHKERRQ(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B));
5754       CHKERRQ(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE));
5755       CHKERRQ(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE));
5756     }
5757   }
5758   CHKERRQ(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0));
5759   if (!reuse_solver || pcbddc->switch_static) {
5760     if (applytranspose) {
5761       CHKERRQ(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R));
5762     } else {
5763       CHKERRQ(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R));
5764     }
5765     CHKERRQ(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R));
5766   } else {
5767     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5768 
5769     if (applytranspose) {
5770       CHKERRQ(MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B));
5771     } else {
5772       CHKERRQ(MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B));
5773     }
5774   }
5775   CHKERRQ(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0));
5776   CHKERRQ(VecSet(inout_B,0.));
5777   if (!pcbddc->switch_static) {
5778     if (!reuse_solver) {
5779       CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD));
5780       CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD));
5781     } else {
5782       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5783 
5784       CHKERRQ(VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE));
5785       CHKERRQ(VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE));
5786     }
5787     if (!applytranspose && pcbddc->local_auxmat1) {
5788       CHKERRQ(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C));
5789       CHKERRQ(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B));
5790     }
5791   } else {
5792     CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD));
5793     CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD));
5794     CHKERRQ(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD));
5795     CHKERRQ(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD));
5796     if (!applytranspose && pcbddc->local_auxmat1) {
5797       CHKERRQ(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C));
5798       CHKERRQ(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R));
5799     }
5800     CHKERRQ(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD));
5801     CHKERRQ(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD));
5802     CHKERRQ(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD));
5803     CHKERRQ(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD));
5804   }
5805   PetscFunctionReturn(0);
5806 }
5807 
5808 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5809 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5810 {
5811   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5812   PC_IS*            pcis = (PC_IS*)  (pc->data);
5813   const PetscScalar zero = 0.0;
5814 
5815   PetscFunctionBegin;
5816   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5817   if (!pcbddc->benign_apply_coarse_only) {
5818     if (applytranspose) {
5819       CHKERRQ(MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P));
5820       if (pcbddc->switch_static) CHKERRQ(MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P));
5821     } else {
5822       CHKERRQ(MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P));
5823       if (pcbddc->switch_static) CHKERRQ(MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P));
5824     }
5825   } else {
5826     CHKERRQ(VecSet(pcbddc->vec1_P,zero));
5827   }
5828 
5829   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5830   if (pcbddc->benign_n) {
5831     PetscScalar *array;
5832     PetscInt    j;
5833 
5834     CHKERRQ(VecGetArray(pcbddc->vec1_P,&array));
5835     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5836     CHKERRQ(VecRestoreArray(pcbddc->vec1_P,&array));
5837   }
5838 
5839   /* start communications from local primal nodes to rhs of coarse solver */
5840   CHKERRQ(VecSet(pcbddc->coarse_vec,zero));
5841   CHKERRQ(PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD));
5842   CHKERRQ(PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD));
5843 
5844   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5845   if (pcbddc->coarse_ksp) {
5846     Mat          coarse_mat;
5847     Vec          rhs,sol;
5848     MatNullSpace nullsp;
5849     PetscBool    isbddc = PETSC_FALSE;
5850 
5851     if (pcbddc->benign_have_null) {
5852       PC        coarse_pc;
5853 
5854       CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc));
5855       CHKERRQ(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc));
5856       /* we need to propagate to coarser levels the need for a possible benign correction */
5857       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5858         PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5859         coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5860         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5861       }
5862     }
5863     CHKERRQ(KSPGetRhs(pcbddc->coarse_ksp,&rhs));
5864     CHKERRQ(KSPGetSolution(pcbddc->coarse_ksp,&sol));
5865     CHKERRQ(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL));
5866     if (applytranspose) {
5867       PetscCheck(!pcbddc->benign_apply_coarse_only,PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5868       CHKERRQ(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0));
5869       CHKERRQ(KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol));
5870       CHKERRQ(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0));
5871       CHKERRQ(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol));
5872       CHKERRQ(MatGetTransposeNullSpace(coarse_mat,&nullsp));
5873       if (nullsp) {
5874         CHKERRQ(MatNullSpaceRemove(nullsp,sol));
5875       }
5876     } else {
5877       CHKERRQ(MatGetNullSpace(coarse_mat,&nullsp));
5878       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5879         PC        coarse_pc;
5880 
5881         if (nullsp) {
5882           CHKERRQ(MatNullSpaceRemove(nullsp,rhs));
5883         }
5884         CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc));
5885         CHKERRQ(PCPreSolve(coarse_pc,pcbddc->coarse_ksp));
5886         CHKERRQ(PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol));
5887         CHKERRQ(PCPostSolve(coarse_pc,pcbddc->coarse_ksp));
5888       } else {
5889         CHKERRQ(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0));
5890         CHKERRQ(KSPSolve(pcbddc->coarse_ksp,rhs,sol));
5891         CHKERRQ(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0));
5892         CHKERRQ(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol));
5893         if (nullsp) {
5894           CHKERRQ(MatNullSpaceRemove(nullsp,sol));
5895         }
5896       }
5897     }
5898     /* we don't need the benign correction at coarser levels anymore */
5899     if (pcbddc->benign_have_null && isbddc) {
5900       PC        coarse_pc;
5901       PC_BDDC*  coarsepcbddc;
5902 
5903       CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc));
5904       coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5905       coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5906       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5907     }
5908   }
5909 
5910   /* Local solution on R nodes */
5911   if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5912     CHKERRQ(PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose));
5913   }
5914   /* communications from coarse sol to local primal nodes */
5915   CHKERRQ(PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE));
5916   CHKERRQ(PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE));
5917 
5918   /* Sum contributions from the two levels */
5919   if (!pcbddc->benign_apply_coarse_only) {
5920     if (applytranspose) {
5921       CHKERRQ(MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B));
5922       if (pcbddc->switch_static) CHKERRQ(MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D));
5923     } else {
5924       CHKERRQ(MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B));
5925       if (pcbddc->switch_static) CHKERRQ(MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D));
5926     }
5927     /* store p0 */
5928     if (pcbddc->benign_n) {
5929       PetscScalar *array;
5930       PetscInt    j;
5931 
5932       CHKERRQ(VecGetArray(pcbddc->vec1_P,&array));
5933       for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5934       CHKERRQ(VecRestoreArray(pcbddc->vec1_P,&array));
5935     }
5936   } else { /* expand the coarse solution */
5937     if (applytranspose) {
5938       CHKERRQ(MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B));
5939     } else {
5940       CHKERRQ(MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B));
5941     }
5942   }
5943   PetscFunctionReturn(0);
5944 }
5945 
5946 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5947 {
5948   PC_BDDC*          pcbddc = (PC_BDDC*)(pc->data);
5949   Vec               from,to;
5950   const PetscScalar *array;
5951 
5952   PetscFunctionBegin;
5953   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5954     from = pcbddc->coarse_vec;
5955     to = pcbddc->vec1_P;
5956     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5957       Vec tvec;
5958 
5959       CHKERRQ(KSPGetRhs(pcbddc->coarse_ksp,&tvec));
5960       CHKERRQ(VecResetArray(tvec));
5961       CHKERRQ(KSPGetSolution(pcbddc->coarse_ksp,&tvec));
5962       CHKERRQ(VecGetArrayRead(tvec,&array));
5963       CHKERRQ(VecPlaceArray(from,array));
5964       CHKERRQ(VecRestoreArrayRead(tvec,&array));
5965     }
5966   } else { /* from local to global -> put data in coarse right hand side */
5967     from = pcbddc->vec1_P;
5968     to = pcbddc->coarse_vec;
5969   }
5970   CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode));
5971   PetscFunctionReturn(0);
5972 }
5973 
5974 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5975 {
5976   PC_BDDC*          pcbddc = (PC_BDDC*)(pc->data);
5977   Vec               from,to;
5978   const PetscScalar *array;
5979 
5980   PetscFunctionBegin;
5981   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5982     from = pcbddc->coarse_vec;
5983     to = pcbddc->vec1_P;
5984   } else { /* from local to global -> put data in coarse right hand side */
5985     from = pcbddc->vec1_P;
5986     to = pcbddc->coarse_vec;
5987   }
5988   CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode));
5989   if (smode == SCATTER_FORWARD) {
5990     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5991       Vec tvec;
5992 
5993       CHKERRQ(KSPGetRhs(pcbddc->coarse_ksp,&tvec));
5994       CHKERRQ(VecGetArrayRead(to,&array));
5995       CHKERRQ(VecPlaceArray(tvec,array));
5996       CHKERRQ(VecRestoreArrayRead(to,&array));
5997     }
5998   } else {
5999     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6000      CHKERRQ(VecResetArray(from));
6001     }
6002   }
6003   PetscFunctionReturn(0);
6004 }
6005 
6006 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6007 {
6008   PetscErrorCode    ierr;
6009   PC_IS*            pcis = (PC_IS*)(pc->data);
6010   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
6011   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
6012   /* one and zero */
6013   PetscScalar       one=1.0,zero=0.0;
6014   /* space to store constraints and their local indices */
6015   PetscScalar       *constraints_data;
6016   PetscInt          *constraints_idxs,*constraints_idxs_B;
6017   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
6018   PetscInt          *constraints_n;
6019   /* iterators */
6020   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
6021   /* BLAS integers */
6022   PetscBLASInt      lwork,lierr;
6023   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
6024   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
6025   /* reuse */
6026   PetscInt          olocal_primal_size,olocal_primal_size_cc;
6027   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
6028   /* change of basis */
6029   PetscBool         qr_needed;
6030   PetscBT           change_basis,qr_needed_idx;
6031   /* auxiliary stuff */
6032   PetscInt          *nnz,*is_indices;
6033   PetscInt          ncc;
6034   /* some quantities */
6035   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
6036   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
6037   PetscReal         tol; /* tolerance for retaining eigenmodes */
6038 
6039   PetscFunctionBegin;
6040   tol  = PetscSqrtReal(PETSC_SMALL);
6041   /* Destroy Mat objects computed previously */
6042   CHKERRQ(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6043   CHKERRQ(MatDestroy(&pcbddc->ConstraintMatrix));
6044   CHKERRQ(MatDestroy(&pcbddc->switch_static_change));
6045   /* save info on constraints from previous setup (if any) */
6046   olocal_primal_size = pcbddc->local_primal_size;
6047   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6048   CHKERRQ(PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult));
6049   CHKERRQ(PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc));
6050   CHKERRQ(PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc));
6051   CHKERRQ(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult));
6052   CHKERRQ(PetscFree(pcbddc->primal_indices_local_idxs));
6053 
6054   if (!pcbddc->adaptive_selection) {
6055     IS           ISForVertices,*ISForFaces,*ISForEdges;
6056     MatNullSpace nearnullsp;
6057     const Vec    *nearnullvecs;
6058     Vec          *localnearnullsp;
6059     PetscScalar  *array;
6060     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
6061     PetscBool    nnsp_has_cnst;
6062     /* LAPACK working arrays for SVD or POD */
6063     PetscBool    skip_lapack,boolforchange;
6064     PetscScalar  *work;
6065     PetscReal    *singular_vals;
6066 #if defined(PETSC_USE_COMPLEX)
6067     PetscReal    *rwork;
6068 #endif
6069     PetscScalar  *temp_basis = NULL,*correlation_mat = NULL;
6070     PetscBLASInt dummy_int=1;
6071     PetscScalar  dummy_scalar=1.;
6072     PetscBool    use_pod = PETSC_FALSE;
6073 
6074     /* MKL SVD with same input gives different results on different processes! */
6075 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6076     use_pod = PETSC_TRUE;
6077 #endif
6078     /* Get index sets for faces, edges and vertices from graph */
6079     CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices));
6080     /* print some info */
6081     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6082       PetscInt nv;
6083 
6084       CHKERRQ(PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer));
6085       CHKERRQ(ISGetSize(ISForVertices,&nv));
6086       CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6087       CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n"));
6088       CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices));
6089       CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges));
6090       CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces));
6091       CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
6092       CHKERRQ(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6093     }
6094 
6095     /* free unneeded index sets */
6096     if (!pcbddc->use_vertices) {
6097       CHKERRQ(ISDestroy(&ISForVertices));
6098     }
6099     if (!pcbddc->use_edges) {
6100       for (i=0;i<n_ISForEdges;i++) {
6101         CHKERRQ(ISDestroy(&ISForEdges[i]));
6102       }
6103       CHKERRQ(PetscFree(ISForEdges));
6104       n_ISForEdges = 0;
6105     }
6106     if (!pcbddc->use_faces) {
6107       for (i=0;i<n_ISForFaces;i++) {
6108         CHKERRQ(ISDestroy(&ISForFaces[i]));
6109       }
6110       CHKERRQ(PetscFree(ISForFaces));
6111       n_ISForFaces = 0;
6112     }
6113 
6114     /* check if near null space is attached to global mat */
6115     if (pcbddc->use_nnsp) {
6116       CHKERRQ(MatGetNearNullSpace(pc->pmat,&nearnullsp));
6117     } else nearnullsp = NULL;
6118 
6119     if (nearnullsp) {
6120       CHKERRQ(MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs));
6121       /* remove any stored info */
6122       CHKERRQ(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6123       CHKERRQ(PetscFree(pcbddc->onearnullvecs_state));
6124       /* store information for BDDC solver reuse */
6125       CHKERRQ(PetscObjectReference((PetscObject)nearnullsp));
6126       pcbddc->onearnullspace = nearnullsp;
6127       CHKERRQ(PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state));
6128       for (i=0;i<nnsp_size;i++) {
6129         CHKERRQ(PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]));
6130       }
6131     } else { /* if near null space is not provided BDDC uses constants by default */
6132       nnsp_size = 0;
6133       nnsp_has_cnst = PETSC_TRUE;
6134     }
6135     /* get max number of constraints on a single cc */
6136     max_constraints = nnsp_size;
6137     if (nnsp_has_cnst) max_constraints++;
6138 
6139     /*
6140          Evaluate maximum storage size needed by the procedure
6141          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6142          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6143          There can be multiple constraints per connected component
6144                                                                                                                                                            */
6145     n_vertices = 0;
6146     if (ISForVertices) {
6147       CHKERRQ(ISGetSize(ISForVertices,&n_vertices));
6148     }
6149     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6150     CHKERRQ(PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n));
6151 
6152     total_counts = n_ISForFaces+n_ISForEdges;
6153     total_counts *= max_constraints;
6154     total_counts += n_vertices;
6155     CHKERRQ(PetscBTCreate(total_counts,&change_basis));
6156 
6157     total_counts = 0;
6158     max_size_of_constraint = 0;
6159     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6160       IS used_is;
6161       if (i<n_ISForEdges) {
6162         used_is = ISForEdges[i];
6163       } else {
6164         used_is = ISForFaces[i-n_ISForEdges];
6165       }
6166       CHKERRQ(ISGetSize(used_is,&j));
6167       total_counts += j;
6168       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6169     }
6170     CHKERRQ(PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B));
6171 
6172     /* get local part of global near null space vectors */
6173     CHKERRQ(PetscMalloc1(nnsp_size,&localnearnullsp));
6174     for (k=0;k<nnsp_size;k++) {
6175       CHKERRQ(VecDuplicate(pcis->vec1_N,&localnearnullsp[k]));
6176       CHKERRQ(VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD));
6177       CHKERRQ(VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD));
6178     }
6179 
6180     /* whether or not to skip lapack calls */
6181     skip_lapack = PETSC_TRUE;
6182     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6183 
6184     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6185     if (!skip_lapack) {
6186       PetscScalar temp_work;
6187 
6188       if (use_pod) {
6189         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6190         CHKERRQ(PetscMalloc1(max_constraints*max_constraints,&correlation_mat));
6191         CHKERRQ(PetscMalloc1(max_constraints,&singular_vals));
6192         CHKERRQ(PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis));
6193 #if defined(PETSC_USE_COMPLEX)
6194         CHKERRQ(PetscMalloc1(3*max_constraints,&rwork));
6195 #endif
6196         /* now we evaluate the optimal workspace using query with lwork=-1 */
6197         CHKERRQ(PetscBLASIntCast(max_constraints,&Blas_N));
6198         CHKERRQ(PetscBLASIntCast(max_constraints,&Blas_LDA));
6199         lwork = -1;
6200         CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6201 #if !defined(PETSC_USE_COMPLEX)
6202         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6203 #else
6204         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6205 #endif
6206         CHKERRQ(PetscFPTrapPop());
6207         PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
6208       } else {
6209 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6210         /* SVD */
6211         PetscInt max_n,min_n;
6212         max_n = max_size_of_constraint;
6213         min_n = max_constraints;
6214         if (max_size_of_constraint < max_constraints) {
6215           min_n = max_size_of_constraint;
6216           max_n = max_constraints;
6217         }
6218         CHKERRQ(PetscMalloc1(min_n,&singular_vals));
6219 #if defined(PETSC_USE_COMPLEX)
6220         CHKERRQ(PetscMalloc1(5*min_n,&rwork));
6221 #endif
6222         /* now we evaluate the optimal workspace using query with lwork=-1 */
6223         lwork = -1;
6224         CHKERRQ(PetscBLASIntCast(max_n,&Blas_M));
6225         CHKERRQ(PetscBLASIntCast(min_n,&Blas_N));
6226         CHKERRQ(PetscBLASIntCast(max_n,&Blas_LDA));
6227         CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6228 #if !defined(PETSC_USE_COMPLEX)
6229         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6230 #else
6231         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6232 #endif
6233         CHKERRQ(PetscFPTrapPop());
6234         PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
6235 #else
6236         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6237 #endif /* on missing GESVD */
6238       }
6239       /* Allocate optimal workspace */
6240       CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork));
6241       CHKERRQ(PetscMalloc1(lwork,&work));
6242     }
6243     /* Now we can loop on constraining sets */
6244     total_counts = 0;
6245     constraints_idxs_ptr[0] = 0;
6246     constraints_data_ptr[0] = 0;
6247     /* vertices */
6248     if (n_vertices) {
6249       CHKERRQ(ISGetIndices(ISForVertices,(const PetscInt**)&is_indices));
6250       CHKERRQ(PetscArraycpy(constraints_idxs,is_indices,n_vertices));
6251       for (i=0;i<n_vertices;i++) {
6252         constraints_n[total_counts] = 1;
6253         constraints_data[total_counts] = 1.0;
6254         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6255         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6256         total_counts++;
6257       }
6258       CHKERRQ(ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices));
6259       n_vertices = total_counts;
6260     }
6261 
6262     /* edges and faces */
6263     total_counts_cc = total_counts;
6264     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6265       IS        used_is;
6266       PetscBool idxs_copied = PETSC_FALSE;
6267 
6268       if (ncc<n_ISForEdges) {
6269         used_is = ISForEdges[ncc];
6270         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6271       } else {
6272         used_is = ISForFaces[ncc-n_ISForEdges];
6273         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6274       }
6275       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
6276 
6277       CHKERRQ(ISGetSize(used_is,&size_of_constraint));
6278       CHKERRQ(ISGetIndices(used_is,(const PetscInt**)&is_indices));
6279       /* change of basis should not be performed on local periodic nodes */
6280       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6281       if (nnsp_has_cnst) {
6282         PetscScalar quad_value;
6283 
6284         CHKERRQ(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint));
6285         idxs_copied = PETSC_TRUE;
6286 
6287         if (!pcbddc->use_nnsp_true) {
6288           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6289         } else {
6290           quad_value = 1.0;
6291         }
6292         for (j=0;j<size_of_constraint;j++) {
6293           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6294         }
6295         temp_constraints++;
6296         total_counts++;
6297       }
6298       for (k=0;k<nnsp_size;k++) {
6299         PetscReal real_value;
6300         PetscScalar *ptr_to_data;
6301 
6302         CHKERRQ(VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array));
6303         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6304         for (j=0;j<size_of_constraint;j++) {
6305           ptr_to_data[j] = array[is_indices[j]];
6306         }
6307         CHKERRQ(VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array));
6308         /* check if array is null on the connected component */
6309         CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N));
6310         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6311         if (real_value > tol*size_of_constraint) { /* keep indices and values */
6312           temp_constraints++;
6313           total_counts++;
6314           if (!idxs_copied) {
6315             CHKERRQ(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint));
6316             idxs_copied = PETSC_TRUE;
6317           }
6318         }
6319       }
6320       CHKERRQ(ISRestoreIndices(used_is,(const PetscInt**)&is_indices));
6321       valid_constraints = temp_constraints;
6322       if (!pcbddc->use_nnsp_true && temp_constraints) {
6323         if (temp_constraints == 1) { /* just normalize the constraint */
6324           PetscScalar norm,*ptr_to_data;
6325 
6326           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6327           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N));
6328           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6329           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6330           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6331         } else { /* perform SVD */
6332           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6333 
6334           if (use_pod) {
6335             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6336                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6337                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6338                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6339                   from that computed using LAPACKgesvd
6340                -> This is due to a different computation of eigenvectors in LAPACKheev
6341                -> The quality of the POD-computed basis will be the same */
6342             CHKERRQ(PetscArrayzero(correlation_mat,temp_constraints*temp_constraints));
6343             /* Store upper triangular part of correlation matrix */
6344             CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N));
6345             CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6346             for (j=0;j<temp_constraints;j++) {
6347               for (k=0;k<j+1;k++) {
6348                 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6349               }
6350             }
6351             /* compute eigenvalues and eigenvectors of correlation matrix */
6352             CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_N));
6353             CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_LDA));
6354 #if !defined(PETSC_USE_COMPLEX)
6355             PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6356 #else
6357             PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6358 #endif
6359             CHKERRQ(PetscFPTrapPop());
6360             PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6361             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6362             j = 0;
6363             while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6364             total_counts = total_counts-j;
6365             valid_constraints = temp_constraints-j;
6366             /* scale and copy POD basis into used quadrature memory */
6367             CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_M));
6368             CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_N));
6369             CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_K));
6370             CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA));
6371             CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_LDB));
6372             CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDC));
6373             if (j<temp_constraints) {
6374               PetscInt ii;
6375               for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6376               CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6377               PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6378               CHKERRQ(PetscFPTrapPop());
6379               for (k=0;k<temp_constraints-j;k++) {
6380                 for (ii=0;ii<size_of_constraint;ii++) {
6381                   ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6382                 }
6383               }
6384             }
6385           } else {
6386 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6387             CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_M));
6388             CHKERRQ(PetscBLASIntCast(temp_constraints,&Blas_N));
6389             CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA));
6390             CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6391 #if !defined(PETSC_USE_COMPLEX)
6392             PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6393 #else
6394             PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6395 #endif
6396             PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6397             CHKERRQ(PetscFPTrapPop());
6398             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6399             k = temp_constraints;
6400             if (k > size_of_constraint) k = size_of_constraint;
6401             j = 0;
6402             while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6403             valid_constraints = k-j;
6404             total_counts = total_counts-temp_constraints+valid_constraints;
6405 #else
6406             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6407 #endif /* on missing GESVD */
6408           }
6409         }
6410       }
6411       /* update pointers information */
6412       if (valid_constraints) {
6413         constraints_n[total_counts_cc] = valid_constraints;
6414         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6415         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6416         /* set change_of_basis flag */
6417         if (boolforchange) {
6418           PetscBTSet(change_basis,total_counts_cc);
6419         }
6420         total_counts_cc++;
6421       }
6422     }
6423     /* free workspace */
6424     if (!skip_lapack) {
6425       CHKERRQ(PetscFree(work));
6426 #if defined(PETSC_USE_COMPLEX)
6427       CHKERRQ(PetscFree(rwork));
6428 #endif
6429       CHKERRQ(PetscFree(singular_vals));
6430       CHKERRQ(PetscFree(correlation_mat));
6431       CHKERRQ(PetscFree(temp_basis));
6432     }
6433     for (k=0;k<nnsp_size;k++) {
6434       CHKERRQ(VecDestroy(&localnearnullsp[k]));
6435     }
6436     CHKERRQ(PetscFree(localnearnullsp));
6437     /* free index sets of faces, edges and vertices */
6438     for (i=0;i<n_ISForFaces;i++) {
6439       CHKERRQ(ISDestroy(&ISForFaces[i]));
6440     }
6441     if (n_ISForFaces) {
6442       CHKERRQ(PetscFree(ISForFaces));
6443     }
6444     for (i=0;i<n_ISForEdges;i++) {
6445       CHKERRQ(ISDestroy(&ISForEdges[i]));
6446     }
6447     if (n_ISForEdges) {
6448       CHKERRQ(PetscFree(ISForEdges));
6449     }
6450     CHKERRQ(ISDestroy(&ISForVertices));
6451   } else {
6452     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6453 
6454     total_counts = 0;
6455     n_vertices = 0;
6456     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6457       CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices));
6458     }
6459     max_constraints = 0;
6460     total_counts_cc = 0;
6461     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6462       total_counts += pcbddc->adaptive_constraints_n[i];
6463       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6464       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6465     }
6466     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6467     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6468     constraints_idxs = pcbddc->adaptive_constraints_idxs;
6469     constraints_data = pcbddc->adaptive_constraints_data;
6470     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6471     CHKERRQ(PetscMalloc1(total_counts_cc,&constraints_n));
6472     total_counts_cc = 0;
6473     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6474       if (pcbddc->adaptive_constraints_n[i]) {
6475         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6476       }
6477     }
6478 
6479     max_size_of_constraint = 0;
6480     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6481     CHKERRQ(PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B));
6482     /* Change of basis */
6483     CHKERRQ(PetscBTCreate(total_counts_cc,&change_basis));
6484     if (pcbddc->use_change_of_basis) {
6485       for (i=0;i<sub_schurs->n_subs;i++) {
6486         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6487           CHKERRQ(PetscBTSet(change_basis,i+n_vertices));
6488         }
6489       }
6490     }
6491   }
6492   pcbddc->local_primal_size = total_counts;
6493   CHKERRQ(PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs));
6494 
6495   /* map constraints_idxs in boundary numbering */
6496   CHKERRQ(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B));
6497   PetscCheckFalse(i != constraints_idxs_ptr[total_counts_cc],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);
6498 
6499   /* Create constraint matrix */
6500   CHKERRQ(MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix));
6501   CHKERRQ(MatSetType(pcbddc->ConstraintMatrix,MATAIJ));
6502   CHKERRQ(MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n));
6503 
6504   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6505   /* determine if a QR strategy is needed for change of basis */
6506   qr_needed = pcbddc->use_qr_single;
6507   CHKERRQ(PetscBTCreate(total_counts_cc,&qr_needed_idx));
6508   total_primal_vertices=0;
6509   pcbddc->local_primal_size_cc = 0;
6510   for (i=0;i<total_counts_cc;i++) {
6511     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6512     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6513       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6514       pcbddc->local_primal_size_cc += 1;
6515     } else if (PetscBTLookup(change_basis,i)) {
6516       for (k=0;k<constraints_n[i];k++) {
6517         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6518       }
6519       pcbddc->local_primal_size_cc += constraints_n[i];
6520       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6521         PetscBTSet(qr_needed_idx,i);
6522         qr_needed = PETSC_TRUE;
6523       }
6524     } else {
6525       pcbddc->local_primal_size_cc += 1;
6526     }
6527   }
6528   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6529   pcbddc->n_vertices = total_primal_vertices;
6530   /* permute indices in order to have a sorted set of vertices */
6531   CHKERRQ(PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs));
6532   CHKERRQ(PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult));
6533   CHKERRQ(PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices));
6534   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
6535 
6536   /* nonzero structure of constraint matrix */
6537   /* and get reference dof for local constraints */
6538   CHKERRQ(PetscMalloc1(pcbddc->local_primal_size,&nnz));
6539   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
6540 
6541   j = total_primal_vertices;
6542   total_counts = total_primal_vertices;
6543   cum = total_primal_vertices;
6544   for (i=n_vertices;i<total_counts_cc;i++) {
6545     if (!PetscBTLookup(change_basis,i)) {
6546       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6547       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6548       cum++;
6549       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6550       for (k=0;k<constraints_n[i];k++) {
6551         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6552         nnz[j+k] = size_of_constraint;
6553       }
6554       j += constraints_n[i];
6555     }
6556   }
6557   CHKERRQ(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz));
6558   CHKERRQ(MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
6559   CHKERRQ(PetscFree(nnz));
6560 
6561   /* set values in constraint matrix */
6562   for (i=0;i<total_primal_vertices;i++) {
6563     CHKERRQ(MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES));
6564   }
6565   total_counts = total_primal_vertices;
6566   for (i=n_vertices;i<total_counts_cc;i++) {
6567     if (!PetscBTLookup(change_basis,i)) {
6568       PetscInt *cols;
6569 
6570       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6571       cols = constraints_idxs+constraints_idxs_ptr[i];
6572       for (k=0;k<constraints_n[i];k++) {
6573         PetscInt    row = total_counts+k;
6574         PetscScalar *vals;
6575 
6576         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6577         CHKERRQ(MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES));
6578       }
6579       total_counts += constraints_n[i];
6580     }
6581   }
6582   /* assembling */
6583   CHKERRQ(MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY));
6584   CHKERRQ(MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY));
6585   CHKERRQ(MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view"));
6586 
6587   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6588   if (pcbddc->use_change_of_basis) {
6589     /* dual and primal dofs on a single cc */
6590     PetscInt     dual_dofs,primal_dofs;
6591     /* working stuff for GEQRF */
6592     PetscScalar  *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6593     PetscBLASInt lqr_work;
6594     /* working stuff for UNGQR */
6595     PetscScalar  *gqr_work = NULL,lgqr_work_t=0.0;
6596     PetscBLASInt lgqr_work;
6597     /* working stuff for TRTRS */
6598     PetscScalar  *trs_rhs = NULL;
6599     PetscBLASInt Blas_NRHS;
6600     /* pointers for values insertion into change of basis matrix */
6601     PetscInt     *start_rows,*start_cols;
6602     PetscScalar  *start_vals;
6603     /* working stuff for values insertion */
6604     PetscBT      is_primal;
6605     PetscInt     *aux_primal_numbering_B;
6606     /* matrix sizes */
6607     PetscInt     global_size,local_size;
6608     /* temporary change of basis */
6609     Mat          localChangeOfBasisMatrix;
6610     /* extra space for debugging */
6611     PetscScalar  *dbg_work = NULL;
6612 
6613     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6614     CHKERRQ(MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix));
6615     CHKERRQ(MatSetType(localChangeOfBasisMatrix,MATAIJ));
6616     CHKERRQ(MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n));
6617     /* nonzeros for local mat */
6618     CHKERRQ(PetscMalloc1(pcis->n,&nnz));
6619     if (!pcbddc->benign_change || pcbddc->fake_change) {
6620       for (i=0;i<pcis->n;i++) nnz[i]=1;
6621     } else {
6622       const PetscInt *ii;
6623       PetscInt       n;
6624       PetscBool      flg_row;
6625       CHKERRQ(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row));
6626       for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6627       CHKERRQ(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row));
6628     }
6629     for (i=n_vertices;i<total_counts_cc;i++) {
6630       if (PetscBTLookup(change_basis,i)) {
6631         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6632         if (PetscBTLookup(qr_needed_idx,i)) {
6633           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6634         } else {
6635           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6636           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6637         }
6638       }
6639     }
6640     CHKERRQ(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz));
6641     CHKERRQ(MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
6642     CHKERRQ(PetscFree(nnz));
6643     /* Set interior change in the matrix */
6644     if (!pcbddc->benign_change || pcbddc->fake_change) {
6645       for (i=0;i<pcis->n;i++) {
6646         CHKERRQ(MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES));
6647       }
6648     } else {
6649       const PetscInt *ii,*jj;
6650       PetscScalar    *aa;
6651       PetscInt       n;
6652       PetscBool      flg_row;
6653       CHKERRQ(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row));
6654       CHKERRQ(MatSeqAIJGetArray(pcbddc->benign_change,&aa));
6655       for (i=0;i<n;i++) {
6656         CHKERRQ(MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES));
6657       }
6658       CHKERRQ(MatSeqAIJRestoreArray(pcbddc->benign_change,&aa));
6659       CHKERRQ(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row));
6660     }
6661 
6662     if (pcbddc->dbg_flag) {
6663       CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n"));
6664       CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank));
6665     }
6666 
6667     /* Now we loop on the constraints which need a change of basis */
6668     /*
6669        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6670        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6671 
6672        Basic blocks of change of basis matrix T computed by
6673 
6674           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6675 
6676             | 1        0   ...        0         s_1/S |
6677             | 0        1   ...        0         s_2/S |
6678             |              ...                        |
6679             | 0        ...            1     s_{n-1}/S |
6680             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
6681 
6682             with S = \sum_{i=1}^n s_i^2
6683             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6684                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6685 
6686           - QR decomposition of constraints otherwise
6687     */
6688     if (qr_needed && max_size_of_constraint) {
6689       /* space to store Q */
6690       CHKERRQ(PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis));
6691       /* array to store scaling factors for reflectors */
6692       CHKERRQ(PetscMalloc1(max_constraints,&qr_tau));
6693       /* first we issue queries for optimal work */
6694       CHKERRQ(PetscBLASIntCast(max_size_of_constraint,&Blas_M));
6695       CHKERRQ(PetscBLASIntCast(max_constraints,&Blas_N));
6696       CHKERRQ(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA));
6697       lqr_work = -1;
6698       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6699       PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6700       CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work));
6701       CHKERRQ(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work));
6702       lgqr_work = -1;
6703       CHKERRQ(PetscBLASIntCast(max_size_of_constraint,&Blas_M));
6704       CHKERRQ(PetscBLASIntCast(max_size_of_constraint,&Blas_N));
6705       CHKERRQ(PetscBLASIntCast(max_constraints,&Blas_K));
6706       CHKERRQ(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA));
6707       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6708       PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6709       PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6710       CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work));
6711       CHKERRQ(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work));
6712       /* array to store rhs and solution of triangular solver */
6713       CHKERRQ(PetscMalloc1(max_constraints*max_constraints,&trs_rhs));
6714       /* allocating workspace for check */
6715       if (pcbddc->dbg_flag) {
6716         CHKERRQ(PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work));
6717       }
6718     }
6719     /* array to store whether a node is primal or not */
6720     CHKERRQ(PetscBTCreate(pcis->n_B,&is_primal));
6721     CHKERRQ(PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B));
6722     CHKERRQ(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B));
6723     PetscCheckFalse(i != total_primal_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6724     for (i=0;i<total_primal_vertices;i++) {
6725       CHKERRQ(PetscBTSet(is_primal,aux_primal_numbering_B[i]));
6726     }
6727     CHKERRQ(PetscFree(aux_primal_numbering_B));
6728 
6729     /* loop on constraints and see whether or not they need a change of basis and compute it */
6730     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6731       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6732       if (PetscBTLookup(change_basis,total_counts)) {
6733         /* get constraint info */
6734         primal_dofs = constraints_n[total_counts];
6735         dual_dofs = size_of_constraint-primal_dofs;
6736 
6737         if (pcbddc->dbg_flag) {
6738           CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint));
6739         }
6740 
6741         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
6742 
6743           /* copy quadrature constraints for change of basis check */
6744           if (pcbddc->dbg_flag) {
6745             CHKERRQ(PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs));
6746           }
6747           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6748           CHKERRQ(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs));
6749 
6750           /* compute QR decomposition of constraints */
6751           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_M));
6752           CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_N));
6753           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA));
6754           CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6755           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6756           PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6757           CHKERRQ(PetscFPTrapPop());
6758 
6759           /* explicitly compute R^-T */
6760           CHKERRQ(PetscArrayzero(trs_rhs,primal_dofs*primal_dofs));
6761           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6762           CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_N));
6763           CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_NRHS));
6764           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA));
6765           CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_LDB));
6766           CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6767           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6768           PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6769           CHKERRQ(PetscFPTrapPop());
6770 
6771           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6772           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_M));
6773           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N));
6774           CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_K));
6775           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA));
6776           CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6777           PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6778           PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6779           CHKERRQ(PetscFPTrapPop());
6780 
6781           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6782              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6783              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6784           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_M));
6785           CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_N));
6786           CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_K));
6787           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA));
6788           CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_LDB));
6789           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDC));
6790           CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6791           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6792           CHKERRQ(PetscFPTrapPop());
6793           CHKERRQ(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs));
6794 
6795           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6796           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6797           /* insert cols for primal dofs */
6798           for (j=0;j<primal_dofs;j++) {
6799             start_vals = &qr_basis[j*size_of_constraint];
6800             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6801             CHKERRQ(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES));
6802           }
6803           /* insert cols for dual dofs */
6804           for (j=0,k=0;j<dual_dofs;k++) {
6805             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6806               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6807               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6808               CHKERRQ(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES));
6809               j++;
6810             }
6811           }
6812 
6813           /* check change of basis */
6814           if (pcbddc->dbg_flag) {
6815             PetscInt   ii,jj;
6816             PetscBool valid_qr=PETSC_TRUE;
6817             CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_M));
6818             CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N));
6819             CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_K));
6820             CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDA));
6821             CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_LDB));
6822             CHKERRQ(PetscBLASIntCast(primal_dofs,&Blas_LDC));
6823             CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6824             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6825             CHKERRQ(PetscFPTrapPop());
6826             for (jj=0;jj<size_of_constraint;jj++) {
6827               for (ii=0;ii<primal_dofs;ii++) {
6828                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6829                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6830               }
6831             }
6832             if (!valid_qr) {
6833               CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n"));
6834               for (jj=0;jj<size_of_constraint;jj++) {
6835                 for (ii=0;ii<primal_dofs;ii++) {
6836                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6837                     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])));
6838                   }
6839                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6840                     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])));
6841                   }
6842                 }
6843               }
6844             } else {
6845               CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n"));
6846             }
6847           }
6848         } else { /* simple transformation block */
6849           PetscInt    row,col;
6850           PetscScalar val,norm;
6851 
6852           CHKERRQ(PetscBLASIntCast(size_of_constraint,&Blas_N));
6853           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6854           for (j=0;j<size_of_constraint;j++) {
6855             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6856             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6857             if (!PetscBTLookup(is_primal,row_B)) {
6858               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6859               CHKERRQ(MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES));
6860               CHKERRQ(MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES));
6861             } else {
6862               for (k=0;k<size_of_constraint;k++) {
6863                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6864                 if (row != col) {
6865                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6866                 } else {
6867                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6868                 }
6869                 CHKERRQ(MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES));
6870               }
6871             }
6872           }
6873           if (pcbddc->dbg_flag) {
6874             CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n"));
6875           }
6876         }
6877       } else {
6878         if (pcbddc->dbg_flag) {
6879           CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint));
6880         }
6881       }
6882     }
6883 
6884     /* free workspace */
6885     if (qr_needed) {
6886       if (pcbddc->dbg_flag) {
6887         CHKERRQ(PetscFree(dbg_work));
6888       }
6889       CHKERRQ(PetscFree(trs_rhs));
6890       CHKERRQ(PetscFree(qr_tau));
6891       CHKERRQ(PetscFree(qr_work));
6892       CHKERRQ(PetscFree(gqr_work));
6893       CHKERRQ(PetscFree(qr_basis));
6894     }
6895     CHKERRQ(PetscBTDestroy(&is_primal));
6896     CHKERRQ(MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY));
6897     CHKERRQ(MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY));
6898 
6899     /* assembling of global change of variable */
6900     if (!pcbddc->fake_change) {
6901       Mat      tmat;
6902       PetscInt bs;
6903 
6904       CHKERRQ(VecGetSize(pcis->vec1_global,&global_size));
6905       CHKERRQ(VecGetLocalSize(pcis->vec1_global,&local_size));
6906       CHKERRQ(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat));
6907       CHKERRQ(MatISSetLocalMat(tmat,localChangeOfBasisMatrix));
6908       CHKERRQ(MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY));
6909       CHKERRQ(MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY));
6910       CHKERRQ(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix));
6911       CHKERRQ(MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ));
6912       CHKERRQ(MatGetBlockSize(pc->pmat,&bs));
6913       CHKERRQ(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs));
6914       CHKERRQ(MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size));
6915       CHKERRQ(MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE));
6916       CHKERRQ(MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix));
6917       CHKERRQ(MatDestroy(&tmat));
6918       CHKERRQ(VecSet(pcis->vec1_global,0.0));
6919       CHKERRQ(VecSet(pcis->vec1_N,1.0));
6920       CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
6921       CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
6922       CHKERRQ(VecReciprocal(pcis->vec1_global));
6923       CHKERRQ(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL));
6924 
6925       /* check */
6926       if (pcbddc->dbg_flag) {
6927         PetscReal error;
6928         Vec       x,x_change;
6929 
6930         CHKERRQ(VecDuplicate(pcis->vec1_global,&x));
6931         CHKERRQ(VecDuplicate(pcis->vec1_global,&x_change));
6932         CHKERRQ(VecSetRandom(x,NULL));
6933         CHKERRQ(VecCopy(x,pcis->vec1_global));
6934         CHKERRQ(VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD));
6935         CHKERRQ(VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD));
6936         CHKERRQ(MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N));
6937         CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE));
6938         CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE));
6939         CHKERRQ(MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change));
6940         CHKERRQ(VecAXPY(x,-1.0,x_change));
6941         CHKERRQ(VecNorm(x,NORM_INFINITY,&error));
6942         if (error > PETSC_SMALL) {
6943           SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6944         }
6945         CHKERRQ(VecDestroy(&x));
6946         CHKERRQ(VecDestroy(&x_change));
6947       }
6948       /* adapt sub_schurs computed (if any) */
6949       if (pcbddc->use_deluxe_scaling) {
6950         PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
6951 
6952         PetscCheckFalse(pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6953         if (sub_schurs && sub_schurs->S_Ej_all) {
6954           Mat                    S_new,tmat;
6955           IS                     is_all_N,is_V_Sall = NULL;
6956 
6957           CHKERRQ(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N));
6958           CHKERRQ(MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat));
6959           if (pcbddc->deluxe_zerorows) {
6960             ISLocalToGlobalMapping NtoSall;
6961             IS                     is_V;
6962             CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V));
6963             CHKERRQ(ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall));
6964             CHKERRQ(ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall));
6965             CHKERRQ(ISLocalToGlobalMappingDestroy(&NtoSall));
6966             CHKERRQ(ISDestroy(&is_V));
6967           }
6968           CHKERRQ(ISDestroy(&is_all_N));
6969           CHKERRQ(MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new));
6970           CHKERRQ(MatDestroy(&sub_schurs->S_Ej_all));
6971           CHKERRQ(PetscObjectReference((PetscObject)S_new));
6972           if (pcbddc->deluxe_zerorows) {
6973             const PetscScalar *array;
6974             const PetscInt    *idxs_V,*idxs_all;
6975             PetscInt          i,n_V;
6976 
6977             CHKERRQ(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL));
6978             CHKERRQ(ISGetLocalSize(is_V_Sall,&n_V));
6979             CHKERRQ(ISGetIndices(is_V_Sall,&idxs_V));
6980             CHKERRQ(ISGetIndices(sub_schurs->is_Ej_all,&idxs_all));
6981             CHKERRQ(VecGetArrayRead(pcis->D,&array));
6982             for (i=0;i<n_V;i++) {
6983               PetscScalar val;
6984               PetscInt    idx;
6985 
6986               idx = idxs_V[i];
6987               val = array[idxs_all[idxs_V[i]]];
6988               CHKERRQ(MatSetValue(S_new,idx,idx,val,INSERT_VALUES));
6989             }
6990             CHKERRQ(MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY));
6991             CHKERRQ(MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY));
6992             CHKERRQ(VecRestoreArrayRead(pcis->D,&array));
6993             CHKERRQ(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all));
6994             CHKERRQ(ISRestoreIndices(is_V_Sall,&idxs_V));
6995           }
6996           sub_schurs->S_Ej_all = S_new;
6997           CHKERRQ(MatDestroy(&S_new));
6998           if (sub_schurs->sum_S_Ej_all) {
6999             CHKERRQ(MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new));
7000             CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_all));
7001             CHKERRQ(PetscObjectReference((PetscObject)S_new));
7002             if (pcbddc->deluxe_zerorows) {
7003               CHKERRQ(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL));
7004             }
7005             sub_schurs->sum_S_Ej_all = S_new;
7006             CHKERRQ(MatDestroy(&S_new));
7007           }
7008           CHKERRQ(ISDestroy(&is_V_Sall));
7009           CHKERRQ(MatDestroy(&tmat));
7010         }
7011         /* destroy any change of basis context in sub_schurs */
7012         if (sub_schurs && sub_schurs->change) {
7013           PetscInt i;
7014 
7015           for (i=0;i<sub_schurs->n_subs;i++) {
7016             CHKERRQ(KSPDestroy(&sub_schurs->change[i]));
7017           }
7018           CHKERRQ(PetscFree(sub_schurs->change));
7019         }
7020       }
7021       if (pcbddc->switch_static) { /* need to save the local change */
7022         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7023       } else {
7024         CHKERRQ(MatDestroy(&localChangeOfBasisMatrix));
7025       }
7026       /* determine if any process has changed the pressures locally */
7027       pcbddc->change_interior = pcbddc->benign_have_null;
7028     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7029       CHKERRQ(MatDestroy(&pcbddc->ConstraintMatrix));
7030       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7031       pcbddc->use_qr_single = qr_needed;
7032     }
7033   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7034     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7035       CHKERRQ(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7036       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7037     } else {
7038       Mat benign_global = NULL;
7039       if (pcbddc->benign_have_null) {
7040         Mat M;
7041 
7042         pcbddc->change_interior = PETSC_TRUE;
7043         CHKERRQ(VecCopy(matis->counter,pcis->vec1_N));
7044         CHKERRQ(VecReciprocal(pcis->vec1_N));
7045         CHKERRQ(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global));
7046         if (pcbddc->benign_change) {
7047           CHKERRQ(MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M));
7048           CHKERRQ(MatDiagonalScale(M,pcis->vec1_N,NULL));
7049         } else {
7050           CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M));
7051           CHKERRQ(MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES));
7052         }
7053         CHKERRQ(MatISSetLocalMat(benign_global,M));
7054         CHKERRQ(MatDestroy(&M));
7055         CHKERRQ(MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY));
7056         CHKERRQ(MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY));
7057       }
7058       if (pcbddc->user_ChangeOfBasisMatrix) {
7059         CHKERRQ(MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix));
7060         CHKERRQ(MatDestroy(&benign_global));
7061       } else if (pcbddc->benign_have_null) {
7062         pcbddc->ChangeOfBasisMatrix = benign_global;
7063       }
7064     }
7065     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7066       IS             is_global;
7067       const PetscInt *gidxs;
7068 
7069       CHKERRQ(ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs));
7070       CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global));
7071       CHKERRQ(ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs));
7072       CHKERRQ(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change));
7073       CHKERRQ(ISDestroy(&is_global));
7074     }
7075   }
7076   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
7077     CHKERRQ(VecDuplicate(pcis->vec1_global,&pcbddc->work_change));
7078   }
7079 
7080   if (!pcbddc->fake_change) {
7081     /* add pressure dofs to set of primal nodes for numbering purposes */
7082     for (i=0;i<pcbddc->benign_n;i++) {
7083       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7084       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7085       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7086       pcbddc->local_primal_size_cc++;
7087       pcbddc->local_primal_size++;
7088     }
7089 
7090     /* check if a new primal space has been introduced (also take into account benign trick) */
7091     pcbddc->new_primal_space_local = PETSC_TRUE;
7092     if (olocal_primal_size == pcbddc->local_primal_size) {
7093       CHKERRQ(PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local));
7094       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7095       if (!pcbddc->new_primal_space_local) {
7096         CHKERRQ(PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local));
7097         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7098       }
7099     }
7100     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7101     CHKERRMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc)));
7102   }
7103   CHKERRQ(PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult));
7104 
7105   /* flush dbg viewer */
7106   if (pcbddc->dbg_flag) {
7107     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
7108   }
7109 
7110   /* free workspace */
7111   CHKERRQ(PetscBTDestroy(&qr_needed_idx));
7112   CHKERRQ(PetscBTDestroy(&change_basis));
7113   if (!pcbddc->adaptive_selection) {
7114     CHKERRQ(PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n));
7115     CHKERRQ(PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B));
7116   } else {
7117     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
7118                       pcbddc->adaptive_constraints_idxs_ptr,
7119                       pcbddc->adaptive_constraints_data_ptr,
7120                       pcbddc->adaptive_constraints_idxs,
7121                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
7122     CHKERRQ(PetscFree(constraints_n));
7123     CHKERRQ(PetscFree(constraints_idxs_B));
7124   }
7125   PetscFunctionReturn(0);
7126 }
7127 
7128 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7129 {
7130   ISLocalToGlobalMapping map;
7131   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7132   Mat_IS                 *matis  = (Mat_IS*)pc->pmat->data;
7133   PetscInt               i,N;
7134   PetscBool              rcsr = PETSC_FALSE;
7135 
7136   PetscFunctionBegin;
7137   if (pcbddc->recompute_topography) {
7138     pcbddc->graphanalyzed = PETSC_FALSE;
7139     /* Reset previously computed graph */
7140     CHKERRQ(PCBDDCGraphReset(pcbddc->mat_graph));
7141     /* Init local Graph struct */
7142     CHKERRQ(MatGetSize(pc->pmat,&N,NULL));
7143     CHKERRQ(MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL));
7144     CHKERRQ(PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount));
7145 
7146     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7147       CHKERRQ(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local));
7148     }
7149     /* Check validity of the csr graph passed in by the user */
7150     PetscCheckFalse(pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
7151 
7152     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7153     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7154       PetscInt  *xadj,*adjncy;
7155       PetscInt  nvtxs;
7156       PetscBool flg_row=PETSC_FALSE;
7157 
7158       CHKERRQ(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row));
7159       if (flg_row) {
7160         CHKERRQ(PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES));
7161         pcbddc->computed_rowadj = PETSC_TRUE;
7162       }
7163       CHKERRQ(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row));
7164       rcsr = PETSC_TRUE;
7165     }
7166     if (pcbddc->dbg_flag) {
7167       CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
7168     }
7169 
7170     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7171       PetscReal    *lcoords;
7172       PetscInt     n;
7173       MPI_Datatype dimrealtype;
7174 
7175       /* TODO: support for blocked */
7176       PetscCheckFalse(pcbddc->mat_graph->cnloc != pc->pmat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
7177       CHKERRQ(MatGetLocalSize(matis->A,&n,NULL));
7178       CHKERRQ(PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords));
7179       CHKERRMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype));
7180       CHKERRMPI(MPI_Type_commit(&dimrealtype));
7181       CHKERRQ(PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE));
7182       CHKERRQ(PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE));
7183       CHKERRMPI(MPI_Type_free(&dimrealtype));
7184       CHKERRQ(PetscFree(pcbddc->mat_graph->coords));
7185 
7186       pcbddc->mat_graph->coords = lcoords;
7187       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7188       pcbddc->mat_graph->cnloc  = n;
7189     }
7190     PetscCheckFalse(pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
7191     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7192 
7193     /* Setup of Graph */
7194     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7195     CHKERRQ(PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local));
7196 
7197     /* attach info on disconnected subdomains if present */
7198     if (pcbddc->n_local_subs) {
7199       PetscInt *local_subs,n,totn;
7200 
7201       CHKERRQ(MatGetLocalSize(matis->A,&n,NULL));
7202       CHKERRQ(PetscMalloc1(n,&local_subs));
7203       for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs;
7204       for (i=0;i<pcbddc->n_local_subs;i++) {
7205         const PetscInt *idxs;
7206         PetscInt       nl,j;
7207 
7208         CHKERRQ(ISGetLocalSize(pcbddc->local_subs[i],&nl));
7209         CHKERRQ(ISGetIndices(pcbddc->local_subs[i],&idxs));
7210         for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7211         CHKERRQ(ISRestoreIndices(pcbddc->local_subs[i],&idxs));
7212       }
7213       for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]);
7214       pcbddc->mat_graph->n_local_subs = totn + 1;
7215       pcbddc->mat_graph->local_subs = local_subs;
7216     }
7217   }
7218 
7219   if (!pcbddc->graphanalyzed) {
7220     /* Graph's connected components analysis */
7221     CHKERRQ(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7222     pcbddc->graphanalyzed = PETSC_TRUE;
7223     pcbddc->corner_selected = pcbddc->corner_selection;
7224   }
7225   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7226   PetscFunctionReturn(0);
7227 }
7228 
7229 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7230 {
7231   PetscInt       i,j,n;
7232   PetscScalar    *alphas;
7233   PetscReal      norm,*onorms;
7234 
7235   PetscFunctionBegin;
7236   n = *nio;
7237   if (!n) PetscFunctionReturn(0);
7238   CHKERRQ(PetscMalloc2(n,&alphas,n,&onorms));
7239   CHKERRQ(VecNormalize(vecs[0],&norm));
7240   if (norm < PETSC_SMALL) {
7241     onorms[0] = 0.0;
7242     CHKERRQ(VecSet(vecs[0],0.0));
7243   } else {
7244     onorms[0] = norm;
7245   }
7246 
7247   for (i=1;i<n;i++) {
7248     CHKERRQ(VecMDot(vecs[i],i,vecs,alphas));
7249     for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7250     CHKERRQ(VecMAXPY(vecs[i],i,alphas,vecs));
7251     CHKERRQ(VecNormalize(vecs[i],&norm));
7252     if (norm < PETSC_SMALL) {
7253       onorms[i] = 0.0;
7254       CHKERRQ(VecSet(vecs[i],0.0));
7255     } else {
7256       onorms[i] = norm;
7257     }
7258   }
7259   /* push nonzero vectors at the beginning */
7260   for (i=0;i<n;i++) {
7261     if (onorms[i] == 0.0) {
7262       for (j=i+1;j<n;j++) {
7263         if (onorms[j] != 0.0) {
7264           CHKERRQ(VecCopy(vecs[j],vecs[i]));
7265           onorms[j] = 0.0;
7266         }
7267       }
7268     }
7269   }
7270   for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7271   CHKERRQ(PetscFree2(alphas,onorms));
7272   PetscFunctionReturn(0);
7273 }
7274 
7275 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7276 {
7277   ISLocalToGlobalMapping mapping;
7278   Mat                    A;
7279   PetscInt               n_neighs,*neighs,*n_shared,**shared;
7280   PetscMPIInt            size,rank,color;
7281   PetscInt               *xadj,*adjncy;
7282   PetscInt               *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7283   PetscInt               im_active,active_procs,N,n,i,j,threshold = 2;
7284   PetscInt               void_procs,*procs_candidates = NULL;
7285   PetscInt               xadj_count,*count;
7286   PetscBool              ismatis,use_vwgt=PETSC_FALSE;
7287   PetscSubcomm           psubcomm;
7288   MPI_Comm               subcomm;
7289 
7290   PetscFunctionBegin;
7291   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
7292   CHKERRQ(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis));
7293   PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7294   PetscValidLogicalCollectiveInt(mat,*n_subdomains,2);
7295   PetscValidLogicalCollectiveInt(mat,redprocs,3);
7296   PetscCheckFalse(*n_subdomains <=0,PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);
7297 
7298   if (have_void) *have_void = PETSC_FALSE;
7299   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size));
7300   CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
7301   CHKERRQ(MatISGetLocalMat(mat,&A));
7302   CHKERRQ(MatGetLocalSize(A,&n,NULL));
7303   im_active = !!n;
7304   CHKERRMPI(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat)));
7305   void_procs = size - active_procs;
7306   /* get ranks of of non-active processes in mat communicator */
7307   if (void_procs) {
7308     PetscInt ncand;
7309 
7310     if (have_void) *have_void = PETSC_TRUE;
7311     CHKERRQ(PetscMalloc1(size,&procs_candidates));
7312     CHKERRMPI(MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
7313     for (i=0,ncand=0;i<size;i++) {
7314       if (!procs_candidates[i]) {
7315         procs_candidates[ncand++] = i;
7316       }
7317     }
7318     /* force n_subdomains to be not greater that the number of non-active processes */
7319     *n_subdomains = PetscMin(void_procs,*n_subdomains);
7320   }
7321 
7322   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7323      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7324   CHKERRQ(MatGetSize(mat,&N,NULL));
7325   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7326     PetscInt issize,isidx,dest;
7327     if (*n_subdomains == 1) dest = 0;
7328     else dest = rank;
7329     if (im_active) {
7330       issize = 1;
7331       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7332         isidx = procs_candidates[dest];
7333       } else {
7334         isidx = dest;
7335       }
7336     } else {
7337       issize = 0;
7338       isidx = -1;
7339     }
7340     if (*n_subdomains != 1) *n_subdomains = active_procs;
7341     CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends));
7342     CHKERRQ(PetscFree(procs_candidates));
7343     PetscFunctionReturn(0);
7344   }
7345   CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL));
7346   CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL));
7347   threshold = PetscMax(threshold,2);
7348 
7349   /* Get info on mapping */
7350   CHKERRQ(MatISGetLocalToGlobalMapping(mat,&mapping,NULL));
7351   CHKERRQ(ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared));
7352 
7353   /* build local CSR graph of subdomains' connectivity */
7354   CHKERRQ(PetscMalloc1(2,&xadj));
7355   xadj[0] = 0;
7356   xadj[1] = PetscMax(n_neighs-1,0);
7357   CHKERRQ(PetscMalloc1(xadj[1],&adjncy));
7358   CHKERRQ(PetscMalloc1(xadj[1],&adjncy_wgt));
7359   CHKERRQ(PetscCalloc1(n,&count));
7360   for (i=1;i<n_neighs;i++)
7361     for (j=0;j<n_shared[i];j++)
7362       count[shared[i][j]] += 1;
7363 
7364   xadj_count = 0;
7365   for (i=1;i<n_neighs;i++) {
7366     for (j=0;j<n_shared[i];j++) {
7367       if (count[shared[i][j]] < threshold) {
7368         adjncy[xadj_count] = neighs[i];
7369         adjncy_wgt[xadj_count] = n_shared[i];
7370         xadj_count++;
7371         break;
7372       }
7373     }
7374   }
7375   xadj[1] = xadj_count;
7376   CHKERRQ(PetscFree(count));
7377   CHKERRQ(ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared));
7378   CHKERRQ(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt));
7379 
7380   CHKERRQ(PetscMalloc1(1,&ranks_send_to_idx));
7381 
7382   /* Restrict work on active processes only */
7383   CHKERRQ(PetscMPIIntCast(im_active,&color));
7384   if (void_procs) {
7385     CHKERRQ(PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm));
7386     CHKERRQ(PetscSubcommSetNumber(psubcomm,2)); /* 2 groups, active process and not active processes */
7387     CHKERRQ(PetscSubcommSetTypeGeneral(psubcomm,color,rank));
7388     subcomm = PetscSubcommChild(psubcomm);
7389   } else {
7390     psubcomm = NULL;
7391     subcomm = PetscObjectComm((PetscObject)mat);
7392   }
7393 
7394   v_wgt = NULL;
7395   if (!color) {
7396     CHKERRQ(PetscFree(xadj));
7397     CHKERRQ(PetscFree(adjncy));
7398     CHKERRQ(PetscFree(adjncy_wgt));
7399   } else {
7400     Mat             subdomain_adj;
7401     IS              new_ranks,new_ranks_contig;
7402     MatPartitioning partitioner;
7403     PetscInt        rstart=0,rend=0;
7404     PetscInt        *is_indices,*oldranks;
7405     PetscMPIInt     size;
7406     PetscBool       aggregate;
7407 
7408     CHKERRMPI(MPI_Comm_size(subcomm,&size));
7409     if (void_procs) {
7410       PetscInt prank = rank;
7411       CHKERRQ(PetscMalloc1(size,&oldranks));
7412       CHKERRMPI(MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm));
7413       for (i=0;i<xadj[1];i++) {
7414         CHKERRQ(PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]));
7415       }
7416       CHKERRQ(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt));
7417     } else {
7418       oldranks = NULL;
7419     }
7420     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7421     if (aggregate) { /* TODO: all this part could be made more efficient */
7422       PetscInt    lrows,row,ncols,*cols;
7423       PetscMPIInt nrank;
7424       PetscScalar *vals;
7425 
7426       CHKERRMPI(MPI_Comm_rank(subcomm,&nrank));
7427       lrows = 0;
7428       if (nrank<redprocs) {
7429         lrows = size/redprocs;
7430         if (nrank<size%redprocs) lrows++;
7431       }
7432       CHKERRQ(MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj));
7433       CHKERRQ(MatGetOwnershipRange(subdomain_adj,&rstart,&rend));
7434       CHKERRQ(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
7435       CHKERRQ(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
7436       row = nrank;
7437       ncols = xadj[1]-xadj[0];
7438       cols = adjncy;
7439       CHKERRQ(PetscMalloc1(ncols,&vals));
7440       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7441       CHKERRQ(MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES));
7442       CHKERRQ(MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY));
7443       CHKERRQ(MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY));
7444       CHKERRQ(PetscFree(xadj));
7445       CHKERRQ(PetscFree(adjncy));
7446       CHKERRQ(PetscFree(adjncy_wgt));
7447       CHKERRQ(PetscFree(vals));
7448       if (use_vwgt) {
7449         Vec               v;
7450         const PetscScalar *array;
7451         PetscInt          nl;
7452 
7453         CHKERRQ(MatCreateVecs(subdomain_adj,&v,NULL));
7454         CHKERRQ(VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES));
7455         CHKERRQ(VecAssemblyBegin(v));
7456         CHKERRQ(VecAssemblyEnd(v));
7457         CHKERRQ(VecGetLocalSize(v,&nl));
7458         CHKERRQ(VecGetArrayRead(v,&array));
7459         CHKERRQ(PetscMalloc1(nl,&v_wgt));
7460         for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7461         CHKERRQ(VecRestoreArrayRead(v,&array));
7462         CHKERRQ(VecDestroy(&v));
7463       }
7464     } else {
7465       CHKERRQ(MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj));
7466       if (use_vwgt) {
7467         CHKERRQ(PetscMalloc1(1,&v_wgt));
7468         v_wgt[0] = n;
7469       }
7470     }
7471     /* CHKERRQ(MatView(subdomain_adj,0)); */
7472 
7473     /* Partition */
7474     CHKERRQ(MatPartitioningCreate(subcomm,&partitioner));
7475 #if defined(PETSC_HAVE_PTSCOTCH)
7476     CHKERRQ(MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH));
7477 #elif defined(PETSC_HAVE_PARMETIS)
7478     CHKERRQ(MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS));
7479 #else
7480     CHKERRQ(MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE));
7481 #endif
7482     CHKERRQ(MatPartitioningSetAdjacency(partitioner,subdomain_adj));
7483     if (v_wgt) {
7484       CHKERRQ(MatPartitioningSetVertexWeights(partitioner,v_wgt));
7485     }
7486     *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7487     CHKERRQ(MatPartitioningSetNParts(partitioner,*n_subdomains));
7488     CHKERRQ(MatPartitioningSetFromOptions(partitioner));
7489     CHKERRQ(MatPartitioningApply(partitioner,&new_ranks));
7490     /* CHKERRQ(MatPartitioningView(partitioner,0)); */
7491 
7492     /* renumber new_ranks to avoid "holes" in new set of processors */
7493     CHKERRQ(ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig));
7494     CHKERRQ(ISDestroy(&new_ranks));
7495     CHKERRQ(ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices));
7496     if (!aggregate) {
7497       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7498         PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7499         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7500       } else if (oldranks) {
7501         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7502       } else {
7503         ranks_send_to_idx[0] = is_indices[0];
7504       }
7505     } else {
7506       PetscInt    idx = 0;
7507       PetscMPIInt tag;
7508       MPI_Request *reqs;
7509 
7510       CHKERRQ(PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag));
7511       CHKERRQ(PetscMalloc1(rend-rstart,&reqs));
7512       for (i=rstart;i<rend;i++) {
7513         CHKERRMPI(MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]));
7514       }
7515       CHKERRMPI(MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE));
7516       CHKERRMPI(MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE));
7517       CHKERRQ(PetscFree(reqs));
7518       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7519         PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7520         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7521       } else if (oldranks) {
7522         ranks_send_to_idx[0] = oldranks[idx];
7523       } else {
7524         ranks_send_to_idx[0] = idx;
7525       }
7526     }
7527     CHKERRQ(ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices));
7528     /* clean up */
7529     CHKERRQ(PetscFree(oldranks));
7530     CHKERRQ(ISDestroy(&new_ranks_contig));
7531     CHKERRQ(MatDestroy(&subdomain_adj));
7532     CHKERRQ(MatPartitioningDestroy(&partitioner));
7533   }
7534   CHKERRQ(PetscSubcommDestroy(&psubcomm));
7535   CHKERRQ(PetscFree(procs_candidates));
7536 
7537   /* assemble parallel IS for sends */
7538   i = 1;
7539   if (!color) i=0;
7540   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends));
7541   PetscFunctionReturn(0);
7542 }
7543 
7544 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
7545 
7546 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7547 {
7548   Mat                    local_mat;
7549   IS                     is_sends_internal;
7550   PetscInt               rows,cols,new_local_rows;
7551   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7552   PetscBool              ismatis,isdense,newisdense,destroy_mat;
7553   ISLocalToGlobalMapping l2gmap;
7554   PetscInt*              l2gmap_indices;
7555   const PetscInt*        is_indices;
7556   MatType                new_local_type;
7557   /* buffers */
7558   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7559   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7560   PetscInt               *recv_buffer_idxs_local;
7561   PetscScalar            *ptr_vals,*recv_buffer_vals;
7562   const PetscScalar      *send_buffer_vals;
7563   PetscScalar            *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7564   /* MPI */
7565   MPI_Comm               comm,comm_n;
7566   PetscSubcomm           subcomm;
7567   PetscMPIInt            n_sends,n_recvs,size;
7568   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7569   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7570   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7571   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7572   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7573 
7574   PetscFunctionBegin;
7575   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
7576   CHKERRQ(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis));
7577   PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7578   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
7579   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
7580   PetscValidLogicalCollectiveBool(mat,restrict_full,5);
7581   PetscValidLogicalCollectiveBool(mat,reuse,6);
7582   PetscValidLogicalCollectiveInt(mat,nis,8);
7583   PetscValidLogicalCollectiveInt(mat,nvecs,10);
7584   if (nvecs) {
7585     PetscCheckFalse(nvecs > 1,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7586     PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11);
7587   }
7588   /* further checks */
7589   CHKERRQ(MatISGetLocalMat(mat,&local_mat));
7590   CHKERRQ(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense));
7591   PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7592   CHKERRQ(MatGetSize(local_mat,&rows,&cols));
7593   PetscCheck(rows == cols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
7594   if (reuse && *mat_n) {
7595     PetscInt mrows,mcols,mnrows,mncols;
7596     PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7);
7597     CHKERRQ(PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis));
7598     PetscCheck(ismatis,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7599     CHKERRQ(MatGetSize(mat,&mrows,&mcols));
7600     CHKERRQ(MatGetSize(*mat_n,&mnrows,&mncols));
7601     PetscCheck(mrows == mnrows,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7602     PetscCheck(mcols == mncols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7603   }
7604   CHKERRQ(MatGetBlockSize(local_mat,&bs));
7605   PetscValidLogicalCollectiveInt(mat,bs,1);
7606 
7607   /* prepare IS for sending if not provided */
7608   if (!is_sends) {
7609     PetscCheck(n_subdomains,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7610     CHKERRQ(PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL));
7611   } else {
7612     CHKERRQ(PetscObjectReference((PetscObject)is_sends));
7613     is_sends_internal = is_sends;
7614   }
7615 
7616   /* get comm */
7617   CHKERRQ(PetscObjectGetComm((PetscObject)mat,&comm));
7618 
7619   /* compute number of sends */
7620   CHKERRQ(ISGetLocalSize(is_sends_internal,&i));
7621   CHKERRQ(PetscMPIIntCast(i,&n_sends));
7622 
7623   /* compute number of receives */
7624   CHKERRMPI(MPI_Comm_size(comm,&size));
7625   CHKERRQ(PetscMalloc1(size,&iflags));
7626   CHKERRQ(PetscArrayzero(iflags,size));
7627   CHKERRQ(ISGetIndices(is_sends_internal,&is_indices));
7628   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7629   CHKERRQ(PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs));
7630   CHKERRQ(PetscFree(iflags));
7631 
7632   /* restrict comm if requested */
7633   subcomm = NULL;
7634   destroy_mat = PETSC_FALSE;
7635   if (restrict_comm) {
7636     PetscMPIInt color,subcommsize;
7637 
7638     color = 0;
7639     if (restrict_full) {
7640       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
7641     } else {
7642       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
7643     }
7644     CHKERRMPI(MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm));
7645     subcommsize = size - subcommsize;
7646     /* check if reuse has been requested */
7647     if (reuse) {
7648       if (*mat_n) {
7649         PetscMPIInt subcommsize2;
7650         CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2));
7651         PetscCheck(subcommsize == subcommsize2,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7652         comm_n = PetscObjectComm((PetscObject)*mat_n);
7653       } else {
7654         comm_n = PETSC_COMM_SELF;
7655       }
7656     } else { /* MAT_INITIAL_MATRIX */
7657       PetscMPIInt rank;
7658 
7659       CHKERRMPI(MPI_Comm_rank(comm,&rank));
7660       CHKERRQ(PetscSubcommCreate(comm,&subcomm));
7661       CHKERRQ(PetscSubcommSetNumber(subcomm,2));
7662       CHKERRQ(PetscSubcommSetTypeGeneral(subcomm,color,rank));
7663       comm_n = PetscSubcommChild(subcomm);
7664     }
7665     /* flag to destroy *mat_n if not significative */
7666     if (color) destroy_mat = PETSC_TRUE;
7667   } else {
7668     comm_n = comm;
7669   }
7670 
7671   /* prepare send/receive buffers */
7672   CHKERRQ(PetscMalloc1(size,&ilengths_idxs));
7673   CHKERRQ(PetscArrayzero(ilengths_idxs,size));
7674   CHKERRQ(PetscMalloc1(size,&ilengths_vals));
7675   CHKERRQ(PetscArrayzero(ilengths_vals,size));
7676   if (nis) {
7677     CHKERRQ(PetscCalloc1(size,&ilengths_idxs_is));
7678   }
7679 
7680   /* Get data from local matrices */
7681   PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7682     /* TODO: See below some guidelines on how to prepare the local buffers */
7683     /*
7684        send_buffer_vals should contain the raw values of the local matrix
7685        send_buffer_idxs should contain:
7686        - MatType_PRIVATE type
7687        - PetscInt        size_of_l2gmap
7688        - PetscInt        global_row_indices[size_of_l2gmap]
7689        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7690     */
7691   {
7692     ISLocalToGlobalMapping mapping;
7693 
7694     CHKERRQ(MatISGetLocalToGlobalMapping(mat,&mapping,NULL));
7695     CHKERRQ(MatDenseGetArrayRead(local_mat,&send_buffer_vals));
7696     CHKERRQ(ISLocalToGlobalMappingGetSize(mapping,&i));
7697     CHKERRQ(PetscMalloc1(i+2,&send_buffer_idxs));
7698     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7699     send_buffer_idxs[1] = i;
7700     CHKERRQ(ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs));
7701     CHKERRQ(PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i));
7702     CHKERRQ(ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs));
7703     CHKERRQ(PetscMPIIntCast(i,&len));
7704     for (i=0;i<n_sends;i++) {
7705       ilengths_vals[is_indices[i]] = len*len;
7706       ilengths_idxs[is_indices[i]] = len+2;
7707     }
7708   }
7709   CHKERRQ(PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals));
7710   /* additional is (if any) */
7711   if (nis) {
7712     PetscMPIInt psum;
7713     PetscInt j;
7714     for (j=0,psum=0;j<nis;j++) {
7715       PetscInt plen;
7716       CHKERRQ(ISGetLocalSize(isarray[j],&plen));
7717       CHKERRQ(PetscMPIIntCast(plen,&len));
7718       psum += len+1; /* indices + lenght */
7719     }
7720     CHKERRQ(PetscMalloc1(psum,&send_buffer_idxs_is));
7721     for (j=0,psum=0;j<nis;j++) {
7722       PetscInt plen;
7723       const PetscInt *is_array_idxs;
7724       CHKERRQ(ISGetLocalSize(isarray[j],&plen));
7725       send_buffer_idxs_is[psum] = plen;
7726       CHKERRQ(ISGetIndices(isarray[j],&is_array_idxs));
7727       CHKERRQ(PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen));
7728       CHKERRQ(ISRestoreIndices(isarray[j],&is_array_idxs));
7729       psum += plen+1; /* indices + lenght */
7730     }
7731     for (i=0;i<n_sends;i++) {
7732       ilengths_idxs_is[is_indices[i]] = psum;
7733     }
7734     CHKERRQ(PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is));
7735   }
7736   CHKERRQ(MatISRestoreLocalMat(mat,&local_mat));
7737 
7738   buf_size_idxs = 0;
7739   buf_size_vals = 0;
7740   buf_size_idxs_is = 0;
7741   buf_size_vecs = 0;
7742   for (i=0;i<n_recvs;i++) {
7743     buf_size_idxs += (PetscInt)olengths_idxs[i];
7744     buf_size_vals += (PetscInt)olengths_vals[i];
7745     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7746     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7747   }
7748   CHKERRQ(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs));
7749   CHKERRQ(PetscMalloc1(buf_size_vals,&recv_buffer_vals));
7750   CHKERRQ(PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is));
7751   CHKERRQ(PetscMalloc1(buf_size_vecs,&recv_buffer_vecs));
7752 
7753   /* get new tags for clean communications */
7754   CHKERRQ(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs));
7755   CHKERRQ(PetscObjectGetNewTag((PetscObject)mat,&tag_vals));
7756   CHKERRQ(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is));
7757   CHKERRQ(PetscObjectGetNewTag((PetscObject)mat,&tag_vecs));
7758 
7759   /* allocate for requests */
7760   CHKERRQ(PetscMalloc1(n_sends,&send_req_idxs));
7761   CHKERRQ(PetscMalloc1(n_sends,&send_req_vals));
7762   CHKERRQ(PetscMalloc1(n_sends,&send_req_idxs_is));
7763   CHKERRQ(PetscMalloc1(n_sends,&send_req_vecs));
7764   CHKERRQ(PetscMalloc1(n_recvs,&recv_req_idxs));
7765   CHKERRQ(PetscMalloc1(n_recvs,&recv_req_vals));
7766   CHKERRQ(PetscMalloc1(n_recvs,&recv_req_idxs_is));
7767   CHKERRQ(PetscMalloc1(n_recvs,&recv_req_vecs));
7768 
7769   /* communications */
7770   ptr_idxs = recv_buffer_idxs;
7771   ptr_vals = recv_buffer_vals;
7772   ptr_idxs_is = recv_buffer_idxs_is;
7773   ptr_vecs = recv_buffer_vecs;
7774   for (i=0;i<n_recvs;i++) {
7775     source_dest = onodes[i];
7776     CHKERRMPI(MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]));
7777     CHKERRMPI(MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]));
7778     ptr_idxs += olengths_idxs[i];
7779     ptr_vals += olengths_vals[i];
7780     if (nis) {
7781       source_dest = onodes_is[i];
7782       CHKERRMPI(MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]));
7783       ptr_idxs_is += olengths_idxs_is[i];
7784     }
7785     if (nvecs) {
7786       source_dest = onodes[i];
7787       CHKERRMPI(MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]));
7788       ptr_vecs += olengths_idxs[i]-2;
7789     }
7790   }
7791   for (i=0;i<n_sends;i++) {
7792     CHKERRQ(PetscMPIIntCast(is_indices[i],&source_dest));
7793     CHKERRMPI(MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]));
7794     CHKERRMPI(MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]));
7795     if (nis) {
7796       CHKERRMPI(MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]));
7797     }
7798     if (nvecs) {
7799       CHKERRQ(VecGetArray(nnsp_vec[0],&send_buffer_vecs));
7800       CHKERRMPI(MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]));
7801     }
7802   }
7803   CHKERRQ(ISRestoreIndices(is_sends_internal,&is_indices));
7804   CHKERRQ(ISDestroy(&is_sends_internal));
7805 
7806   /* assemble new l2g map */
7807   CHKERRMPI(MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE));
7808   ptr_idxs = recv_buffer_idxs;
7809   new_local_rows = 0;
7810   for (i=0;i<n_recvs;i++) {
7811     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7812     ptr_idxs += olengths_idxs[i];
7813   }
7814   CHKERRQ(PetscMalloc1(new_local_rows,&l2gmap_indices));
7815   ptr_idxs = recv_buffer_idxs;
7816   new_local_rows = 0;
7817   for (i=0;i<n_recvs;i++) {
7818     CHKERRQ(PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1)));
7819     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7820     ptr_idxs += olengths_idxs[i];
7821   }
7822   CHKERRQ(PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices));
7823   CHKERRQ(ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap));
7824   CHKERRQ(PetscFree(l2gmap_indices));
7825 
7826   /* infer new local matrix type from received local matrices type */
7827   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7828   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7829   if (n_recvs) {
7830     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7831     ptr_idxs = recv_buffer_idxs;
7832     for (i=0;i<n_recvs;i++) {
7833       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7834         new_local_type_private = MATAIJ_PRIVATE;
7835         break;
7836       }
7837       ptr_idxs += olengths_idxs[i];
7838     }
7839     switch (new_local_type_private) {
7840       case MATDENSE_PRIVATE:
7841         new_local_type = MATSEQAIJ;
7842         bs = 1;
7843         break;
7844       case MATAIJ_PRIVATE:
7845         new_local_type = MATSEQAIJ;
7846         bs = 1;
7847         break;
7848       case MATBAIJ_PRIVATE:
7849         new_local_type = MATSEQBAIJ;
7850         break;
7851       case MATSBAIJ_PRIVATE:
7852         new_local_type = MATSEQSBAIJ;
7853         break;
7854       default:
7855         SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7856     }
7857   } else { /* by default, new_local_type is seqaij */
7858     new_local_type = MATSEQAIJ;
7859     bs = 1;
7860   }
7861 
7862   /* create MATIS object if needed */
7863   if (!reuse) {
7864     CHKERRQ(MatGetSize(mat,&rows,&cols));
7865     CHKERRQ(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n));
7866   } else {
7867     /* it also destroys the local matrices */
7868     if (*mat_n) {
7869       CHKERRQ(MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap));
7870     } else { /* this is a fake object */
7871       CHKERRQ(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n));
7872     }
7873   }
7874   CHKERRQ(MatISGetLocalMat(*mat_n,&local_mat));
7875   CHKERRQ(MatSetType(local_mat,new_local_type));
7876 
7877   CHKERRMPI(MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE));
7878 
7879   /* Global to local map of received indices */
7880   CHKERRQ(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local)); /* needed for values insertion */
7881   CHKERRQ(ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local));
7882   CHKERRQ(ISLocalToGlobalMappingDestroy(&l2gmap));
7883 
7884   /* restore attributes -> type of incoming data and its size */
7885   buf_size_idxs = 0;
7886   for (i=0;i<n_recvs;i++) {
7887     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7888     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7889     buf_size_idxs += (PetscInt)olengths_idxs[i];
7890   }
7891   CHKERRQ(PetscFree(recv_buffer_idxs));
7892 
7893   /* set preallocation */
7894   CHKERRQ(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense));
7895   if (!newisdense) {
7896     PetscInt *new_local_nnz=NULL;
7897 
7898     ptr_idxs = recv_buffer_idxs_local;
7899     if (n_recvs) {
7900       CHKERRQ(PetscCalloc1(new_local_rows,&new_local_nnz));
7901     }
7902     for (i=0;i<n_recvs;i++) {
7903       PetscInt j;
7904       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7905         for (j=0;j<*(ptr_idxs+1);j++) {
7906           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7907         }
7908       } else {
7909         /* TODO */
7910       }
7911       ptr_idxs += olengths_idxs[i];
7912     }
7913     if (new_local_nnz) {
7914       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7915       CHKERRQ(MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz));
7916       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7917       CHKERRQ(MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz));
7918       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7919       CHKERRQ(MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz));
7920     } else {
7921       CHKERRQ(MatSetUp(local_mat));
7922     }
7923     CHKERRQ(PetscFree(new_local_nnz));
7924   } else {
7925     CHKERRQ(MatSetUp(local_mat));
7926   }
7927 
7928   /* set values */
7929   ptr_vals = recv_buffer_vals;
7930   ptr_idxs = recv_buffer_idxs_local;
7931   for (i=0;i<n_recvs;i++) {
7932     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7933       CHKERRQ(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE));
7934       CHKERRQ(MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES));
7935       CHKERRQ(MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY));
7936       CHKERRQ(MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY));
7937       CHKERRQ(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE));
7938     } else {
7939       /* TODO */
7940     }
7941     ptr_idxs += olengths_idxs[i];
7942     ptr_vals += olengths_vals[i];
7943   }
7944   CHKERRQ(MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY));
7945   CHKERRQ(MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY));
7946   CHKERRQ(MatISRestoreLocalMat(*mat_n,&local_mat));
7947   CHKERRQ(MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY));
7948   CHKERRQ(MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY));
7949   CHKERRQ(PetscFree(recv_buffer_vals));
7950 
7951 #if 0
7952   if (!restrict_comm) { /* check */
7953     Vec       lvec,rvec;
7954     PetscReal infty_error;
7955 
7956     CHKERRQ(MatCreateVecs(mat,&rvec,&lvec));
7957     CHKERRQ(VecSetRandom(rvec,NULL));
7958     CHKERRQ(MatMult(mat,rvec,lvec));
7959     CHKERRQ(VecScale(lvec,-1.0));
7960     CHKERRQ(MatMultAdd(*mat_n,rvec,lvec,lvec));
7961     CHKERRQ(VecNorm(lvec,NORM_INFINITY,&infty_error));
7962     CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
7963     CHKERRQ(VecDestroy(&rvec));
7964     CHKERRQ(VecDestroy(&lvec));
7965   }
7966 #endif
7967 
7968   /* assemble new additional is (if any) */
7969   if (nis) {
7970     PetscInt **temp_idxs,*count_is,j,psum;
7971 
7972     CHKERRMPI(MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE));
7973     CHKERRQ(PetscCalloc1(nis,&count_is));
7974     ptr_idxs = recv_buffer_idxs_is;
7975     psum = 0;
7976     for (i=0;i<n_recvs;i++) {
7977       for (j=0;j<nis;j++) {
7978         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7979         count_is[j] += plen; /* increment counting of buffer for j-th IS */
7980         psum += plen;
7981         ptr_idxs += plen+1; /* shift pointer to received data */
7982       }
7983     }
7984     CHKERRQ(PetscMalloc1(nis,&temp_idxs));
7985     CHKERRQ(PetscMalloc1(psum,&temp_idxs[0]));
7986     for (i=1;i<nis;i++) {
7987       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7988     }
7989     CHKERRQ(PetscArrayzero(count_is,nis));
7990     ptr_idxs = recv_buffer_idxs_is;
7991     for (i=0;i<n_recvs;i++) {
7992       for (j=0;j<nis;j++) {
7993         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7994         CHKERRQ(PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen));
7995         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7996         ptr_idxs += plen+1; /* shift pointer to received data */
7997       }
7998     }
7999     for (i=0;i<nis;i++) {
8000       CHKERRQ(ISDestroy(&isarray[i]));
8001       CHKERRQ(PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]));
8002       CHKERRQ(ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]));
8003     }
8004     CHKERRQ(PetscFree(count_is));
8005     CHKERRQ(PetscFree(temp_idxs[0]));
8006     CHKERRQ(PetscFree(temp_idxs));
8007   }
8008   /* free workspace */
8009   CHKERRQ(PetscFree(recv_buffer_idxs_is));
8010   CHKERRMPI(MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE));
8011   CHKERRQ(PetscFree(send_buffer_idxs));
8012   CHKERRMPI(MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE));
8013   if (isdense) {
8014     CHKERRQ(MatISGetLocalMat(mat,&local_mat));
8015     CHKERRQ(MatDenseRestoreArrayRead(local_mat,&send_buffer_vals));
8016     CHKERRQ(MatISRestoreLocalMat(mat,&local_mat));
8017   } else {
8018     /* CHKERRQ(PetscFree(send_buffer_vals)); */
8019   }
8020   if (nis) {
8021     CHKERRMPI(MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE));
8022     CHKERRQ(PetscFree(send_buffer_idxs_is));
8023   }
8024 
8025   if (nvecs) {
8026     CHKERRMPI(MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE));
8027     CHKERRMPI(MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE));
8028     CHKERRQ(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs));
8029     CHKERRQ(VecDestroy(&nnsp_vec[0]));
8030     CHKERRQ(VecCreate(comm_n,&nnsp_vec[0]));
8031     CHKERRQ(VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE));
8032     CHKERRQ(VecSetType(nnsp_vec[0],VECSTANDARD));
8033     /* set values */
8034     ptr_vals = recv_buffer_vecs;
8035     ptr_idxs = recv_buffer_idxs_local;
8036     CHKERRQ(VecGetArray(nnsp_vec[0],&send_buffer_vecs));
8037     for (i=0;i<n_recvs;i++) {
8038       PetscInt j;
8039       for (j=0;j<*(ptr_idxs+1);j++) {
8040         send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
8041       }
8042       ptr_idxs += olengths_idxs[i];
8043       ptr_vals += olengths_idxs[i]-2;
8044     }
8045     CHKERRQ(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs));
8046     CHKERRQ(VecAssemblyBegin(nnsp_vec[0]));
8047     CHKERRQ(VecAssemblyEnd(nnsp_vec[0]));
8048   }
8049 
8050   CHKERRQ(PetscFree(recv_buffer_vecs));
8051   CHKERRQ(PetscFree(recv_buffer_idxs_local));
8052   CHKERRQ(PetscFree(recv_req_idxs));
8053   CHKERRQ(PetscFree(recv_req_vals));
8054   CHKERRQ(PetscFree(recv_req_vecs));
8055   CHKERRQ(PetscFree(recv_req_idxs_is));
8056   CHKERRQ(PetscFree(send_req_idxs));
8057   CHKERRQ(PetscFree(send_req_vals));
8058   CHKERRQ(PetscFree(send_req_vecs));
8059   CHKERRQ(PetscFree(send_req_idxs_is));
8060   CHKERRQ(PetscFree(ilengths_vals));
8061   CHKERRQ(PetscFree(ilengths_idxs));
8062   CHKERRQ(PetscFree(olengths_vals));
8063   CHKERRQ(PetscFree(olengths_idxs));
8064   CHKERRQ(PetscFree(onodes));
8065   if (nis) {
8066     CHKERRQ(PetscFree(ilengths_idxs_is));
8067     CHKERRQ(PetscFree(olengths_idxs_is));
8068     CHKERRQ(PetscFree(onodes_is));
8069   }
8070   CHKERRQ(PetscSubcommDestroy(&subcomm));
8071   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
8072     CHKERRQ(MatDestroy(mat_n));
8073     for (i=0;i<nis;i++) {
8074       CHKERRQ(ISDestroy(&isarray[i]));
8075     }
8076     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8077       CHKERRQ(VecDestroy(&nnsp_vec[0]));
8078     }
8079     *mat_n = NULL;
8080   }
8081   PetscFunctionReturn(0);
8082 }
8083 
8084 /* temporary hack into ksp private data structure */
8085 #include <petsc/private/kspimpl.h>
8086 
8087 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
8088 {
8089   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
8090   PC_IS                  *pcis = (PC_IS*)pc->data;
8091   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
8092   Mat                    coarsedivudotp = NULL;
8093   Mat                    coarseG,t_coarse_mat_is;
8094   MatNullSpace           CoarseNullSpace = NULL;
8095   ISLocalToGlobalMapping coarse_islg;
8096   IS                     coarse_is,*isarray,corners;
8097   PetscInt               i,im_active=-1,active_procs=-1;
8098   PetscInt               nis,nisdofs,nisneu,nisvert;
8099   PetscInt               coarse_eqs_per_proc;
8100   PC                     pc_temp;
8101   PCType                 coarse_pc_type;
8102   KSPType                coarse_ksp_type;
8103   PetscBool              multilevel_requested,multilevel_allowed;
8104   PetscBool              coarse_reuse;
8105   PetscInt               ncoarse,nedcfield;
8106   PetscBool              compute_vecs = PETSC_FALSE;
8107   PetscScalar            *array;
8108   MatReuse               coarse_mat_reuse;
8109   PetscBool              restr, full_restr, have_void;
8110   PetscMPIInt            size;
8111   PetscErrorCode         ierr;
8112 
8113   PetscFunctionBegin;
8114   CHKERRQ(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0));
8115   /* Assign global numbering to coarse dofs */
8116   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8117     PetscInt ocoarse_size;
8118     compute_vecs = PETSC_TRUE;
8119 
8120     pcbddc->new_primal_space = PETSC_TRUE;
8121     ocoarse_size = pcbddc->coarse_size;
8122     CHKERRQ(PetscFree(pcbddc->global_primal_indices));
8123     CHKERRQ(PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices));
8124     /* see if we can avoid some work */
8125     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8126       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8127       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8128         CHKERRQ(KSPReset(pcbddc->coarse_ksp));
8129         coarse_reuse = PETSC_FALSE;
8130       } else { /* we can safely reuse already computed coarse matrix */
8131         coarse_reuse = PETSC_TRUE;
8132       }
8133     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8134       coarse_reuse = PETSC_FALSE;
8135     }
8136     /* reset any subassembling information */
8137     if (!coarse_reuse || pcbddc->recompute_topography) {
8138       CHKERRQ(ISDestroy(&pcbddc->coarse_subassembling));
8139     }
8140   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8141     coarse_reuse = PETSC_TRUE;
8142   }
8143   if (coarse_reuse && pcbddc->coarse_ksp) {
8144     CHKERRQ(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL));
8145     CHKERRQ(PetscObjectReference((PetscObject)coarse_mat));
8146     coarse_mat_reuse = MAT_REUSE_MATRIX;
8147   } else {
8148     coarse_mat = NULL;
8149     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8150   }
8151 
8152   /* creates temporary l2gmap and IS for coarse indexes */
8153   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is));
8154   CHKERRQ(ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg));
8155 
8156   /* creates temporary MATIS object for coarse matrix */
8157   CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense));
8158   CHKERRQ(MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is));
8159   CHKERRQ(MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense));
8160   CHKERRQ(MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY));
8161   CHKERRQ(MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY));
8162   CHKERRQ(MatDestroy(&coarse_submat_dense));
8163 
8164   /* count "active" (i.e. with positive local size) and "void" processes */
8165   im_active = !!(pcis->n);
8166   CHKERRMPI(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc)));
8167 
8168   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8169   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8170   /* full_restr : just use the receivers from the subassembling pattern */
8171   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size));
8172   coarse_mat_is        = NULL;
8173   multilevel_allowed   = PETSC_FALSE;
8174   multilevel_requested = PETSC_FALSE;
8175   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8176   if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8177   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8178   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8179   if (multilevel_requested) {
8180     ncoarse    = active_procs/pcbddc->coarsening_ratio;
8181     restr      = PETSC_FALSE;
8182     full_restr = PETSC_FALSE;
8183   } else {
8184     ncoarse    = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8185     restr      = PETSC_TRUE;
8186     full_restr = PETSC_TRUE;
8187   }
8188   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8189   ncoarse = PetscMax(1,ncoarse);
8190   if (!pcbddc->coarse_subassembling) {
8191     if (pcbddc->coarsening_ratio > 1) {
8192       if (multilevel_requested) {
8193         CHKERRQ(PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void));
8194       } else {
8195         CHKERRQ(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void));
8196       }
8197     } else {
8198       PetscMPIInt rank;
8199 
8200       CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank));
8201       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8202       CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling));
8203     }
8204   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8205     PetscInt    psum;
8206     if (pcbddc->coarse_ksp) psum = 1;
8207     else psum = 0;
8208     CHKERRMPI(MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc)));
8209     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8210   }
8211   /* determine if we can go multilevel */
8212   if (multilevel_requested) {
8213     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8214     else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8215   }
8216   if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8217 
8218   /* dump subassembling pattern */
8219   if (pcbddc->dbg_flag && multilevel_allowed) {
8220     CHKERRQ(ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer));
8221   }
8222   /* compute dofs splitting and neumann boundaries for coarse dofs */
8223   nedcfield = -1;
8224   corners = NULL;
8225   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8226     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
8227     const PetscInt         *idxs;
8228     ISLocalToGlobalMapping tmap;
8229 
8230     /* create map between primal indices (in local representative ordering) and local primal numbering */
8231     CHKERRQ(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap));
8232     /* allocate space for temporary storage */
8233     CHKERRQ(PetscMalloc1(pcbddc->local_primal_size,&tidxs));
8234     CHKERRQ(PetscMalloc1(pcbddc->local_primal_size,&tidxs2));
8235     /* allocate for IS array */
8236     nisdofs = pcbddc->n_ISForDofsLocal;
8237     if (pcbddc->nedclocal) {
8238       if (pcbddc->nedfield > -1) {
8239         nedcfield = pcbddc->nedfield;
8240       } else {
8241         nedcfield = 0;
8242         PetscCheck(!nisdofs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
8243         nisdofs = 1;
8244       }
8245     }
8246     nisneu = !!pcbddc->NeumannBoundariesLocal;
8247     nisvert = 0; /* nisvert is not used */
8248     nis = nisdofs + nisneu + nisvert;
8249     CHKERRQ(PetscMalloc1(nis,&isarray));
8250     /* dofs splitting */
8251     for (i=0;i<nisdofs;i++) {
8252       /* CHKERRQ(ISView(pcbddc->ISForDofsLocal[i],0)); */
8253       if (nedcfield != i) {
8254         CHKERRQ(ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize));
8255         CHKERRQ(ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs));
8256         CHKERRQ(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs));
8257         CHKERRQ(ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs));
8258       } else {
8259         CHKERRQ(ISGetLocalSize(pcbddc->nedclocal,&tsize));
8260         CHKERRQ(ISGetIndices(pcbddc->nedclocal,&idxs));
8261         CHKERRQ(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs));
8262         PetscCheckFalse(tsize != nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
8263         CHKERRQ(ISRestoreIndices(pcbddc->nedclocal,&idxs));
8264       }
8265       CHKERRQ(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2));
8266       CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]));
8267       /* CHKERRQ(ISView(isarray[i],0)); */
8268     }
8269     /* neumann boundaries */
8270     if (pcbddc->NeumannBoundariesLocal) {
8271       /* CHKERRQ(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8272       CHKERRQ(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize));
8273       CHKERRQ(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs));
8274       CHKERRQ(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs));
8275       CHKERRQ(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs));
8276       CHKERRQ(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2));
8277       CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]));
8278       /* CHKERRQ(ISView(isarray[nisdofs],0)); */
8279     }
8280     /* coordinates */
8281     if (pcbddc->corner_selected) {
8282       CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners));
8283       CHKERRQ(ISGetLocalSize(corners,&tsize));
8284       CHKERRQ(ISGetIndices(corners,&idxs));
8285       CHKERRQ(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs));
8286       PetscCheckFalse(tsize != nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout);
8287       CHKERRQ(ISRestoreIndices(corners,&idxs));
8288       CHKERRQ(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners));
8289       CHKERRQ(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2));
8290       CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners));
8291     }
8292     CHKERRQ(PetscFree(tidxs));
8293     CHKERRQ(PetscFree(tidxs2));
8294     CHKERRQ(ISLocalToGlobalMappingDestroy(&tmap));
8295   } else {
8296     nis = 0;
8297     nisdofs = 0;
8298     nisneu = 0;
8299     nisvert = 0;
8300     isarray = NULL;
8301   }
8302   /* destroy no longer needed map */
8303   CHKERRQ(ISLocalToGlobalMappingDestroy(&coarse_islg));
8304 
8305   /* subassemble */
8306   if (multilevel_allowed) {
8307     Vec       vp[1];
8308     PetscInt  nvecs = 0;
8309     PetscBool reuse,reuser;
8310 
8311     if (coarse_mat) reuse = PETSC_TRUE;
8312     else reuse = PETSC_FALSE;
8313     CHKERRMPI(MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc)));
8314     vp[0] = NULL;
8315     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8316       CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]));
8317       CHKERRQ(VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE));
8318       CHKERRQ(VecSetType(vp[0],VECSTANDARD));
8319       nvecs = 1;
8320 
8321       if (pcbddc->divudotp) {
8322         Mat      B,loc_divudotp;
8323         Vec      v,p;
8324         IS       dummy;
8325         PetscInt np;
8326 
8327         CHKERRQ(MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp));
8328         CHKERRQ(MatGetSize(loc_divudotp,&np,NULL));
8329         CHKERRQ(ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy));
8330         CHKERRQ(MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B));
8331         CHKERRQ(MatCreateVecs(B,&v,&p));
8332         CHKERRQ(VecSet(p,1.));
8333         CHKERRQ(MatMultTranspose(B,p,v));
8334         CHKERRQ(VecDestroy(&p));
8335         CHKERRQ(MatDestroy(&B));
8336         CHKERRQ(VecGetArray(vp[0],&array));
8337         CHKERRQ(VecPlaceArray(pcbddc->vec1_P,array));
8338         CHKERRQ(MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P));
8339         CHKERRQ(VecResetArray(pcbddc->vec1_P));
8340         CHKERRQ(VecRestoreArray(vp[0],&array));
8341         CHKERRQ(ISDestroy(&dummy));
8342         CHKERRQ(VecDestroy(&v));
8343       }
8344     }
8345     if (reuser) {
8346       CHKERRQ(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp));
8347     } else {
8348       CHKERRQ(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp));
8349     }
8350     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8351       PetscScalar       *arraym;
8352       const PetscScalar *arrayv;
8353       PetscInt          nl;
8354       CHKERRQ(VecGetLocalSize(vp[0],&nl));
8355       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp));
8356       CHKERRQ(MatDenseGetArray(coarsedivudotp,&arraym));
8357       CHKERRQ(VecGetArrayRead(vp[0],&arrayv));
8358       CHKERRQ(PetscArraycpy(arraym,arrayv,nl));
8359       CHKERRQ(VecRestoreArrayRead(vp[0],&arrayv));
8360       CHKERRQ(MatDenseRestoreArray(coarsedivudotp,&arraym));
8361       CHKERRQ(VecDestroy(&vp[0]));
8362     } else {
8363       CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp));
8364     }
8365   } else {
8366     CHKERRQ(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL));
8367   }
8368   if (coarse_mat_is || coarse_mat) {
8369     if (!multilevel_allowed) {
8370       CHKERRQ(MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat));
8371     } else {
8372       /* if this matrix is present, it means we are not reusing the coarse matrix */
8373       if (coarse_mat_is) {
8374         PetscCheck(!coarse_mat,PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8375         CHKERRQ(PetscObjectReference((PetscObject)coarse_mat_is));
8376         coarse_mat = coarse_mat_is;
8377       }
8378     }
8379   }
8380   CHKERRQ(MatDestroy(&t_coarse_mat_is));
8381   CHKERRQ(MatDestroy(&coarse_mat_is));
8382 
8383   /* create local to global scatters for coarse problem */
8384   if (compute_vecs) {
8385     PetscInt lrows;
8386     CHKERRQ(VecDestroy(&pcbddc->coarse_vec));
8387     if (coarse_mat) {
8388       CHKERRQ(MatGetLocalSize(coarse_mat,&lrows,NULL));
8389     } else {
8390       lrows = 0;
8391     }
8392     CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec));
8393     CHKERRQ(VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE));
8394     CHKERRQ(VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8395     CHKERRQ(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8396     CHKERRQ(VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob));
8397   }
8398   CHKERRQ(ISDestroy(&coarse_is));
8399 
8400   /* set defaults for coarse KSP and PC */
8401   if (multilevel_allowed) {
8402     coarse_ksp_type = KSPRICHARDSON;
8403     coarse_pc_type  = PCBDDC;
8404   } else {
8405     coarse_ksp_type = KSPPREONLY;
8406     coarse_pc_type  = PCREDUNDANT;
8407   }
8408 
8409   /* print some info if requested */
8410   if (pcbddc->dbg_flag) {
8411     if (!multilevel_allowed) {
8412       CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n"));
8413       if (multilevel_requested) {
8414         CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio));
8415       } else if (pcbddc->max_levels) {
8416         CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels));
8417       }
8418       CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
8419     }
8420   }
8421 
8422   /* communicate coarse discrete gradient */
8423   coarseG = NULL;
8424   if (pcbddc->nedcG && multilevel_allowed) {
8425     MPI_Comm ccomm;
8426     if (coarse_mat) {
8427       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8428     } else {
8429       ccomm = MPI_COMM_NULL;
8430     }
8431     CHKERRQ(MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG));
8432   }
8433 
8434   /* create the coarse KSP object only once with defaults */
8435   if (coarse_mat) {
8436     PetscBool   isredundant,isbddc,force,valid;
8437     PetscViewer dbg_viewer = NULL;
8438 
8439     if (pcbddc->dbg_flag) {
8440       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8441       CHKERRQ(PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level));
8442     }
8443     if (!pcbddc->coarse_ksp) {
8444       char   prefix[256],str_level[16];
8445       size_t len;
8446 
8447       CHKERRQ(KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp));
8448       CHKERRQ(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure));
8449       CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1));
8450       CHKERRQ(KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1));
8451       CHKERRQ(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat));
8452       CHKERRQ(KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type));
8453       CHKERRQ(KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE));
8454       CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&pc_temp));
8455       /* TODO is this logic correct? should check for coarse_mat type */
8456       CHKERRQ(PCSetType(pc_temp,coarse_pc_type));
8457       /* prefix */
8458       CHKERRQ(PetscStrcpy(prefix,""));
8459       CHKERRQ(PetscStrcpy(str_level,""));
8460       if (!pcbddc->current_level) {
8461         CHKERRQ(PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix)));
8462         CHKERRQ(PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix)));
8463       } else {
8464         CHKERRQ(PetscStrlen(((PetscObject)pc)->prefix,&len));
8465         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8466         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8467         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8468         CHKERRQ(PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1));
8469         CHKERRQ(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level)));
8470         CHKERRQ(PetscStrlcat(prefix,str_level,sizeof(prefix)));
8471       }
8472       CHKERRQ(KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix));
8473       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8474       CHKERRQ(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1));
8475       CHKERRQ(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio));
8476       CHKERRQ(PCBDDCSetLevels(pc_temp,pcbddc->max_levels));
8477       /* allow user customization */
8478       CHKERRQ(KSPSetFromOptions(pcbddc->coarse_ksp));
8479       /* get some info after set from options */
8480       CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&pc_temp));
8481       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8482       force = PETSC_FALSE;
8483       CHKERRQ(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL));
8484       CHKERRQ(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,""));
8485       CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc));
8486       if (multilevel_allowed && !force && !valid) {
8487         isbddc = PETSC_TRUE;
8488         CHKERRQ(PCSetType(pc_temp,PCBDDC));
8489         CHKERRQ(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1));
8490         CHKERRQ(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio));
8491         CHKERRQ(PCBDDCSetLevels(pc_temp,pcbddc->max_levels));
8492         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8493           ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);CHKERRQ(ierr);
8494           CHKERRQ((*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp));
8495           CHKERRQ(PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp));
8496           ierr = PetscOptionsEnd();CHKERRQ(ierr);
8497           pc_temp->setfromoptionscalled++;
8498         }
8499       }
8500     }
8501     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8502     CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&pc_temp));
8503     if (nisdofs) {
8504       CHKERRQ(PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray));
8505       for (i=0;i<nisdofs;i++) {
8506         CHKERRQ(ISDestroy(&isarray[i]));
8507       }
8508     }
8509     if (nisneu) {
8510       CHKERRQ(PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]));
8511       CHKERRQ(ISDestroy(&isarray[nisdofs]));
8512     }
8513     if (nisvert) {
8514       CHKERRQ(PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]));
8515       CHKERRQ(ISDestroy(&isarray[nis-1]));
8516     }
8517     if (coarseG) {
8518       CHKERRQ(PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE));
8519     }
8520 
8521     /* get some info after set from options */
8522     CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc));
8523 
8524     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8525     if (isbddc && !multilevel_allowed) {
8526       CHKERRQ(PCSetType(pc_temp,coarse_pc_type));
8527     }
8528     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8529     force = PETSC_FALSE;
8530     CHKERRQ(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL));
8531     CHKERRQ(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,""));
8532     if (multilevel_requested && multilevel_allowed && !valid && !force) {
8533       CHKERRQ(PCSetType(pc_temp,PCBDDC));
8534     }
8535     CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant));
8536     if (isredundant) {
8537       KSP inner_ksp;
8538       PC  inner_pc;
8539 
8540       CHKERRQ(PCRedundantGetKSP(pc_temp,&inner_ksp));
8541       CHKERRQ(KSPGetPC(inner_ksp,&inner_pc));
8542     }
8543 
8544     /* parameters which miss an API */
8545     CHKERRQ(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc));
8546     if (isbddc) {
8547       PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
8548 
8549       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8550       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8551       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8552       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8553       if (pcbddc_coarse->benign_saddle_point) {
8554         Mat                    coarsedivudotp_is;
8555         ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8556         IS                     row,col;
8557         const PetscInt         *gidxs;
8558         PetscInt               n,st,M,N;
8559 
8560         CHKERRQ(MatGetSize(coarsedivudotp,&n,NULL));
8561         CHKERRMPI(MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat)));
8562         st   = st-n;
8563         CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row));
8564         CHKERRQ(MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL));
8565         CHKERRQ(ISLocalToGlobalMappingGetSize(l2gmap,&n));
8566         CHKERRQ(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs));
8567         CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col));
8568         CHKERRQ(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs));
8569         CHKERRQ(ISLocalToGlobalMappingCreateIS(row,&rl2g));
8570         CHKERRQ(ISLocalToGlobalMappingCreateIS(col,&cl2g));
8571         CHKERRQ(ISGetSize(row,&M));
8572         CHKERRQ(MatGetSize(coarse_mat,&N,NULL));
8573         CHKERRQ(ISDestroy(&row));
8574         CHKERRQ(ISDestroy(&col));
8575         CHKERRQ(MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is));
8576         CHKERRQ(MatSetType(coarsedivudotp_is,MATIS));
8577         CHKERRQ(MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N));
8578         CHKERRQ(MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g));
8579         CHKERRQ(ISLocalToGlobalMappingDestroy(&rl2g));
8580         CHKERRQ(ISLocalToGlobalMappingDestroy(&cl2g));
8581         CHKERRQ(MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp));
8582         CHKERRQ(MatDestroy(&coarsedivudotp));
8583         CHKERRQ(PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL));
8584         CHKERRQ(MatDestroy(&coarsedivudotp_is));
8585         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8586         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8587       }
8588     }
8589 
8590     /* propagate symmetry info of coarse matrix */
8591     CHKERRQ(MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE));
8592     if (pc->pmat->symmetric_set) {
8593       CHKERRQ(MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric));
8594     }
8595     if (pc->pmat->hermitian_set) {
8596       CHKERRQ(MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian));
8597     }
8598     if (pc->pmat->spd_set) {
8599       CHKERRQ(MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd));
8600     }
8601     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8602       CHKERRQ(MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE));
8603     }
8604     /* set operators */
8605     CHKERRQ(MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view"));
8606     CHKERRQ(MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix));
8607     CHKERRQ(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat));
8608     if (pcbddc->dbg_flag) {
8609       CHKERRQ(PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level));
8610     }
8611   }
8612   CHKERRQ(MatDestroy(&coarseG));
8613   CHKERRQ(PetscFree(isarray));
8614 #if 0
8615   {
8616     PetscViewer viewer;
8617     char filename[256];
8618     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8619     CHKERRQ(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8620     CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8621     CHKERRQ(MatView(coarse_mat,viewer));
8622     CHKERRQ(PetscViewerPopFormat(viewer));
8623     CHKERRQ(PetscViewerDestroy(&viewer));
8624   }
8625 #endif
8626 
8627   if (corners) {
8628     Vec            gv;
8629     IS             is;
8630     const PetscInt *idxs;
8631     PetscInt       i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8632     PetscScalar    *coords;
8633 
8634     PetscCheck(pcbddc->mat_graph->cloc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates");
8635     CHKERRQ(VecGetSize(pcbddc->coarse_vec,&N));
8636     CHKERRQ(VecGetLocalSize(pcbddc->coarse_vec,&n));
8637     CHKERRQ(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv));
8638     CHKERRQ(VecSetBlockSize(gv,cdim));
8639     CHKERRQ(VecSetSizes(gv,n*cdim,N*cdim));
8640     CHKERRQ(VecSetType(gv,VECSTANDARD));
8641     CHKERRQ(VecSetFromOptions(gv));
8642     CHKERRQ(VecSet(gv,PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
8643 
8644     CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is));
8645     CHKERRQ(ISGetLocalSize(is,&n));
8646     CHKERRQ(ISGetIndices(is,&idxs));
8647     CHKERRQ(PetscMalloc1(n*cdim,&coords));
8648     for (i=0;i<n;i++) {
8649       for (d=0;d<cdim;d++) {
8650         coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8651       }
8652     }
8653     CHKERRQ(ISRestoreIndices(is,&idxs));
8654     CHKERRQ(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is));
8655 
8656     CHKERRQ(ISGetLocalSize(corners,&n));
8657     CHKERRQ(ISGetIndices(corners,&idxs));
8658     CHKERRQ(VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES));
8659     CHKERRQ(ISRestoreIndices(corners,&idxs));
8660     CHKERRQ(PetscFree(coords));
8661     CHKERRQ(VecAssemblyBegin(gv));
8662     CHKERRQ(VecAssemblyEnd(gv));
8663     CHKERRQ(VecGetArray(gv,&coords));
8664     if (pcbddc->coarse_ksp) {
8665       PC        coarse_pc;
8666       PetscBool isbddc;
8667 
8668       CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc));
8669       CHKERRQ(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc));
8670       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8671         PetscReal *realcoords;
8672 
8673         CHKERRQ(VecGetLocalSize(gv,&n));
8674 #if defined(PETSC_USE_COMPLEX)
8675         CHKERRQ(PetscMalloc1(n,&realcoords));
8676         for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8677 #else
8678         realcoords = coords;
8679 #endif
8680         CHKERRQ(PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords));
8681 #if defined(PETSC_USE_COMPLEX)
8682         CHKERRQ(PetscFree(realcoords));
8683 #endif
8684       }
8685     }
8686     CHKERRQ(VecRestoreArray(gv,&coords));
8687     CHKERRQ(VecDestroy(&gv));
8688   }
8689   CHKERRQ(ISDestroy(&corners));
8690 
8691   if (pcbddc->coarse_ksp) {
8692     Vec crhs,csol;
8693 
8694     CHKERRQ(KSPGetSolution(pcbddc->coarse_ksp,&csol));
8695     CHKERRQ(KSPGetRhs(pcbddc->coarse_ksp,&crhs));
8696     if (!csol) {
8697       CHKERRQ(MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL));
8698     }
8699     if (!crhs) {
8700       CHKERRQ(MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs)));
8701     }
8702   }
8703   CHKERRQ(MatDestroy(&coarsedivudotp));
8704 
8705   /* compute null space for coarse solver if the benign trick has been requested */
8706   if (pcbddc->benign_null) {
8707 
8708     CHKERRQ(VecSet(pcbddc->vec1_P,0.));
8709     for (i=0;i<pcbddc->benign_n;i++) {
8710       CHKERRQ(VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES));
8711     }
8712     CHKERRQ(VecAssemblyBegin(pcbddc->vec1_P));
8713     CHKERRQ(VecAssemblyEnd(pcbddc->vec1_P));
8714     CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD));
8715     CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD));
8716     if (coarse_mat) {
8717       Vec         nullv;
8718       PetscScalar *array,*array2;
8719       PetscInt    nl;
8720 
8721       CHKERRQ(MatCreateVecs(coarse_mat,&nullv,NULL));
8722       CHKERRQ(VecGetLocalSize(nullv,&nl));
8723       CHKERRQ(VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array));
8724       CHKERRQ(VecGetArray(nullv,&array2));
8725       CHKERRQ(PetscArraycpy(array2,array,nl));
8726       CHKERRQ(VecRestoreArray(nullv,&array2));
8727       CHKERRQ(VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array));
8728       CHKERRQ(VecNormalize(nullv,NULL));
8729       CHKERRQ(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace));
8730       CHKERRQ(VecDestroy(&nullv));
8731     }
8732   }
8733   CHKERRQ(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0));
8734 
8735   CHKERRQ(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0));
8736   if (pcbddc->coarse_ksp) {
8737     PetscBool ispreonly;
8738 
8739     if (CoarseNullSpace) {
8740       PetscBool isnull;
8741       CHKERRQ(MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull));
8742       if (isnull) {
8743         CHKERRQ(MatSetNullSpace(coarse_mat,CoarseNullSpace));
8744       }
8745       /* TODO: add local nullspaces (if any) */
8746     }
8747     /* setup coarse ksp */
8748     CHKERRQ(KSPSetUp(pcbddc->coarse_ksp));
8749     /* Check coarse problem if in debug mode or if solving with an iterative method */
8750     CHKERRQ(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly));
8751     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8752       KSP       check_ksp;
8753       KSPType   check_ksp_type;
8754       PC        check_pc;
8755       Vec       check_vec,coarse_vec;
8756       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8757       PetscInt  its;
8758       PetscBool compute_eigs;
8759       PetscReal *eigs_r,*eigs_c;
8760       PetscInt  neigs;
8761       const char *prefix;
8762 
8763       /* Create ksp object suitable for estimation of extreme eigenvalues */
8764       CHKERRQ(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp));
8765       CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0));
8766       CHKERRQ(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE));
8767       CHKERRQ(KSPSetOperators(check_ksp,coarse_mat,coarse_mat));
8768       CHKERRQ(KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size));
8769       /* prevent from setup unneeded object */
8770       CHKERRQ(KSPGetPC(check_ksp,&check_pc));
8771       CHKERRQ(PCSetType(check_pc,PCNONE));
8772       if (ispreonly) {
8773         check_ksp_type = KSPPREONLY;
8774         compute_eigs = PETSC_FALSE;
8775       } else {
8776         check_ksp_type = KSPGMRES;
8777         compute_eigs = PETSC_TRUE;
8778       }
8779       CHKERRQ(KSPSetType(check_ksp,check_ksp_type));
8780       CHKERRQ(KSPSetComputeSingularValues(check_ksp,compute_eigs));
8781       CHKERRQ(KSPSetComputeEigenvalues(check_ksp,compute_eigs));
8782       CHKERRQ(KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1));
8783       CHKERRQ(KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix));
8784       CHKERRQ(KSPSetOptionsPrefix(check_ksp,prefix));
8785       CHKERRQ(KSPAppendOptionsPrefix(check_ksp,"check_"));
8786       CHKERRQ(KSPSetFromOptions(check_ksp));
8787       CHKERRQ(KSPSetUp(check_ksp));
8788       CHKERRQ(KSPGetPC(pcbddc->coarse_ksp,&check_pc));
8789       CHKERRQ(KSPSetPC(check_ksp,check_pc));
8790       /* create random vec */
8791       CHKERRQ(MatCreateVecs(coarse_mat,&coarse_vec,&check_vec));
8792       CHKERRQ(VecSetRandom(check_vec,NULL));
8793       CHKERRQ(MatMult(coarse_mat,check_vec,coarse_vec));
8794       /* solve coarse problem */
8795       CHKERRQ(KSPSolve(check_ksp,coarse_vec,coarse_vec));
8796       CHKERRQ(KSPCheckSolve(check_ksp,pc,coarse_vec));
8797       /* set eigenvalue estimation if preonly has not been requested */
8798       if (compute_eigs) {
8799         CHKERRQ(PetscMalloc1(pcbddc->coarse_size+1,&eigs_r));
8800         CHKERRQ(PetscMalloc1(pcbddc->coarse_size+1,&eigs_c));
8801         CHKERRQ(KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs));
8802         if (neigs) {
8803           lambda_max = eigs_r[neigs-1];
8804           lambda_min = eigs_r[0];
8805           if (pcbddc->use_coarse_estimates) {
8806             if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8807               CHKERRQ(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min));
8808               CHKERRQ(KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min)));
8809             }
8810           }
8811         }
8812       }
8813 
8814       /* check coarse problem residual error */
8815       if (pcbddc->dbg_flag) {
8816         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8817         CHKERRQ(PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1)));
8818         CHKERRQ(VecAXPY(check_vec,-1.0,coarse_vec));
8819         CHKERRQ(VecNorm(check_vec,NORM_INFINITY,&infty_error));
8820         CHKERRQ(MatMult(coarse_mat,check_vec,coarse_vec));
8821         CHKERRQ(VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error));
8822         CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates));
8823         CHKERRQ(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer));
8824         CHKERRQ(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer));
8825         CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error));
8826         CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error));
8827         if (CoarseNullSpace) {
8828           CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n"));
8829         }
8830         if (compute_eigs) {
8831           PetscReal          lambda_max_s,lambda_min_s;
8832           KSPConvergedReason reason;
8833           CHKERRQ(KSPGetType(check_ksp,&check_ksp_type));
8834           CHKERRQ(KSPGetIterationNumber(check_ksp,&its));
8835           CHKERRQ(KSPGetConvergedReason(check_ksp,&reason));
8836           CHKERRQ(KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s));
8837           CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s));
8838           for (i=0;i<neigs;i++) {
8839             CHKERRQ(PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]));
8840           }
8841         }
8842         CHKERRQ(PetscViewerFlush(dbg_viewer));
8843         CHKERRQ(PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1)));
8844       }
8845       CHKERRQ(VecDestroy(&check_vec));
8846       CHKERRQ(VecDestroy(&coarse_vec));
8847       CHKERRQ(KSPDestroy(&check_ksp));
8848       if (compute_eigs) {
8849         CHKERRQ(PetscFree(eigs_r));
8850         CHKERRQ(PetscFree(eigs_c));
8851       }
8852     }
8853   }
8854   CHKERRQ(MatNullSpaceDestroy(&CoarseNullSpace));
8855   /* print additional info */
8856   if (pcbddc->dbg_flag) {
8857     /* waits until all processes reaches this point */
8858     CHKERRQ(PetscBarrier((PetscObject)pc));
8859     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level));
8860     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
8861   }
8862 
8863   /* free memory */
8864   CHKERRQ(MatDestroy(&coarse_mat));
8865   CHKERRQ(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0));
8866   PetscFunctionReturn(0);
8867 }
8868 
8869 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8870 {
8871   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
8872   PC_IS*         pcis = (PC_IS*)pc->data;
8873   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
8874   IS             subset,subset_mult,subset_n;
8875   PetscInt       local_size,coarse_size=0;
8876   PetscInt       *local_primal_indices=NULL;
8877   const PetscInt *t_local_primal_indices;
8878 
8879   PetscFunctionBegin;
8880   /* Compute global number of coarse dofs */
8881   PetscCheckFalse(pcbddc->local_primal_size && !pcbddc->local_primal_ref_node,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8882   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n));
8883   CHKERRQ(ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset));
8884   CHKERRQ(ISDestroy(&subset_n));
8885   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult));
8886   CHKERRQ(ISRenumber(subset,subset_mult,&coarse_size,&subset_n));
8887   CHKERRQ(ISDestroy(&subset));
8888   CHKERRQ(ISDestroy(&subset_mult));
8889   CHKERRQ(ISGetLocalSize(subset_n,&local_size));
8890   PetscCheckFalse(local_size != pcbddc->local_primal_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8891   CHKERRQ(PetscMalloc1(local_size,&local_primal_indices));
8892   CHKERRQ(ISGetIndices(subset_n,&t_local_primal_indices));
8893   CHKERRQ(PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size));
8894   CHKERRQ(ISRestoreIndices(subset_n,&t_local_primal_indices));
8895   CHKERRQ(ISDestroy(&subset_n));
8896 
8897   /* check numbering */
8898   if (pcbddc->dbg_flag) {
8899     PetscScalar coarsesum,*array,*array2;
8900     PetscInt    i;
8901     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
8902 
8903     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
8904     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n"));
8905     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n"));
8906     CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8907     /* counter */
8908     CHKERRQ(VecSet(pcis->vec1_global,0.0));
8909     CHKERRQ(VecSet(pcis->vec1_N,1.0));
8910     CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
8911     CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
8912     CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD));
8913     CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD));
8914     CHKERRQ(VecSet(pcis->vec1_N,0.0));
8915     for (i=0;i<pcbddc->local_primal_size;i++) {
8916       CHKERRQ(VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES));
8917     }
8918     CHKERRQ(VecAssemblyBegin(pcis->vec1_N));
8919     CHKERRQ(VecAssemblyEnd(pcis->vec1_N));
8920     CHKERRQ(VecSet(pcis->vec1_global,0.0));
8921     CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
8922     CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
8923     CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD));
8924     CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD));
8925     CHKERRQ(VecGetArray(pcis->vec1_N,&array));
8926     CHKERRQ(VecGetArray(pcis->vec2_N,&array2));
8927     for (i=0;i<pcis->n;i++) {
8928       if (array[i] != 0.0 && array[i] != array2[i]) {
8929         PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8930         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8931         set_error = PETSC_TRUE;
8932         CHKERRQ(ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi));
8933         CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh));
8934       }
8935     }
8936     CHKERRQ(VecRestoreArray(pcis->vec2_N,&array2));
8937     CHKERRMPI(MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc)));
8938     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
8939     for (i=0;i<pcis->n;i++) {
8940       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8941     }
8942     CHKERRQ(VecRestoreArray(pcis->vec1_N,&array));
8943     CHKERRQ(VecSet(pcis->vec1_global,0.0));
8944     CHKERRQ(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
8945     CHKERRQ(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE));
8946     CHKERRQ(VecSum(pcis->vec1_global,&coarsesum));
8947     CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum)));
8948     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8949       PetscInt *gidxs;
8950 
8951       CHKERRQ(PetscMalloc1(pcbddc->local_primal_size,&gidxs));
8952       CHKERRQ(ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs));
8953       CHKERRQ(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n"));
8954       CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
8955       CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank));
8956       for (i=0;i<pcbddc->local_primal_size;i++) {
8957         CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]));
8958       }
8959       CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
8960       CHKERRQ(PetscFree(gidxs));
8961     }
8962     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
8963     CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8964     PetscCheck(!set_error_reduced,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8965   }
8966 
8967   /* get back data */
8968   *coarse_size_n = coarse_size;
8969   *local_primal_indices_n = local_primal_indices;
8970   PetscFunctionReturn(0);
8971 }
8972 
8973 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8974 {
8975   IS             localis_t;
8976   PetscInt       i,lsize,*idxs,n;
8977   PetscScalar    *vals;
8978 
8979   PetscFunctionBegin;
8980   /* get indices in local ordering exploiting local to global map */
8981   CHKERRQ(ISGetLocalSize(globalis,&lsize));
8982   CHKERRQ(PetscMalloc1(lsize,&vals));
8983   for (i=0;i<lsize;i++) vals[i] = 1.0;
8984   CHKERRQ(ISGetIndices(globalis,(const PetscInt**)&idxs));
8985   CHKERRQ(VecSet(gwork,0.0));
8986   CHKERRQ(VecSet(lwork,0.0));
8987   if (idxs) { /* multilevel guard */
8988     CHKERRQ(VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE));
8989     CHKERRQ(VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES));
8990   }
8991   CHKERRQ(VecAssemblyBegin(gwork));
8992   CHKERRQ(ISRestoreIndices(globalis,(const PetscInt**)&idxs));
8993   CHKERRQ(PetscFree(vals));
8994   CHKERRQ(VecAssemblyEnd(gwork));
8995   /* now compute set in local ordering */
8996   CHKERRQ(VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD));
8997   CHKERRQ(VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD));
8998   CHKERRQ(VecGetArrayRead(lwork,(const PetscScalar**)&vals));
8999   CHKERRQ(VecGetSize(lwork,&n));
9000   for (i=0,lsize=0;i<n;i++) {
9001     if (PetscRealPart(vals[i]) > 0.5) {
9002       lsize++;
9003     }
9004   }
9005   CHKERRQ(PetscMalloc1(lsize,&idxs));
9006   for (i=0,lsize=0;i<n;i++) {
9007     if (PetscRealPart(vals[i]) > 0.5) {
9008       idxs[lsize++] = i;
9009     }
9010   }
9011   CHKERRQ(VecRestoreArrayRead(lwork,(const PetscScalar**)&vals));
9012   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t));
9013   *localis = localis_t;
9014   PetscFunctionReturn(0);
9015 }
9016 
9017 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9018 {
9019   PC_IS               *pcis=(PC_IS*)pc->data;
9020   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9021   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
9022   Mat                 S_j;
9023   PetscInt            *used_xadj,*used_adjncy;
9024   PetscBool           free_used_adj;
9025   PetscErrorCode      ierr;
9026 
9027   PetscFunctionBegin;
9028   CHKERRQ(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0));
9029   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9030   free_used_adj = PETSC_FALSE;
9031   if (pcbddc->sub_schurs_layers == -1) {
9032     used_xadj = NULL;
9033     used_adjncy = NULL;
9034   } else {
9035     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9036       used_xadj = pcbddc->mat_graph->xadj;
9037       used_adjncy = pcbddc->mat_graph->adjncy;
9038     } else if (pcbddc->computed_rowadj) {
9039       used_xadj = pcbddc->mat_graph->xadj;
9040       used_adjncy = pcbddc->mat_graph->adjncy;
9041     } else {
9042       PetscBool      flg_row=PETSC_FALSE;
9043       const PetscInt *xadj,*adjncy;
9044       PetscInt       nvtxs;
9045 
9046       CHKERRQ(MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row));
9047       if (flg_row) {
9048         CHKERRQ(PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy));
9049         CHKERRQ(PetscArraycpy(used_xadj,xadj,nvtxs+1));
9050         CHKERRQ(PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]));
9051         free_used_adj = PETSC_TRUE;
9052       } else {
9053         pcbddc->sub_schurs_layers = -1;
9054         used_xadj = NULL;
9055         used_adjncy = NULL;
9056       }
9057       CHKERRQ(MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row));
9058     }
9059   }
9060 
9061   /* setup sub_schurs data */
9062   CHKERRQ(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j));
9063   if (!sub_schurs->schur_explicit) {
9064     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9065     CHKERRQ(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D));
9066     CHKERRQ(PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL));
9067   } else {
9068     Mat       change = NULL;
9069     Vec       scaling = NULL;
9070     IS        change_primal = NULL, iP;
9071     PetscInt  benign_n;
9072     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
9073     PetscBool need_change = PETSC_FALSE;
9074     PetscBool discrete_harmonic = PETSC_FALSE;
9075 
9076     if (!pcbddc->use_vertices && reuse_solvers) {
9077       PetscInt n_vertices;
9078 
9079       CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices));
9080       reuse_solvers = (PetscBool)!n_vertices;
9081     }
9082     if (!pcbddc->benign_change_explicit) {
9083       benign_n = pcbddc->benign_n;
9084     } else {
9085       benign_n = 0;
9086     }
9087     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9088        We need a global reduction to avoid possible deadlocks.
9089        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9090     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9091       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9092       CHKERRMPI(MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc)));
9093       need_change = (PetscBool)(!need_change);
9094     }
9095     /* If the user defines additional constraints, we import them here.
9096        We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9097     if (need_change) {
9098       PC_IS   *pcisf;
9099       PC_BDDC *pcbddcf;
9100       PC      pcf;
9101 
9102       PetscCheck(!pcbddc->sub_schurs_rebuild,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
9103       CHKERRQ(PCCreate(PetscObjectComm((PetscObject)pc),&pcf));
9104       CHKERRQ(PCSetOperators(pcf,pc->mat,pc->pmat));
9105       CHKERRQ(PCSetType(pcf,PCBDDC));
9106 
9107       /* hacks */
9108       pcisf                        = (PC_IS*)pcf->data;
9109       pcisf->is_B_local            = pcis->is_B_local;
9110       pcisf->vec1_N                = pcis->vec1_N;
9111       pcisf->BtoNmap               = pcis->BtoNmap;
9112       pcisf->n                     = pcis->n;
9113       pcisf->n_B                   = pcis->n_B;
9114       pcbddcf                      = (PC_BDDC*)pcf->data;
9115       CHKERRQ(PetscFree(pcbddcf->mat_graph));
9116       pcbddcf->mat_graph           = pcbddc->mat_graph;
9117       pcbddcf->use_faces           = PETSC_TRUE;
9118       pcbddcf->use_change_of_basis = PETSC_TRUE;
9119       pcbddcf->use_change_on_faces = PETSC_TRUE;
9120       pcbddcf->use_qr_single       = PETSC_TRUE;
9121       pcbddcf->fake_change         = PETSC_TRUE;
9122 
9123       /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9124       CHKERRQ(PCBDDCConstraintsSetUp(pcf));
9125       sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9126       CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal));
9127       change = pcbddcf->ConstraintMatrix;
9128       pcbddcf->ConstraintMatrix = NULL;
9129 
9130       /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9131       CHKERRQ(PetscFree(pcbddcf->sub_schurs));
9132       CHKERRQ(MatNullSpaceDestroy(&pcbddcf->onearnullspace));
9133       CHKERRQ(PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult));
9134       CHKERRQ(PetscFree(pcbddcf->primal_indices_local_idxs));
9135       CHKERRQ(PetscFree(pcbddcf->onearnullvecs_state));
9136       CHKERRQ(PetscFree(pcf->data));
9137       pcf->ops->destroy = NULL;
9138       pcf->ops->reset   = NULL;
9139       CHKERRQ(PCDestroy(&pcf));
9140     }
9141     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9142 
9143     CHKERRQ(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP));
9144     if (iP) {
9145       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr);
9146       CHKERRQ(PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL));
9147       ierr = PetscOptionsEnd();CHKERRQ(ierr);
9148     }
9149     if (discrete_harmonic) {
9150       Mat A;
9151       CHKERRQ(MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A));
9152       CHKERRQ(MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL));
9153       CHKERRQ(PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP));
9154       CHKERRQ(PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal));
9155       CHKERRQ(MatDestroy(&A));
9156     } else {
9157       CHKERRQ(PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal));
9158     }
9159     CHKERRQ(MatDestroy(&change));
9160     CHKERRQ(ISDestroy(&change_primal));
9161   }
9162   CHKERRQ(MatDestroy(&S_j));
9163 
9164   /* free adjacency */
9165   if (free_used_adj) {
9166     CHKERRQ(PetscFree2(used_xadj,used_adjncy));
9167   }
9168   CHKERRQ(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0));
9169   PetscFunctionReturn(0);
9170 }
9171 
9172 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9173 {
9174   PC_IS               *pcis=(PC_IS*)pc->data;
9175   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9176   PCBDDCGraph         graph;
9177 
9178   PetscFunctionBegin;
9179   /* attach interface graph for determining subsets */
9180   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9181     IS       verticesIS,verticescomm;
9182     PetscInt vsize,*idxs;
9183 
9184     CHKERRQ(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS));
9185     CHKERRQ(ISGetSize(verticesIS,&vsize));
9186     CHKERRQ(ISGetIndices(verticesIS,(const PetscInt**)&idxs));
9187     CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm));
9188     CHKERRQ(ISRestoreIndices(verticesIS,(const PetscInt**)&idxs));
9189     CHKERRQ(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS));
9190     CHKERRQ(PCBDDCGraphCreate(&graph));
9191     CHKERRQ(PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount));
9192     CHKERRQ(PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm));
9193     CHKERRQ(ISDestroy(&verticescomm));
9194     CHKERRQ(PCBDDCGraphComputeConnectedComponents(graph));
9195   } else {
9196     graph = pcbddc->mat_graph;
9197   }
9198   /* print some info */
9199   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9200     IS       vertices;
9201     PetscInt nv,nedges,nfaces;
9202     CHKERRQ(PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer));
9203     CHKERRQ(PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices));
9204     CHKERRQ(ISGetSize(vertices,&nv));
9205     CHKERRQ(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9206     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n"));
9207     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices));
9208     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges));
9209     CHKERRQ(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces));
9210     CHKERRQ(PetscViewerFlush(pcbddc->dbg_viewer));
9211     CHKERRQ(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9212     CHKERRQ(PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices));
9213   }
9214 
9215   /* sub_schurs init */
9216   if (!pcbddc->sub_schurs) {
9217     CHKERRQ(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9218   }
9219   CHKERRQ(PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild));
9220 
9221   /* free graph struct */
9222   if (pcbddc->sub_schurs_rebuild) {
9223     CHKERRQ(PCBDDCGraphDestroy(&graph));
9224   }
9225   PetscFunctionReturn(0);
9226 }
9227 
9228 PetscErrorCode PCBDDCCheckOperator(PC pc)
9229 {
9230   PC_IS               *pcis=(PC_IS*)pc->data;
9231   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9232 
9233   PetscFunctionBegin;
9234   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9235     IS             zerodiag = NULL;
9236     Mat            S_j,B0_B=NULL;
9237     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
9238     PetscScalar    *p0_check,*array,*array2;
9239     PetscReal      norm;
9240     PetscInt       i;
9241 
9242     /* B0 and B0_B */
9243     if (zerodiag) {
9244       IS       dummy;
9245 
9246       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy));
9247       CHKERRQ(MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B));
9248       CHKERRQ(MatCreateVecs(B0_B,NULL,&dummy_vec));
9249       CHKERRQ(ISDestroy(&dummy));
9250     }
9251     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
9252     CHKERRQ(VecDuplicate(pcbddc->vec1_P,&vec_scale_P));
9253     CHKERRQ(VecSet(pcbddc->vec1_P,1.0));
9254     CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD));
9255     CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD));
9256     CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE));
9257     CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE));
9258     CHKERRQ(VecReciprocal(vec_scale_P));
9259     /* S_j */
9260     CHKERRQ(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j));
9261     CHKERRQ(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D));
9262 
9263     /* mimic vector in \widetilde{W}_\Gamma */
9264     CHKERRQ(VecSetRandom(pcis->vec1_N,NULL));
9265     /* continuous in primal space */
9266     CHKERRQ(VecSetRandom(pcbddc->coarse_vec,NULL));
9267     CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE));
9268     CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE));
9269     CHKERRQ(VecGetArray(pcbddc->vec1_P,&array));
9270     CHKERRQ(PetscCalloc1(pcbddc->benign_n,&p0_check));
9271     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9272     CHKERRQ(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES));
9273     CHKERRQ(VecRestoreArray(pcbddc->vec1_P,&array));
9274     CHKERRQ(VecAssemblyBegin(pcis->vec1_N));
9275     CHKERRQ(VecAssemblyEnd(pcis->vec1_N));
9276     CHKERRQ(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD));
9277     CHKERRQ(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD));
9278     CHKERRQ(VecDuplicate(pcis->vec2_B,&vec_check_B));
9279     CHKERRQ(VecCopy(pcis->vec2_B,vec_check_B));
9280 
9281     /* assemble rhs for coarse problem */
9282     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9283     /* local with Schur */
9284     CHKERRQ(MatMult(S_j,pcis->vec2_B,pcis->vec1_B));
9285     if (zerodiag) {
9286       CHKERRQ(VecGetArray(dummy_vec,&array));
9287       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9288       CHKERRQ(VecRestoreArray(dummy_vec,&array));
9289       CHKERRQ(MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B));
9290     }
9291     /* sum on primal nodes the local contributions */
9292     CHKERRQ(VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE));
9293     CHKERRQ(VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE));
9294     CHKERRQ(VecGetArray(pcis->vec1_N,&array));
9295     CHKERRQ(VecGetArray(pcbddc->vec1_P,&array2));
9296     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9297     CHKERRQ(VecRestoreArray(pcbddc->vec1_P,&array2));
9298     CHKERRQ(VecRestoreArray(pcis->vec1_N,&array));
9299     CHKERRQ(VecSet(pcbddc->coarse_vec,0.));
9300     CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD));
9301     CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD));
9302     CHKERRQ(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE));
9303     CHKERRQ(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE));
9304     CHKERRQ(VecGetArray(pcbddc->vec1_P,&array));
9305     /* scale primal nodes (BDDC sums contibutions) */
9306     CHKERRQ(VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P));
9307     CHKERRQ(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES));
9308     CHKERRQ(VecRestoreArray(pcbddc->vec1_P,&array));
9309     CHKERRQ(VecAssemblyBegin(pcis->vec1_N));
9310     CHKERRQ(VecAssemblyEnd(pcis->vec1_N));
9311     CHKERRQ(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD));
9312     CHKERRQ(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD));
9313     /* global: \widetilde{B0}_B w_\Gamma */
9314     if (zerodiag) {
9315       CHKERRQ(MatMult(B0_B,pcis->vec2_B,dummy_vec));
9316       CHKERRQ(VecGetArray(dummy_vec,&array));
9317       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9318       CHKERRQ(VecRestoreArray(dummy_vec,&array));
9319     }
9320     /* BDDC */
9321     CHKERRQ(VecSet(pcis->vec1_D,0.));
9322     CHKERRQ(PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE));
9323 
9324     CHKERRQ(VecCopy(pcis->vec1_B,pcis->vec2_B));
9325     CHKERRQ(VecAXPY(pcis->vec1_B,-1.0,vec_check_B));
9326     CHKERRQ(VecNorm(pcis->vec1_B,NORM_INFINITY,&norm));
9327     CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm));
9328     for (i=0;i<pcbddc->benign_n;i++) {
9329       CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])));
9330     }
9331     CHKERRQ(PetscFree(p0_check));
9332     CHKERRQ(VecDestroy(&vec_scale_P));
9333     CHKERRQ(VecDestroy(&vec_check_B));
9334     CHKERRQ(VecDestroy(&dummy_vec));
9335     CHKERRQ(MatDestroy(&S_j));
9336     CHKERRQ(MatDestroy(&B0_B));
9337   }
9338   PetscFunctionReturn(0);
9339 }
9340 
9341 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9342 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9343 {
9344   Mat            At;
9345   IS             rows;
9346   PetscInt       rst,ren;
9347   PetscLayout    rmap;
9348 
9349   PetscFunctionBegin;
9350   rst = ren = 0;
9351   if (ccomm != MPI_COMM_NULL) {
9352     CHKERRQ(PetscLayoutCreate(ccomm,&rmap));
9353     CHKERRQ(PetscLayoutSetSize(rmap,A->rmap->N));
9354     CHKERRQ(PetscLayoutSetBlockSize(rmap,1));
9355     CHKERRQ(PetscLayoutSetUp(rmap));
9356     CHKERRQ(PetscLayoutGetRange(rmap,&rst,&ren));
9357   }
9358   CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows));
9359   CHKERRQ(MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At));
9360   CHKERRQ(ISDestroy(&rows));
9361 
9362   if (ccomm != MPI_COMM_NULL) {
9363     Mat_MPIAIJ *a,*b;
9364     IS         from,to;
9365     Vec        gvec;
9366     PetscInt   lsize;
9367 
9368     CHKERRQ(MatCreate(ccomm,B));
9369     CHKERRQ(MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N));
9370     CHKERRQ(MatSetType(*B,MATAIJ));
9371     CHKERRQ(PetscLayoutDestroy(&((*B)->rmap)));
9372     CHKERRQ(PetscLayoutSetUp((*B)->cmap));
9373     a    = (Mat_MPIAIJ*)At->data;
9374     b    = (Mat_MPIAIJ*)(*B)->data;
9375     CHKERRMPI(MPI_Comm_size(ccomm,&b->size));
9376     CHKERRMPI(MPI_Comm_rank(ccomm,&b->rank));
9377     CHKERRQ(PetscObjectReference((PetscObject)a->A));
9378     CHKERRQ(PetscObjectReference((PetscObject)a->B));
9379     b->A = a->A;
9380     b->B = a->B;
9381 
9382     b->donotstash      = a->donotstash;
9383     b->roworiented     = a->roworiented;
9384     b->rowindices      = NULL;
9385     b->rowvalues       = NULL;
9386     b->getrowactive    = PETSC_FALSE;
9387 
9388     (*B)->rmap         = rmap;
9389     (*B)->factortype   = A->factortype;
9390     (*B)->assembled    = PETSC_TRUE;
9391     (*B)->insertmode   = NOT_SET_VALUES;
9392     (*B)->preallocated = PETSC_TRUE;
9393 
9394     if (a->colmap) {
9395 #if defined(PETSC_USE_CTABLE)
9396       CHKERRQ(PetscTableCreateCopy(a->colmap,&b->colmap));
9397 #else
9398       CHKERRQ(PetscMalloc1(At->cmap->N,&b->colmap));
9399       CHKERRQ(PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt)));
9400       CHKERRQ(PetscArraycpy(b->colmap,a->colmap,At->cmap->N));
9401 #endif
9402     } else b->colmap = NULL;
9403     if (a->garray) {
9404       PetscInt len;
9405       len  = a->B->cmap->n;
9406       CHKERRQ(PetscMalloc1(len+1,&b->garray));
9407       CHKERRQ(PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt)));
9408       if (len) CHKERRQ(PetscArraycpy(b->garray,a->garray,len));
9409     } else b->garray = NULL;
9410 
9411     CHKERRQ(PetscObjectReference((PetscObject)a->lvec));
9412     b->lvec = a->lvec;
9413     CHKERRQ(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec));
9414 
9415     /* cannot use VecScatterCopy */
9416     CHKERRQ(VecGetLocalSize(b->lvec,&lsize));
9417     CHKERRQ(ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from));
9418     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to));
9419     CHKERRQ(MatCreateVecs(*B,&gvec,NULL));
9420     CHKERRQ(VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx));
9421     CHKERRQ(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx));
9422     CHKERRQ(ISDestroy(&from));
9423     CHKERRQ(ISDestroy(&to));
9424     CHKERRQ(VecDestroy(&gvec));
9425   }
9426   CHKERRQ(MatDestroy(&At));
9427   PetscFunctionReturn(0);
9428 }
9429