xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision c9ed8603f65e57fce3d31de1caa5774cbb25447b)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0"
10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   if (!pcbddc->benign_sf) {
17     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
18     if (pcbddc->benign_p0_gidx >= 0) {
19       ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
20     } else {
21       ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
22     }
23   }
24   if (pop) { /* use SF to get values */
25     PetscScalar *array;
26 
27     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
28     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
29     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
30     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
31   } else { /* use VecSetValue */
32     if (pcbddc->benign_p0_gidx >= 0) {
33       ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
34     }
35     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
36     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 #undef __FUNCT__
42 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
43 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
44 {
45   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
46   PetscErrorCode ierr;
47 
48   PetscFunctionBegin;
49   /* TODO: add error checking
50     - avoid nested pop (or push) calls.
51     - cannot push before pop.
52   */
53   if (pcbddc->benign_p0_lidx < 0) {
54     PetscFunctionReturn(0);
55   }
56   if (pop) {
57     const PetscInt    *cB0_cols;
58     PetscInt          cB0_ncol;
59     const PetscScalar *cB0_vals;
60 
61     /* extract B_0 */
62     ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
63     pcbddc->B0_ncol = cB0_ncol;
64     ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
65     ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr);
66     ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr);
67     ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr);
68     ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
69     /* remove rows and cols from local problem */
70     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
71     ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr);
72   } else { /* push */
73     ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
74     ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
75     ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr);
76     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
77     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
78   }
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "PCBDDCAdaptiveSelection"
84 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
85 {
86   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
87   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
88   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
89   PetscBLASInt    *B_iwork,*B_ifail;
90   PetscScalar     *work,lwork;
91   PetscScalar     *St,*S,*eigv;
92   PetscScalar     *Sarray,*Starray;
93   PetscReal       *eigs,thresh;
94   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
95   PetscBool       allocated_S_St;
96 #if defined(PETSC_USE_COMPLEX)
97   PetscReal       *rwork;
98 #endif
99   PetscErrorCode  ierr;
100 
101   PetscFunctionBegin;
102   if (!sub_schurs->use_mumps) {
103     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
104   }
105 
106   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
107     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
108   }
109 
110   if (pcbddc->dbg_flag) {
111     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
112     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
113     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
114     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
115   }
116 
117   if (pcbddc->dbg_flag) {
118     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
119   }
120 
121   /* max size of subsets */
122   mss = 0;
123   for (i=0;i<sub_schurs->n_subs;i++) {
124     PetscInt subset_size;
125 
126     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
127     mss = PetscMax(mss,subset_size);
128   }
129 
130   /* min/max and threshold */
131   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
132   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
133   nmax = PetscMax(nmin,nmax);
134   allocated_S_St = PETSC_FALSE;
135   if (nmin) {
136     allocated_S_St = PETSC_TRUE;
137   }
138 
139   /* allocate lapack workspace */
140   cum = cum2 = 0;
141   maxneigs = 0;
142   for (i=0;i<sub_schurs->n_subs;i++) {
143     PetscInt n,subset_size;
144 
145     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
146     n = PetscMin(subset_size,nmax);
147     cum += subset_size;
148     cum2 += subset_size*n;
149     maxneigs = PetscMax(maxneigs,n);
150   }
151   if (mss) {
152     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
153       PetscBLASInt B_itype = 1;
154       PetscBLASInt B_N = mss;
155       PetscReal    zero = 0.0;
156       PetscReal    eps = 0.0; /* dlamch? */
157 
158       B_lwork = -1;
159       S = NULL;
160       St = NULL;
161       eigs = NULL;
162       eigv = NULL;
163       B_iwork = NULL;
164       B_ifail = NULL;
165 #if defined(PETSC_USE_COMPLEX)
166       rwork = NULL;
167 #endif
168       thresh = 1.0;
169       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
170 #if defined(PETSC_USE_COMPLEX)
171       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
172 #else
173       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
174 #endif
175       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
176       ierr = PetscFPTrapPop();CHKERRQ(ierr);
177     } else {
178         /* TODO */
179     }
180   } else {
181     lwork = 0;
182   }
183 
184   nv = 0;
185   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
186     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
187   }
188   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
189   if (allocated_S_St) {
190     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
191   }
192   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
193 #if defined(PETSC_USE_COMPLEX)
194   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
195 #endif
196   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
197                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
198                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
199                       nv+cum,&pcbddc->adaptive_constraints_idxs,
200                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
201   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
202 
203   maxneigs = 0;
204   cum = cum2 = cumarray = 0;
205   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
206   pcbddc->adaptive_constraints_data_ptr[0] = 0;
207   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
208     const PetscInt *idxs;
209 
210     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
211     for (cum=0;cum<nv;cum++) {
212       pcbddc->adaptive_constraints_n[cum] = 1;
213       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
214       pcbddc->adaptive_constraints_data[cum] = 1.0;
215       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
216       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
217     }
218     cum2 = cum;
219     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
220   }
221 
222   if (mss) { /* multilevel */
223     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
224     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
225   }
226 
227   for (i=0;i<sub_schurs->n_subs;i++) {
228 
229     const PetscInt *idxs;
230     PetscReal      infty = PETSC_MAX_REAL;
231     PetscInt       j,subset_size,eigs_start = 0;
232     PetscBLASInt   B_N;
233     PetscBool      same_data = PETSC_FALSE;
234 
235     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
236     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
237     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
238       if (sub_schurs->is_hermitian) {
239         PetscInt j,k;
240         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
241           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
242           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
243         }
244         for (j=0;j<subset_size;j++) {
245           for (k=j;k<subset_size;k++) {
246             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
247             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
248           }
249         }
250       } else {
251         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
252         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
253       }
254     } else {
255       S = Sarray + cumarray;
256       St = Starray + cumarray;
257     }
258 
259     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
260     /* see if we can save some work */
261     if (sub_schurs->n_subs == 1) {
262       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
263     }
264 
265     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
266       B_neigs = 0;
267     } else {
268       /* Threshold: this is an heuristic for edges */
269       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
270 
271       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
272         PetscBLASInt B_itype = 1;
273         PetscBLASInt B_IL, B_IU;
274         PetscReal    eps = -1.0; /* dlamch? */
275         PetscInt     nmin_s;
276 
277         /* ask for eigenvalues larger than thresh */
278         if (pcbddc->dbg_flag) {
279           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
280         }
281         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
282 #if defined(PETSC_USE_COMPLEX)
283         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
284 #else
285         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
286 #endif
287         ierr = PetscFPTrapPop();CHKERRQ(ierr);
288         if (B_ierr) {
289           if (B_ierr < 0 ) {
290             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
291           } else if (B_ierr <= B_N) {
292             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
293           } else {
294             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
295           }
296         }
297 
298         if (B_neigs > nmax) {
299           if (pcbddc->dbg_flag) {
300             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
301           }
302           eigs_start = B_neigs -nmax;
303           B_neigs = nmax;
304         }
305 
306         nmin_s = PetscMin(nmin,B_N);
307         if (B_neigs < nmin_s) {
308           PetscBLASInt B_neigs2;
309 
310           B_IU = B_N - B_neigs;
311           B_IL = B_N - nmin_s + 1;
312           if (pcbddc->dbg_flag) {
313             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
314           }
315           if (sub_schurs->is_hermitian) {
316             PetscInt j;
317             for (j=0;j<subset_size;j++) {
318               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
319             }
320             for (j=0;j<subset_size;j++) {
321               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
322             }
323           } else {
324             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
325             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
326           }
327           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
328 #if defined(PETSC_USE_COMPLEX)
329           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
330 #else
331           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
332 #endif
333           ierr = PetscFPTrapPop();CHKERRQ(ierr);
334           B_neigs += B_neigs2;
335         }
336         if (B_ierr) {
337           if (B_ierr < 0 ) {
338             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
339           } else if (B_ierr <= B_N) {
340             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
341           } else {
342             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
343           }
344         }
345         if (pcbddc->dbg_flag) {
346           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
347           for (j=0;j<B_neigs;j++) {
348             if (eigs[j] == 0.0) {
349               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
350             } else {
351               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
352             }
353           }
354         }
355       } else {
356           /* TODO */
357       }
358     }
359     maxneigs = PetscMax(B_neigs,maxneigs);
360     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
361     if (B_neigs) {
362       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
363 
364       if (pcbddc->dbg_flag > 1) {
365         PetscInt ii;
366         for (ii=0;ii<B_neigs;ii++) {
367           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
368           for (j=0;j<B_N;j++) {
369 #if defined(PETSC_USE_COMPLEX)
370             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
371             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
372             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
373 #else
374             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
375 #endif
376           }
377         }
378       }
379 #if 0
380       for (j=0;j<B_neigs;j++) {
381         PetscBLASInt Blas_N,Blas_one = 1.0;
382         PetscScalar norm;
383         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
384         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
385                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
386         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
387           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
388         } else {
389           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
390         }
391         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
392       }
393 #endif
394       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
395       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
396       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
397       cum++;
398     }
399     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
400     /* shift for next computation */
401     cumarray += subset_size*subset_size;
402   }
403   if (pcbddc->dbg_flag) {
404     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
405   }
406 
407   if (mss) {
408     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
409     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
410     /* destroy matrices (junk) */
411     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
412     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
413   }
414   if (allocated_S_St) {
415     ierr = PetscFree2(S,St);CHKERRQ(ierr);
416   }
417   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
418 #if defined(PETSC_USE_COMPLEX)
419   ierr = PetscFree(rwork);CHKERRQ(ierr);
420 #endif
421   if (pcbddc->dbg_flag) {
422     PetscInt maxneigs_r;
423     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
424     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "PCBDDCSetUpSolvers"
431 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
432 {
433   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
434   PetscScalar    *coarse_submat_vals;
435   PetscErrorCode ierr;
436 
437   PetscFunctionBegin;
438   /* Setup local scatters R_to_B and (optionally) R_to_D */
439   /* PCBDDCSetUpLocalWorkVectors should be called first! */
440   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
441 
442   /* Setup local neumann solver ksp_R */
443   /* PCBDDCSetUpLocalScatters should be called first! */
444   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
445 
446   /* Change global null space passed in by the user if change of basis has been requested */
447   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
448     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
449   }
450 
451   /*
452      Setup local correction and local part of coarse basis.
453      Gives back the dense local part of the coarse matrix in column major ordering
454   */
455   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
456 
457   /* Compute total number of coarse nodes and setup coarse solver */
458   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
459 
460   /* free */
461   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 #undef __FUNCT__
466 #define __FUNCT__ "PCBDDCResetCustomization"
467 PetscErrorCode PCBDDCResetCustomization(PC pc)
468 {
469   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
470   PetscErrorCode ierr;
471 
472   PetscFunctionBegin;
473   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
474   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
475   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
476   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
477   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
478   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
479   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
480   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
481   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
482   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
483   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "PCBDDCResetTopography"
489 PetscErrorCode PCBDDCResetTopography(PC pc)
490 {
491   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
492   PetscErrorCode ierr;
493 
494   PetscFunctionBegin;
495   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
496   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
497   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
498   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
499   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
502 
503 #undef __FUNCT__
504 #define __FUNCT__ "PCBDDCResetSolvers"
505 PetscErrorCode PCBDDCResetSolvers(PC pc)
506 {
507   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
508   PetscScalar    *array;
509   PetscErrorCode ierr;
510 
511   PetscFunctionBegin;
512   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
513   if (pcbddc->coarse_phi_B) {
514     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
515     ierr = PetscFree(array);CHKERRQ(ierr);
516   }
517   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
518   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
519   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
520   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
521   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
522   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
523   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
524   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
525   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
526   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
527   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
528   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
529   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
530   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
531   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
532   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
533   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
534   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
535   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
536   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
537   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
538   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
539   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
540   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
541   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
542   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
543   ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
549 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
550 {
551   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
552   PC_IS          *pcis = (PC_IS*)pc->data;
553   VecType        impVecType;
554   PetscInt       n_constraints,n_R,old_size,n_benign;
555   PetscErrorCode ierr;
556 
557   PetscFunctionBegin;
558   if (!pcbddc->ConstraintMatrix) {
559     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
560   }
561   /* get sizes */
562   n_benign = 0;
563   if (pcbddc->benign_p0_lidx >= 0) n_benign = 1;
564   n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices;
565   if (n_benign && n_constraints) {
566     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis");
567   }
568   n_R = pcis->n - pcbddc->n_vertices;
569   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
570   /* local work vectors (try to avoid unneeded work)*/
571   /* R nodes */
572   old_size = -1;
573   if (pcbddc->vec1_R) {
574     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
575   }
576   if (n_R != old_size) {
577     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
578     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
579     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
580     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
581     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
582     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
583   }
584   /* local primal dofs */
585   old_size = -1;
586   if (pcbddc->vec1_P) {
587     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
588   }
589   if (pcbddc->local_primal_size != old_size) {
590     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
591     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
592     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
593     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
594   }
595   /* local explicit constraints */
596   old_size = -1;
597   if (pcbddc->vec1_C) {
598     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
599   }
600   if (n_constraints && n_constraints != old_size) {
601     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
602     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
603     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
604     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
605   }
606   PetscFunctionReturn(0);
607 }
608 
609 #undef __FUNCT__
610 #define __FUNCT__ "PCBDDCSetUpCorrection"
611 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
612 {
613   PetscErrorCode  ierr;
614   /* pointers to pcis and pcbddc */
615   PC_IS*          pcis = (PC_IS*)pc->data;
616   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
617   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
618   /* submatrices of local problem */
619   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
620   /* submatrices of benign trick */
621   Mat             B0_R,B0_V;
622   /* submatrices of local coarse problem */
623   Mat             S_VV,S_CV,S_VC,S_CC;
624   /* working matrices */
625   Mat             C_CR;
626   /* additional working stuff */
627   PC              pc_R;
628   Mat             F,B0 = NULL;
629   Vec             vec_R_benign,vec_R_benign_2;
630   PetscBool       isLU,isCHOL,isILU;
631 
632   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
633   PetscScalar     *work;
634   PetscInt        *idx_V_B;
635   PetscInt        n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0;
636   PetscInt        i,n_R,n_D,n_B;
637   PetscBool       unsymmetric_check;
638   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
639   MatType         impMatType;
640   /* some shortcuts to scalars */
641   PetscScalar     one=1.0,m_one=-1.0;
642 
643   PetscFunctionBegin;
644   n_benign = 0;
645   if (pcbddc->benign_p0_lidx >= 0) n_benign = 1;
646   n_vertices = pcbddc->n_vertices;
647   n_constraints = pcbddc->local_primal_size - n_benign - n_vertices;
648   /* Set Non-overlapping dimensions */
649   n_B = pcis->n_B;
650   n_D = pcis->n - n_B;
651   n_R = pcis->n - n_vertices;
652 
653   /* Set types for local objects needed by BDDC precondtioner */
654   impMatType = MATSEQDENSE;
655 
656   /* vertices in boundary numbering */
657   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
658   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
659   if (i != n_vertices) {
660     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
661   }
662 
663   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
664   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
665   if (!n_benign) {
666     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
667     ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
668     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
669     ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
670     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
671     ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
672     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
673     ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
674   } else { /* these wrappers are not needed with the benign trick */
675     S_VV = S_CV = S_VC = S_CC = NULL;
676   }
677 
678   unsymmetric_check = PETSC_FALSE;
679   /* allocate workspace */
680   n = 0;
681   if (n_constraints) {
682     n += n_R*n_constraints;
683   }
684   if (n_vertices) {
685     n = PetscMax(2*n_R*n_vertices,n);
686     n = PetscMax((n_R+n_B)*n_vertices,n);
687   }
688   if (!pcbddc->symmetric_primal) {
689     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
690     unsymmetric_check = PETSC_TRUE;
691   }
692   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
693 
694   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
695   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
696   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
697   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
698   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
699   if (isLU || isILU || isCHOL) {
700     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
701   } else if (sub_schurs->reuse_mumps) {
702     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
703     MatFactorType type;
704 
705     F = reuse_mumps->F;
706     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
707     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
708   } else {
709     F = NULL;
710   }
711 
712   /* Precompute stuffs needed for preprocessing and application of BDDC*/
713   if (n_constraints) {
714     Mat         M1,M2,M3;
715     Mat         auxmat;
716     IS          is_aux;
717     PetscScalar *array,*array2;
718 
719     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
720     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
721 
722     /* Extract constraints on R nodes: C_{CR}  */
723     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
724     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
725     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
726 
727     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
728     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
729     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
730     for (i=0;i<n_constraints;i++) {
731       const PetscScalar *row_cmat_values;
732       const PetscInt    *row_cmat_indices;
733       PetscInt          size_of_constraint,j;
734 
735       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
736       for (j=0;j<size_of_constraint;j++) {
737         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
738       }
739       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
740     }
741     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
742     if (F) {
743       Mat B;
744 
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
746       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
747       ierr = MatDestroy(&B);CHKERRQ(ierr);
748     } else {
749       PetscScalar *marr;
750 
751       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
752       for (i=0;i<n_constraints;i++) {
753         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
754         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
755         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
756         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
757         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
758       }
759       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
760     }
761     if (!pcbddc->switch_static) {
762       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
763       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
764       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
765       for (i=0;i<n_constraints;i++) {
766         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
767         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
768         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
769         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
770         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
771         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
772       }
773       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
774       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
775       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
776     } else {
777       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
778       pcbddc->local_auxmat2 = local_auxmat2_R;
779       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
780     }
781     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
782     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
783     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
784     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
785     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
786     if (isCHOL) {
787       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
788     } else {
789       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
790     }
791     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
792     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
793     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
794     ierr = MatDestroy(&M2);CHKERRQ(ierr);
795     ierr = MatDestroy(&M3);CHKERRQ(ierr);
796     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
797     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
798     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
799     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
800     ierr = MatDestroy(&M1);CHKERRQ(ierr);
801   }
802   /* Get submatrices from subdomain matrix */
803   if (n_vertices) {
804     IS is_aux;
805 
806     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
807       IS tis;
808 
809       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
810       ierr = ISSort(tis);CHKERRQ(ierr);
811       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
812       ierr = ISDestroy(&tis);CHKERRQ(ierr);
813     } else {
814       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
815     }
816     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
817     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
818     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
819     if (n_benign) {
820       IS       dummy;
821       PetscInt ii[2];
822 
823       ii[0] = 0;
824       ii[1] = pcbddc->B0_ncol;
825       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr);
826       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
827       ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
828       ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
829       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
830     }
831     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
832   }
833 
834   /* Matrix of coarse basis functions (local) */
835   if (pcbddc->coarse_phi_B) {
836     PetscInt on_B,on_primal,on_D=n_D;
837     if (pcbddc->coarse_phi_D) {
838       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
839     }
840     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
841     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
842       PetscScalar *marray;
843 
844       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
845       ierr = PetscFree(marray);CHKERRQ(ierr);
846       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
847       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
848       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
849       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
850     }
851   }
852 
853   if (!pcbddc->coarse_phi_B) {
854     PetscScalar *marray;
855 
856     n = n_B*pcbddc->local_primal_size;
857     if (pcbddc->switch_static || pcbddc->dbg_flag) {
858       n += n_D*pcbddc->local_primal_size;
859     }
860     if (!pcbddc->symmetric_primal) {
861       n *= 2;
862     }
863     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
864     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
865     n = n_B*pcbddc->local_primal_size;
866     if (pcbddc->switch_static || pcbddc->dbg_flag) {
867       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
868       n += n_D*pcbddc->local_primal_size;
869     }
870     if (!pcbddc->symmetric_primal) {
871       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
872       if (pcbddc->switch_static || pcbddc->dbg_flag) {
873         n = n_B*pcbddc->local_primal_size;
874         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
875       }
876     } else {
877       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
878       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
879       if (pcbddc->switch_static || pcbddc->dbg_flag) {
880         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
881         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
882       }
883     }
884   }
885 
886   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
887   if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) {
888     const PetscInt *idxs;
889 
890     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
891     ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr);
892     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
893   }
894   /* vertices */
895   if (n_vertices) {
896 
897     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
898 
899     if (n_R) {
900       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
901       Mat          S_V_benign;
902       PetscBLASInt B_N,B_one = 1;
903       PetscScalar  *x,*y;
904       PetscBool    isseqaij;
905 
906       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
907       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
908       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
909       if (F) { /* TODO could be optimized for symmetric problems */
910         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
911       } else {
912         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
913         for (i=0;i<n_vertices;i++) {
914           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
915           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
916           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
917           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
918           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
919         }
920         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
921       }
922       if (n_benign) {
923         const PetscScalar *vals;
924         const PetscInt    *idxs;
925         PetscInt          n;
926 
927         ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr);
928         ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign_2);CHKERRQ(ierr);
929         ierr = VecSet(vec_R_benign_2,0.);CHKERRQ(ierr);
930         ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr);
931         ierr = VecSetValues(vec_R_benign_2,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
932         ierr = VecAssemblyBegin(vec_R_benign_2);CHKERRQ(ierr);
933         ierr = VecAssemblyEnd(vec_R_benign_2);CHKERRQ(ierr);
934         ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr);
935         ierr = KSPSolve(pcbddc->ksp_R,vec_R_benign_2,vec_R_benign);CHKERRQ(ierr);
936         ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr);
937       }
938       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
939       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
940       if (n_constraints) {
941         Mat B;
942 
943         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
944         for (i=0;i<n_vertices;i++) {
945           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
946           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
947           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
949           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
950           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
951         }
952         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
953         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
954         ierr = MatDestroy(&B);CHKERRQ(ierr);
955         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
956         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
957         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
958         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
959         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
960         ierr = MatDestroy(&B);CHKERRQ(ierr);
961       }
962       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
963       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
964         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
965       }
966       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
967       if (n_benign) {
968         ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr);
969       }
970       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
971       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
972       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
973       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
974       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
975       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
976       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
977       if (!n_benign) {
978         ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
979       } else {
980         const PetscScalar *vals;
981         PetscScalar       val;
982         const PetscInt    *idxs;
983         PetscInt          n,j;
984 
985         /* Use explicit loops to populate the local coarse matrix */
986         ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr);
987         for (j=0;j<n_vertices;j++) {
988           for (i=0;i<n_vertices;i++) {
989             coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i];
990           }
991         }
992         ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr);
993         ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr);
994         ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
995         for (j=0;j<n_vertices;j++) {
996           coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j];
997           coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j];
998         }
999         ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
1000         ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr);
1001         ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr);
1002         ierr = VecDot(vec_R_benign_2,vec_R_benign,&val);CHKERRQ(ierr);
1003         coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = val;
1004       }
1005       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1006     } else {
1007       /* TODO benign case also here */
1008       if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented");
1009       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1010     }
1011     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1012     /* coarse basis functions */
1013     for (i=0;i<n_vertices;i++) {
1014       PetscScalar *y;
1015 
1016       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1017       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1018       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1019       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1020       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021       y[n_B*i+idx_V_B[i]] = 1.0;
1022       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1023       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1024 
1025       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1026         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1027         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1028         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1031         if (n_benign) y[n_D*i+p0_lidx_I] = 0.0;
1032         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1033       }
1034       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1035     }
1036     if (n_benign) {
1037       PetscScalar *y;
1038 
1039       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1040       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr);
1041       ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042       ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1044       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1045 
1046       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1047         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1048         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*n_vertices);CHKERRQ(ierr);
1049         ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1050         ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1051         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1052         y[n_D*n_vertices+p0_lidx_I] = 0.0;
1053         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1054       }
1055     }
1056     /* if n_R == 0 the object is not destroyed */
1057     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1058   }
1059 
1060   if (n_constraints) {
1061     Mat B;
1062 
1063     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1064     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1065     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1066     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1067     if (n_vertices) {
1068       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1069         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1070       } else {
1071         Mat S_VCt;
1072 
1073         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1074         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1075         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1076       }
1077     }
1078     ierr = MatDestroy(&B);CHKERRQ(ierr);
1079     /* coarse basis functions */
1080     for (i=0;i<n_constraints;i++) {
1081       PetscScalar *y;
1082 
1083       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1084       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1085       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1086       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1089       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1090       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1091         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1092         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1093         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1094         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1095         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1096         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1097       }
1098       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1099     }
1100   }
1101   if (n_constraints) {
1102     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1103   }
1104 
1105   if (n_benign) {
1106     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1107     ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1108     ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr);
1109   }
1110 
1111   /* compute other basis functions for non-symmetric problems */
1112   if (!pcbddc->symmetric_primal) {
1113 
1114     if (n_constraints) {
1115       Mat S_CCT,B_C;
1116 
1117       /* this is a lazy thing */
1118       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1119       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1120       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1121       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1122       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1123       if (n_vertices) {
1124         Mat B_V,S_VCT;
1125 
1126         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1127         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1128         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1129         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1130         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1131       }
1132       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1133     } else { /* if there are no constraints, reset work */
1134       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1135     }
1136     if (n_vertices && n_R) {
1137       Mat          A_VRT;
1138       PetscScalar  *marray;
1139       PetscBLASInt B_N,B_one = 1;
1140 
1141       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1142       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1143       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1144       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1145       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1146       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1147       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1148     }
1149 
1150     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1151       for (i=0;i<pcbddc->local_primal_size;i++) {
1152         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1153         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1154         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1155         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1156         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1157       }
1158     } else {
1159       for (i=0;i<pcbddc->local_primal_size;i++) {
1160         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1161         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1162         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1163         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1164         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1165       }
1166     }
1167     /* coarse basis functions */
1168     for (i=0;i<pcbddc->local_primal_size;i++) {
1169       PetscScalar *y;
1170 
1171       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1172       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1173       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1174       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176       if (i<n_vertices) {
1177         y[n_B*i+idx_V_B[i]] = 1.0;
1178       }
1179       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1180       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1181 
1182       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1183         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1184         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1185         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1186         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1187         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1188         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1189       }
1190       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1191     }
1192   }
1193   /* free memory */
1194   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1195   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1196   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1197   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1198   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1199   ierr = PetscFree(work);CHKERRQ(ierr);
1200   if (n_vertices) {
1201     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1202   }
1203   if (n_constraints) {
1204     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1205   }
1206   /* Checking coarse_sub_mat and coarse basis functios */
1207   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1208   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1209   if (pcbddc->dbg_flag) {
1210     Mat         coarse_sub_mat;
1211     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1212     Mat         coarse_phi_D,coarse_phi_B;
1213     Mat         coarse_psi_D,coarse_psi_B;
1214     Mat         A_II,A_BB,A_IB,A_BI;
1215     Mat         C_B,CPHI;
1216     IS          is_dummy;
1217     Vec         mones;
1218     MatType     checkmattype=MATSEQAIJ;
1219     PetscReal   real_value;
1220 
1221     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1222     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1223     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1224     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1225     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1226     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1227     if (unsymmetric_check) {
1228       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1229       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1230     }
1231     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1232 
1233     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1234     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1235     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1236     if (unsymmetric_check) {
1237       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1238       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1239       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1240       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1241       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1242       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1243       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1244       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1245       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1246       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1247       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1248       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1249     } else {
1250       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1251       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1252       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1253       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1254       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1255       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1256       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1257       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1258     }
1259     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1260     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1261     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1262     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1263     if (n_benign) {
1264       Mat B0_I,B0_B,B0_BPHI,B0_IPHI;
1265       PetscScalar *data,*data2;
1266 
1267       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr);
1268       ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1269       ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1270       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1271       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1272       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1273       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1274       for (i=0;i<pcbddc->local_primal_size;i++) {
1275         data[n_vertices*pcbddc->local_primal_size+i] += data2[i];
1276         data[i*pcbddc->local_primal_size+n_vertices] += data2[i];
1277       }
1278       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1279       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1280       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1281       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1282       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1283       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1284       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1285       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1286       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1287     }
1288 #if 0
1289   {
1290     PetscViewer viewer;
1291     char filename[256];
1292     sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank);
1293     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1294     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1295     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1296     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1297   }
1298 #endif
1299     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1300     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1301     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1302     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1303 
1304     /* check constraints */
1305     if (!n_benign) {
1306       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1307       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1308       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1309       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1310       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1311       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1312       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1313       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1314       if (unsymmetric_check) {
1315         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1316         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1317         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1318         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1319         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1320       }
1321       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1322       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1323       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1324       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1325     }
1326     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1327     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1328     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1329     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1330     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1331     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1332     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1333     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1334     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1335     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1336     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1337     if (unsymmetric_check) {
1338       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1339       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1340     }
1341     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1342   }
1343   ierr = MatDestroy(&B0);CHKERRQ(ierr);
1344   /* get back data */
1345   *coarse_submat_vals_n = coarse_submat_vals;
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1351 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1352 {
1353   Mat            *work_mat;
1354   IS             isrow_s,iscol_s;
1355   PetscBool      rsorted,csorted;
1356   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1357   PetscErrorCode ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1361   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1362   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1363   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1364 
1365   if (!rsorted) {
1366     const PetscInt *idxs;
1367     PetscInt *idxs_sorted,i;
1368 
1369     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1370     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1371     for (i=0;i<rsize;i++) {
1372       idxs_perm_r[i] = i;
1373     }
1374     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1375     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1376     for (i=0;i<rsize;i++) {
1377       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1378     }
1379     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1380     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1381   } else {
1382     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1383     isrow_s = isrow;
1384   }
1385 
1386   if (!csorted) {
1387     if (isrow == iscol) {
1388       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1389       iscol_s = isrow_s;
1390     } else {
1391       const PetscInt *idxs;
1392       PetscInt *idxs_sorted,i;
1393 
1394       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1395       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1396       for (i=0;i<csize;i++) {
1397         idxs_perm_c[i] = i;
1398       }
1399       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1400       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1401       for (i=0;i<csize;i++) {
1402         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1403       }
1404       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1405       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1406     }
1407   } else {
1408     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1409     iscol_s = iscol;
1410   }
1411 
1412   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1413 
1414   if (!rsorted || !csorted) {
1415     Mat      new_mat;
1416     IS       is_perm_r,is_perm_c;
1417 
1418     if (!rsorted) {
1419       PetscInt *idxs_r,i;
1420       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1421       for (i=0;i<rsize;i++) {
1422         idxs_r[idxs_perm_r[i]] = i;
1423       }
1424       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1425       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1426     } else {
1427       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1428     }
1429     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1430 
1431     if (!csorted) {
1432       if (isrow_s == iscol_s) {
1433         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1434         is_perm_c = is_perm_r;
1435       } else {
1436         PetscInt *idxs_c,i;
1437         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1438         for (i=0;i<csize;i++) {
1439           idxs_c[idxs_perm_c[i]] = i;
1440         }
1441         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1442         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1443       }
1444     } else {
1445       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1446     }
1447     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1448 
1449     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1450     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1451     work_mat[0] = new_mat;
1452     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1453     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1454   }
1455 
1456   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1457   *B = work_mat[0];
1458   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1459   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1460   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1466 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1467 {
1468   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1469   PC_IS*         pcis = (PC_IS*)pc->data;
1470   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1471   Mat            new_mat;
1472   IS             is_local,is_global;
1473   PetscInt       local_size;
1474   PetscBool      isseqaij;
1475   PetscErrorCode ierr;
1476 
1477   PetscFunctionBegin;
1478   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1479   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1480   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1481   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr);
1482   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1483   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1484   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1485 
1486   /* check */
1487   if (pcbddc->dbg_flag) {
1488     Vec       x,x_change;
1489     PetscReal error;
1490 
1491     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1492     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1493     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1494     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1497     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1498     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1499     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1500     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1501     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1502     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1503     ierr = VecDestroy(&x);CHKERRQ(ierr);
1504     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1505   }
1506 
1507   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1508   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1509   if (isseqaij) {
1510     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1511   } else {
1512     Mat work_mat;
1513     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1514     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1515     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1516   }
1517   if (matis->A->symmetric_set) {
1518     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1519 #if !defined(PETSC_USE_COMPLEX)
1520     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1521 #endif
1522   }
1523   /*
1524   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1525   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1526   */
1527   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1533 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1534 {
1535   PC_IS*          pcis = (PC_IS*)(pc->data);
1536   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1537   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1538   PetscInt        *idx_R_local=NULL;
1539   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1540   PetscInt        vbs,bs;
1541   PetscBT         bitmask=NULL;
1542   PetscErrorCode  ierr;
1543 
1544   PetscFunctionBegin;
1545   /*
1546     No need to setup local scatters if
1547       - primal space is unchanged
1548         AND
1549       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1550         AND
1551       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1552   */
1553   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1554     PetscFunctionReturn(0);
1555   }
1556   /* destroy old objects */
1557   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1558   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1559   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1560   /* Set Non-overlapping dimensions */
1561   n_B = pcis->n_B;
1562   n_D = pcis->n - n_B;
1563   n_vertices = pcbddc->n_vertices;
1564 
1565   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1566 
1567   /* create auxiliary bitmask and allocate workspace */
1568   if (!sub_schurs->reuse_mumps) {
1569     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1570     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1571     for (i=0;i<n_vertices;i++) {
1572       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1573     }
1574 
1575     for (i=0, n_R=0; i<pcis->n; i++) {
1576       if (!PetscBTLookup(bitmask,i)) {
1577         idx_R_local[n_R++] = i;
1578       }
1579     }
1580   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1581     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1582 
1583     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1584     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1585   }
1586 
1587   /* Block code */
1588   vbs = 1;
1589   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1590   if (bs>1 && !(n_vertices%bs)) {
1591     PetscBool is_blocked = PETSC_TRUE;
1592     PetscInt  *vary;
1593     if (!sub_schurs->reuse_mumps) {
1594       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1595       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1596       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1597       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1598       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1599       for (i=0; i<pcis->n/bs; i++) {
1600         if (vary[i]!=0 && vary[i]!=bs) {
1601           is_blocked = PETSC_FALSE;
1602           break;
1603         }
1604       }
1605       ierr = PetscFree(vary);CHKERRQ(ierr);
1606     } else {
1607       /* Verify directly the R set */
1608       for (i=0; i<n_R/bs; i++) {
1609         PetscInt j,node=idx_R_local[bs*i];
1610         for (j=1; j<bs; j++) {
1611           if (node != idx_R_local[bs*i+j]-j) {
1612             is_blocked = PETSC_FALSE;
1613             break;
1614           }
1615         }
1616       }
1617     }
1618     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1619       vbs = bs;
1620       for (i=0;i<n_R/vbs;i++) {
1621         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1622       }
1623     }
1624   }
1625   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1626   if (sub_schurs->reuse_mumps) {
1627     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1628 
1629     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1630     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1631     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1632     reuse_mumps->is_R = pcbddc->is_R_local;
1633   } else {
1634     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1635   }
1636 
1637   /* print some info if requested */
1638   if (pcbddc->dbg_flag) {
1639     PetscInt benign = 0;
1640 
1641     if (pcbddc->benign_p0_lidx >= 0) benign = 1;
1642     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1643     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1644     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1645     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1646     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1647     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr);
1648     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1649   }
1650 
1651   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1652   if (!sub_schurs->reuse_mumps) {
1653     IS       is_aux1,is_aux2;
1654     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1655 
1656     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1657     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1658     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1659     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1660     for (i=0; i<n_D; i++) {
1661       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1662     }
1663     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1664     for (i=0, j=0; i<n_R; i++) {
1665       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1666         aux_array1[j++] = i;
1667       }
1668     }
1669     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1670     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1671     for (i=0, j=0; i<n_B; i++) {
1672       if (!PetscBTLookup(bitmask,is_indices[i])) {
1673         aux_array2[j++] = i;
1674       }
1675     }
1676     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1677     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1678     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1679     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1680     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1681 
1682     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1683       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1684       for (i=0, j=0; i<n_R; i++) {
1685         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1686           aux_array1[j++] = i;
1687         }
1688       }
1689       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1690       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1691       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1692     }
1693     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1694     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1695   } else {
1696     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1697     IS               tis;
1698     PetscInt         schur_size;
1699 
1700     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1701     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1702     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1703     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1704     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1705       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1706       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1707       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1708     }
1709   }
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1716 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1717 {
1718   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1719   PC_IS          *pcis = (PC_IS*)pc->data;
1720   PC             pc_temp;
1721   Mat            A_RR;
1722   MatReuse       reuse;
1723   PetscScalar    m_one = -1.0;
1724   PetscReal      value;
1725   PetscInt       n_D,n_R;
1726   PetscBool      use_exact,use_exact_reduced,issbaij;
1727   PetscErrorCode ierr;
1728   /* prefixes stuff */
1729   char           dir_prefix[256],neu_prefix[256],str_level[16];
1730   size_t         len;
1731 
1732   PetscFunctionBegin;
1733 
1734   /* compute prefixes */
1735   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1736   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1737   if (!pcbddc->current_level) {
1738     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1739     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1740     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1741     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1742   } else {
1743     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1744     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1745     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1746     len -= 15; /* remove "pc_bddc_coarse_" */
1747     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1748     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1749     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1750     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1751     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1752     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1753     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1754     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1755   }
1756 
1757   /* DIRICHLET PROBLEM */
1758   if (dirichlet) {
1759     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1760     if (pcbddc->local_mat->symmetric_set) {
1761       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1762     }
1763     /* Matrix for Dirichlet problem is pcis->A_II */
1764     n_D = pcis->n - pcis->n_B;
1765     if (!pcbddc->ksp_D) { /* create object if not yet build */
1766       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1767       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1768       /* default */
1769       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1770       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1771       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1772       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1773       if (issbaij) {
1774         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1775       } else {
1776         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1777       }
1778       /* Allow user's customization */
1779       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1780       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1781     }
1782     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1783     if (sub_schurs->reuse_mumps) {
1784       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1785 
1786       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1787     }
1788     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1789     if (!n_D) {
1790       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1791       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1792     }
1793     /* Set Up KSP for Dirichlet problem of BDDC */
1794     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1795     /* set ksp_D into pcis data */
1796     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1797     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1798     pcis->ksp_D = pcbddc->ksp_D;
1799   }
1800 
1801   /* NEUMANN PROBLEM */
1802   A_RR = 0;
1803   if (neumann) {
1804     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1805     PetscInt        ibs,mbs;
1806     PetscBool       issbaij;
1807     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1808     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1809     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1810     if (pcbddc->ksp_R) { /* already created ksp */
1811       PetscInt nn_R;
1812       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1813       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1814       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1815       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1816         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1817         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1818         reuse = MAT_INITIAL_MATRIX;
1819       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1820         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1821           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1822           reuse = MAT_INITIAL_MATRIX;
1823         } else { /* safe to reuse the matrix */
1824           reuse = MAT_REUSE_MATRIX;
1825         }
1826       }
1827       /* last check */
1828       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1829         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1830         reuse = MAT_INITIAL_MATRIX;
1831       }
1832     } else { /* first time, so we need to create the matrix */
1833       reuse = MAT_INITIAL_MATRIX;
1834     }
1835     /* extract A_RR */
1836     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1837     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1838     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1839     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1840       if (matis->A == pcbddc->local_mat) {
1841         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1842         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1843       } else {
1844         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1845       }
1846     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1847       if (matis->A == pcbddc->local_mat) {
1848         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1849         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1850       } else {
1851         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1852       }
1853     }
1854     if (!sub_schurs->reuse_mumps) {
1855       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1856       if (pcbddc->local_mat->symmetric_set) {
1857         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1858       }
1859     } else {
1860       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1861 
1862       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1863       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1864       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1865     }
1866     if (!pcbddc->ksp_R) { /* create object if not present */
1867       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1868       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1869       /* default */
1870       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1871       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1872       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1873       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1874       if (issbaij) {
1875         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1876       } else {
1877         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1878       }
1879       /* Allow user's customization */
1880       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1881       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1882     }
1883     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1884     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1885     if (!n_R) {
1886       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1887       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1888     }
1889     /* Reuse MUMPS solver if it is present */
1890     if (sub_schurs->reuse_mumps) {
1891       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1892 
1893       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1894     }
1895     /* Set Up KSP for Neumann problem of BDDC */
1896     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1897   }
1898   /* free Neumann problem's matrix */
1899   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1900 
1901   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1902   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1903     if (pcbddc->dbg_flag) {
1904       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1905       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1906       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1907     }
1908     if (dirichlet) { /* Dirichlet */
1909       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1910       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1911       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1912       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1913       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1914       /* need to be adapted? */
1915       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1916       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1917       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1918       /* print info */
1919       if (pcbddc->dbg_flag) {
1920         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1921         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1922       }
1923       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1924         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
1925       }
1926     }
1927     if (neumann) { /* Neumann */
1928       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1929       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1930       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1931       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1932       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1933       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1934       /* need to be adapted? */
1935       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1936       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1937       /* print info */
1938       if (pcbddc->dbg_flag) {
1939         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1940         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1941       }
1942       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1943         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
1944       }
1945     }
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1952 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1953 {
1954   PetscErrorCode  ierr;
1955   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1956   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1957 
1958   PetscFunctionBegin;
1959   if (!sub_schurs->reuse_mumps) {
1960     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1961   }
1962   if (!pcbddc->switch_static) {
1963     if (applytranspose && pcbddc->local_auxmat1) {
1964       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1965       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1966     }
1967     if (!sub_schurs->reuse_mumps) {
1968       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1969       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1970     } else {
1971       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1972 
1973       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1974       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1975     }
1976   } else {
1977     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1978     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1979     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1980     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1981     if (applytranspose && pcbddc->local_auxmat1) {
1982       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1983       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1984       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1985       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1986     }
1987   }
1988   if (!sub_schurs->reuse_mumps) {
1989     if (applytranspose) {
1990       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1991     } else {
1992       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1993     }
1994 #if defined(PETSC_HAVE_MUMPS)
1995   } else {
1996     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1997 
1998     if (applytranspose) {
1999       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2000     } else {
2001       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2002     }
2003 #endif
2004   }
2005   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2006   if (!pcbddc->switch_static) {
2007     if (!sub_schurs->reuse_mumps) {
2008       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2009       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2010     } else {
2011       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2012 
2013       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2014       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2015     }
2016     if (!applytranspose && pcbddc->local_auxmat1) {
2017       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2018       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2019     }
2020   } else {
2021     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2022     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2023     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2024     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2025     if (!applytranspose && pcbddc->local_auxmat1) {
2026       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2027       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2028     }
2029     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2030     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2031     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2032     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2033   }
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2038 #undef __FUNCT__
2039 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2040 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2041 {
2042   PetscErrorCode ierr;
2043   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2044   PC_IS*            pcis = (PC_IS*)  (pc->data);
2045   const PetscScalar zero = 0.0;
2046 
2047   PetscFunctionBegin;
2048   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2049   if (applytranspose) {
2050     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2051     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2052   } else {
2053     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2054     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2055   }
2056 
2057   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2058   if (pcbddc->benign_p0_lidx >= 0) {
2059     PetscScalar *array;
2060 
2061     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2062     array[pcbddc->local_primal_size-1] += pcbddc->benign_p0;
2063     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2064   }
2065 
2066   /* start communications from local primal nodes to rhs of coarse solver */
2067   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2068   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2069   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2070 
2071   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2072   /* TODO remove null space when doing multilevel */
2073   if (pcbddc->coarse_ksp) {
2074     Vec rhs,sol;
2075 
2076     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2077     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2078     if (applytranspose) {
2079       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2080     } else {
2081       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2082     }
2083   }
2084 
2085   /* Local solution on R nodes */
2086   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2087     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2088   }
2089 
2090   /* communications from coarse sol to local primal nodes */
2091   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2092   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2093 
2094   /* Sum contributions from two levels */
2095   if (applytranspose) {
2096     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2097     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2098   } else {
2099     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2100     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2101   }
2102   /* store p0 */
2103   if (pcbddc->benign_p0_lidx >= 0) {
2104     PetscScalar *array;
2105 
2106     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2107     pcbddc->benign_p0 = array[pcbddc->local_primal_size-1];
2108     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2109   }
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 #undef __FUNCT__
2114 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2115 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2116 {
2117   PetscErrorCode ierr;
2118   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2119   PetscScalar    *array;
2120   Vec            from,to;
2121 
2122   PetscFunctionBegin;
2123   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2124     from = pcbddc->coarse_vec;
2125     to = pcbddc->vec1_P;
2126     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2127       Vec tvec;
2128 
2129       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2130       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2131       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2132       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2133       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2134       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2135     }
2136   } else { /* from local to global -> put data in coarse right hand side */
2137     from = pcbddc->vec1_P;
2138     to = pcbddc->coarse_vec;
2139   }
2140   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2141   PetscFunctionReturn(0);
2142 }
2143 
2144 #undef __FUNCT__
2145 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2146 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2147 {
2148   PetscErrorCode ierr;
2149   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2150   PetscScalar    *array;
2151   Vec            from,to;
2152 
2153   PetscFunctionBegin;
2154   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2155     from = pcbddc->coarse_vec;
2156     to = pcbddc->vec1_P;
2157   } else { /* from local to global -> put data in coarse right hand side */
2158     from = pcbddc->vec1_P;
2159     to = pcbddc->coarse_vec;
2160   }
2161   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2162   if (smode == SCATTER_FORWARD) {
2163     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2164       Vec tvec;
2165 
2166       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2167       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2168       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2169       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2170     }
2171   } else {
2172     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2173      ierr = VecResetArray(from);CHKERRQ(ierr);
2174     }
2175   }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /* uncomment for testing purposes */
2180 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2181 #undef __FUNCT__
2182 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2183 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2184 {
2185   PetscErrorCode    ierr;
2186   PC_IS*            pcis = (PC_IS*)(pc->data);
2187   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2188   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2189   /* one and zero */
2190   PetscScalar       one=1.0,zero=0.0;
2191   /* space to store constraints and their local indices */
2192   PetscScalar       *constraints_data;
2193   PetscInt          *constraints_idxs,*constraints_idxs_B;
2194   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2195   PetscInt          *constraints_n;
2196   /* iterators */
2197   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2198   /* BLAS integers */
2199   PetscBLASInt      lwork,lierr;
2200   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2201   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2202   /* reuse */
2203   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2204   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2205   /* change of basis */
2206   PetscBool         qr_needed;
2207   PetscBT           change_basis,qr_needed_idx;
2208   /* auxiliary stuff */
2209   PetscInt          *nnz,*is_indices;
2210   PetscInt          ncc;
2211   /* some quantities */
2212   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2213   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2214 
2215   PetscFunctionBegin;
2216   /* Destroy Mat objects computed previously */
2217   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2218   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2219   /* save info on constraints from previous setup (if any) */
2220   olocal_primal_size = pcbddc->local_primal_size;
2221   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2222   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2223   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2224   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2225   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2226   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2227 
2228   /* print some info */
2229   if (pcbddc->dbg_flag) {
2230     IS       vertices;
2231     PetscInt nv,nedges,nfaces;
2232     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2233     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2234     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2235     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2236     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2237     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2238     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2239     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2240     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2241   }
2242 
2243   if (!pcbddc->adaptive_selection) {
2244     IS           ISForVertices,*ISForFaces,*ISForEdges;
2245     MatNullSpace nearnullsp;
2246     const Vec    *nearnullvecs;
2247     Vec          *localnearnullsp;
2248     PetscScalar  *array;
2249     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2250     PetscBool    nnsp_has_cnst;
2251     /* LAPACK working arrays for SVD or POD */
2252     PetscBool    skip_lapack,boolforchange;
2253     PetscScalar  *work;
2254     PetscReal    *singular_vals;
2255 #if defined(PETSC_USE_COMPLEX)
2256     PetscReal    *rwork;
2257 #endif
2258 #if defined(PETSC_MISSING_LAPACK_GESVD)
2259     PetscScalar  *temp_basis,*correlation_mat;
2260 #else
2261     PetscBLASInt dummy_int=1;
2262     PetscScalar  dummy_scalar=1.;
2263 #endif
2264 
2265     /* Get index sets for faces, edges and vertices from graph */
2266     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2267     /* free unneeded index sets */
2268     if (!pcbddc->use_vertices) {
2269       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2270     }
2271     if (!pcbddc->use_edges) {
2272       for (i=0;i<n_ISForEdges;i++) {
2273         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2274       }
2275       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2276       n_ISForEdges = 0;
2277     }
2278     if (!pcbddc->use_faces) {
2279       for (i=0;i<n_ISForFaces;i++) {
2280         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2281       }
2282       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2283       n_ISForFaces = 0;
2284     }
2285 
2286 #if defined(PETSC_USE_DEBUG)
2287     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2288        Also use_change_of_basis should be consistent among processors */
2289     if (pcbddc->NullSpace) {
2290       PetscBool tbool[2],gbool[2];
2291 
2292       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2293         pcbddc->use_change_of_basis = PETSC_TRUE;
2294         if (!ISForEdges) {
2295           pcbddc->use_change_on_faces = PETSC_TRUE;
2296         }
2297       }
2298       tbool[0] = pcbddc->use_change_of_basis;
2299       tbool[1] = pcbddc->use_change_on_faces;
2300       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2301       pcbddc->use_change_of_basis = gbool[0];
2302       pcbddc->use_change_on_faces = gbool[1];
2303     }
2304 #endif
2305 
2306     /* check if near null space is attached to global mat */
2307     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2308     if (nearnullsp) {
2309       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2310       /* remove any stored info */
2311       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2312       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2313       /* store information for BDDC solver reuse */
2314       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2315       pcbddc->onearnullspace = nearnullsp;
2316       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2317       for (i=0;i<nnsp_size;i++) {
2318         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2319       }
2320     } else { /* if near null space is not provided BDDC uses constants by default */
2321       nnsp_size = 0;
2322       nnsp_has_cnst = PETSC_TRUE;
2323     }
2324     /* get max number of constraints on a single cc */
2325     max_constraints = nnsp_size;
2326     if (nnsp_has_cnst) max_constraints++;
2327 
2328     /*
2329          Evaluate maximum storage size needed by the procedure
2330          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2331          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2332          There can be multiple constraints per connected component
2333                                                                                                                                                            */
2334     n_vertices = 0;
2335     if (ISForVertices) {
2336       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2337     }
2338     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2339     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2340 
2341     total_counts = n_ISForFaces+n_ISForEdges;
2342     total_counts *= max_constraints;
2343     total_counts += n_vertices;
2344     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2345 
2346     total_counts = 0;
2347     max_size_of_constraint = 0;
2348     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2349       IS used_is;
2350       if (i<n_ISForEdges) {
2351         used_is = ISForEdges[i];
2352       } else {
2353         used_is = ISForFaces[i-n_ISForEdges];
2354       }
2355       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2356       total_counts += j;
2357       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2358     }
2359     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2360 
2361     /* get local part of global near null space vectors */
2362     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2363     for (k=0;k<nnsp_size;k++) {
2364       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2365       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2366       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2367     }
2368 
2369     /* whether or not to skip lapack calls */
2370     skip_lapack = PETSC_TRUE;
2371     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2372 
2373     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2374     if (!skip_lapack) {
2375       PetscScalar temp_work;
2376 
2377 #if defined(PETSC_MISSING_LAPACK_GESVD)
2378       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2379       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2380       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2381       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2382 #if defined(PETSC_USE_COMPLEX)
2383       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2384 #endif
2385       /* now we evaluate the optimal workspace using query with lwork=-1 */
2386       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2387       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2388       lwork = -1;
2389       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2390 #if !defined(PETSC_USE_COMPLEX)
2391       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2392 #else
2393       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2394 #endif
2395       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2396       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2397 #else /* on missing GESVD */
2398       /* SVD */
2399       PetscInt max_n,min_n;
2400       max_n = max_size_of_constraint;
2401       min_n = max_constraints;
2402       if (max_size_of_constraint < max_constraints) {
2403         min_n = max_size_of_constraint;
2404         max_n = max_constraints;
2405       }
2406       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2407 #if defined(PETSC_USE_COMPLEX)
2408       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2409 #endif
2410       /* now we evaluate the optimal workspace using query with lwork=-1 */
2411       lwork = -1;
2412       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2413       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2414       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2415       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2416 #if !defined(PETSC_USE_COMPLEX)
2417       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2418 #else
2419       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2420 #endif
2421       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2422       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2423 #endif /* on missing GESVD */
2424       /* Allocate optimal workspace */
2425       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2426       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2427     }
2428     /* Now we can loop on constraining sets */
2429     total_counts = 0;
2430     constraints_idxs_ptr[0] = 0;
2431     constraints_data_ptr[0] = 0;
2432     /* vertices */
2433     if (n_vertices) {
2434       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2435       if (nnsp_has_cnst) { /* it considers all possible vertices */
2436         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2437         for (i=0;i<n_vertices;i++) {
2438           constraints_n[total_counts] = 1;
2439           constraints_data[total_counts] = 1.0;
2440           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2441           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2442           total_counts++;
2443         }
2444       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2445         PetscBool used_vertex;
2446         for (i=0;i<n_vertices;i++) {
2447           used_vertex = PETSC_FALSE;
2448           k = 0;
2449           while (!used_vertex && k<nnsp_size) {
2450             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2451             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2452               constraints_n[total_counts] = 1;
2453               constraints_idxs[total_counts] = is_indices[i];
2454               constraints_data[total_counts] = 1.0;
2455               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2456               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2457               total_counts++;
2458               used_vertex = PETSC_TRUE;
2459             }
2460             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2461             k++;
2462           }
2463         }
2464       }
2465       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2466       n_vertices = total_counts;
2467     }
2468 
2469     /* edges and faces */
2470     total_counts_cc = total_counts;
2471     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2472       IS        used_is;
2473       PetscBool idxs_copied = PETSC_FALSE;
2474 
2475       if (ncc<n_ISForEdges) {
2476         used_is = ISForEdges[ncc];
2477         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2478       } else {
2479         used_is = ISForFaces[ncc-n_ISForEdges];
2480         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2481       }
2482       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2483 
2484       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2485       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2486       /* change of basis should not be performed on local periodic nodes */
2487       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2488       if (nnsp_has_cnst) {
2489         PetscScalar quad_value;
2490 
2491         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2492         idxs_copied = PETSC_TRUE;
2493 
2494         if (!pcbddc->use_nnsp_true) {
2495           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2496         } else {
2497           quad_value = 1.0;
2498         }
2499         for (j=0;j<size_of_constraint;j++) {
2500           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2501         }
2502         temp_constraints++;
2503         total_counts++;
2504       }
2505       for (k=0;k<nnsp_size;k++) {
2506         PetscReal real_value;
2507         PetscScalar *ptr_to_data;
2508 
2509         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2510         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2511         for (j=0;j<size_of_constraint;j++) {
2512           ptr_to_data[j] = array[is_indices[j]];
2513         }
2514         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2515         /* check if array is null on the connected component */
2516         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2517         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2518         if (real_value > 0.0) { /* keep indices and values */
2519           temp_constraints++;
2520           total_counts++;
2521           if (!idxs_copied) {
2522             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2523             idxs_copied = PETSC_TRUE;
2524           }
2525         }
2526       }
2527       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2528       valid_constraints = temp_constraints;
2529       if (!pcbddc->use_nnsp_true && temp_constraints) {
2530         if (temp_constraints == 1) { /* just normalize the constraint */
2531           PetscScalar norm,*ptr_to_data;
2532 
2533           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2534           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2535           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2536           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2537           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2538         } else { /* perform SVD */
2539           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2540           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2541 
2542 #if defined(PETSC_MISSING_LAPACK_GESVD)
2543           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2544              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2545              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2546                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2547                 from that computed using LAPACKgesvd
2548              -> This is due to a different computation of eigenvectors in LAPACKheev
2549              -> The quality of the POD-computed basis will be the same */
2550           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2551           /* Store upper triangular part of correlation matrix */
2552           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2553           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2554           for (j=0;j<temp_constraints;j++) {
2555             for (k=0;k<j+1;k++) {
2556               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2557             }
2558           }
2559           /* compute eigenvalues and eigenvectors of correlation matrix */
2560           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2561           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2562 #if !defined(PETSC_USE_COMPLEX)
2563           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2564 #else
2565           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2566 #endif
2567           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2568           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2569           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2570           j = 0;
2571           while (j < temp_constraints && singular_vals[j] < tol) j++;
2572           total_counts = total_counts-j;
2573           valid_constraints = temp_constraints-j;
2574           /* scale and copy POD basis into used quadrature memory */
2575           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2576           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2577           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2578           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2579           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2580           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2581           if (j<temp_constraints) {
2582             PetscInt ii;
2583             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2584             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2585             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2586             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2587             for (k=0;k<temp_constraints-j;k++) {
2588               for (ii=0;ii<size_of_constraint;ii++) {
2589                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2590               }
2591             }
2592           }
2593 #else  /* on missing GESVD */
2594           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2595           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2596           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2597           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2598 #if !defined(PETSC_USE_COMPLEX)
2599           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2600 #else
2601           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2602 #endif
2603           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2604           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2605           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2606           k = temp_constraints;
2607           if (k > size_of_constraint) k = size_of_constraint;
2608           j = 0;
2609           while (j < k && singular_vals[k-j-1] < tol) j++;
2610           valid_constraints = k-j;
2611           total_counts = total_counts-temp_constraints+valid_constraints;
2612 #endif /* on missing GESVD */
2613         }
2614       }
2615       /* update pointers information */
2616       if (valid_constraints) {
2617         constraints_n[total_counts_cc] = valid_constraints;
2618         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2619         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2620         /* set change_of_basis flag */
2621         if (boolforchange) {
2622           PetscBTSet(change_basis,total_counts_cc);
2623         }
2624         total_counts_cc++;
2625       }
2626     }
2627     /* free workspace */
2628     if (!skip_lapack) {
2629       ierr = PetscFree(work);CHKERRQ(ierr);
2630 #if defined(PETSC_USE_COMPLEX)
2631       ierr = PetscFree(rwork);CHKERRQ(ierr);
2632 #endif
2633       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2634 #if defined(PETSC_MISSING_LAPACK_GESVD)
2635       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2636       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2637 #endif
2638     }
2639     for (k=0;k<nnsp_size;k++) {
2640       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2641     }
2642     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2643     /* free index sets of faces, edges and vertices */
2644     for (i=0;i<n_ISForFaces;i++) {
2645       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2646     }
2647     if (n_ISForFaces) {
2648       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2649     }
2650     for (i=0;i<n_ISForEdges;i++) {
2651       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2652     }
2653     if (n_ISForEdges) {
2654       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2655     }
2656     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2657   } else {
2658     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2659 
2660     total_counts = 0;
2661     n_vertices = 0;
2662     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2663       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2664     }
2665     max_constraints = 0;
2666     total_counts_cc = 0;
2667     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2668       total_counts += pcbddc->adaptive_constraints_n[i];
2669       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2670       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2671     }
2672     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2673     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2674     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2675     constraints_data = pcbddc->adaptive_constraints_data;
2676     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2677     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2678     total_counts_cc = 0;
2679     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2680       if (pcbddc->adaptive_constraints_n[i]) {
2681         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2682       }
2683     }
2684 #if 0
2685     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2686     for (i=0;i<total_counts_cc;i++) {
2687       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2688       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2689       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2690         printf(" %d",constraints_idxs[j]);
2691       }
2692       printf("\n");
2693       printf("number of cc: %d\n",constraints_n[i]);
2694     }
2695     for (i=0;i<n_vertices;i++) {
2696       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2697     }
2698     for (i=0;i<sub_schurs->n_subs;i++) {
2699       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2700     }
2701 #endif
2702 
2703     max_size_of_constraint = 0;
2704     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2705     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2706     /* Change of basis */
2707     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2708     if (pcbddc->use_change_of_basis) {
2709       for (i=0;i<sub_schurs->n_subs;i++) {
2710         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2711           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2712         }
2713       }
2714     }
2715   }
2716   pcbddc->local_primal_size = total_counts;
2717   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2718   ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2719 
2720   /* map constraints_idxs in boundary numbering */
2721   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2722   if (i != constraints_idxs_ptr[total_counts_cc]) {
2723     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2724   }
2725 
2726   /* Create constraint matrix */
2727   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2728   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2729   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2730 
2731   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2732   /* determine if a QR strategy is needed for change of basis */
2733   qr_needed = PETSC_FALSE;
2734   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2735   total_primal_vertices=0;
2736   pcbddc->local_primal_size_cc = 0;
2737   for (i=0;i<total_counts_cc;i++) {
2738     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2739     if (size_of_constraint == 1) {
2740       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2741       pcbddc->local_primal_size_cc += 1;
2742     } else if (PetscBTLookup(change_basis,i)) {
2743       for (k=0;k<constraints_n[i];k++) {
2744         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2745       }
2746       pcbddc->local_primal_size_cc += constraints_n[i];
2747       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2748         PetscBTSet(qr_needed_idx,i);
2749         qr_needed = PETSC_TRUE;
2750       }
2751     } else {
2752       pcbddc->local_primal_size_cc += 1;
2753     }
2754   }
2755   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2756   pcbddc->n_vertices = total_primal_vertices;
2757   /* permute indices in order to have a sorted set of vertices */
2758   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2759 
2760   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2761   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2762   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2763   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2764 
2765   /* nonzero structure of constraint matrix */
2766   /* and get reference dof for local constraints */
2767   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2768   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2769 
2770   j = total_primal_vertices;
2771   total_counts = total_primal_vertices;
2772   cum = total_primal_vertices;
2773   for (i=n_vertices;i<total_counts_cc;i++) {
2774     if (!PetscBTLookup(change_basis,i)) {
2775       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2776       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2777       cum++;
2778       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2779       for (k=0;k<constraints_n[i];k++) {
2780         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2781         nnz[j+k] = size_of_constraint;
2782       }
2783       j += constraints_n[i];
2784     }
2785   }
2786   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2787   ierr = PetscFree(nnz);CHKERRQ(ierr);
2788 
2789   /* set values in constraint matrix */
2790   for (i=0;i<total_primal_vertices;i++) {
2791     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2792   }
2793   total_counts = total_primal_vertices;
2794   for (i=n_vertices;i<total_counts_cc;i++) {
2795     if (!PetscBTLookup(change_basis,i)) {
2796       PetscInt *cols;
2797 
2798       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2799       cols = constraints_idxs+constraints_idxs_ptr[i];
2800       for (k=0;k<constraints_n[i];k++) {
2801         PetscInt    row = total_counts+k;
2802         PetscScalar *vals;
2803 
2804         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2805         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2806       }
2807       total_counts += constraints_n[i];
2808     }
2809   }
2810   /* assembling */
2811   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2812   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2813 
2814   /*
2815   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2816   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2817   */
2818   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2819   if (pcbddc->use_change_of_basis) {
2820     /* dual and primal dofs on a single cc */
2821     PetscInt     dual_dofs,primal_dofs;
2822     /* working stuff for GEQRF */
2823     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2824     PetscBLASInt lqr_work;
2825     /* working stuff for UNGQR */
2826     PetscScalar  *gqr_work,lgqr_work_t;
2827     PetscBLASInt lgqr_work;
2828     /* working stuff for TRTRS */
2829     PetscScalar  *trs_rhs;
2830     PetscBLASInt Blas_NRHS;
2831     /* pointers for values insertion into change of basis matrix */
2832     PetscInt     *start_rows,*start_cols;
2833     PetscScalar  *start_vals;
2834     /* working stuff for values insertion */
2835     PetscBT      is_primal;
2836     PetscInt     *aux_primal_numbering_B;
2837     /* matrix sizes */
2838     PetscInt     global_size,local_size;
2839     /* temporary change of basis */
2840     Mat          localChangeOfBasisMatrix;
2841     /* extra space for debugging */
2842     PetscScalar  *dbg_work;
2843 
2844     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2845     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2846     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2847     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2848     /* nonzeros for local mat */
2849     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2850     for (i=0;i<pcis->n;i++) nnz[i]=1;
2851     for (i=n_vertices;i<total_counts_cc;i++) {
2852       if (PetscBTLookup(change_basis,i)) {
2853         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2854         if (PetscBTLookup(qr_needed_idx,i)) {
2855           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2856         } else {
2857           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2858           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2859         }
2860       }
2861     }
2862     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2863     ierr = PetscFree(nnz);CHKERRQ(ierr);
2864     /* Set initial identity in the matrix */
2865     for (i=0;i<pcis->n;i++) {
2866       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2867     }
2868 
2869     if (pcbddc->dbg_flag) {
2870       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2871       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2872     }
2873 
2874 
2875     /* Now we loop on the constraints which need a change of basis */
2876     /*
2877        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2878        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2879 
2880        Basic blocks of change of basis matrix T computed by
2881 
2882           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2883 
2884             | 1        0   ...        0         s_1/S |
2885             | 0        1   ...        0         s_2/S |
2886             |              ...                        |
2887             | 0        ...            1     s_{n-1}/S |
2888             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2889 
2890             with S = \sum_{i=1}^n s_i^2
2891             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2892                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2893 
2894           - QR decomposition of constraints otherwise
2895     */
2896     if (qr_needed) {
2897       /* space to store Q */
2898       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2899       /* first we issue queries for optimal work */
2900       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2901       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2902       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2903       lqr_work = -1;
2904       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2905       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2906       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2907       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2908       lgqr_work = -1;
2909       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2910       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2911       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2912       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2913       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2914       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2915       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2916       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2917       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2918       /* array to store scaling factors for reflectors */
2919       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2920       /* array to store rhs and solution of triangular solver */
2921       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2922       /* allocating workspace for check */
2923       if (pcbddc->dbg_flag) {
2924         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2925       }
2926     }
2927     /* array to store whether a node is primal or not */
2928     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2929     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2930     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2931     if (i != total_primal_vertices) {
2932       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2933     }
2934     for (i=0;i<total_primal_vertices;i++) {
2935       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2936     }
2937     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2938 
2939     /* loop on constraints and see whether or not they need a change of basis and compute it */
2940     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2941       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2942       if (PetscBTLookup(change_basis,total_counts)) {
2943         /* get constraint info */
2944         primal_dofs = constraints_n[total_counts];
2945         dual_dofs = size_of_constraint-primal_dofs;
2946 
2947         if (pcbddc->dbg_flag) {
2948           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2949         }
2950 
2951         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2952 
2953           /* copy quadrature constraints for change of basis check */
2954           if (pcbddc->dbg_flag) {
2955             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2956           }
2957           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2958           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2959 
2960           /* compute QR decomposition of constraints */
2961           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2962           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2963           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2964           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2965           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2966           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2967           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2968 
2969           /* explictly compute R^-T */
2970           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2971           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2972           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2973           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2974           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2975           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2976           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2977           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2978           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2979           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2980 
2981           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2982           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2983           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2984           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2985           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2986           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2987           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2988           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2989           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2990 
2991           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2992              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2993              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2994           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2995           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2996           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2997           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2998           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2999           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3000           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3001           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3002           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3003           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3004 
3005           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3006           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3007           /* insert cols for primal dofs */
3008           for (j=0;j<primal_dofs;j++) {
3009             start_vals = &qr_basis[j*size_of_constraint];
3010             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3011             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3012           }
3013           /* insert cols for dual dofs */
3014           for (j=0,k=0;j<dual_dofs;k++) {
3015             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3016               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3017               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3018               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3019               j++;
3020             }
3021           }
3022 
3023           /* check change of basis */
3024           if (pcbddc->dbg_flag) {
3025             PetscInt   ii,jj;
3026             PetscBool valid_qr=PETSC_TRUE;
3027             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3028             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3029             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3030             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3031             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3032             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3033             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3034             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3035             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3036             for (jj=0;jj<size_of_constraint;jj++) {
3037               for (ii=0;ii<primal_dofs;ii++) {
3038                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3039                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3040               }
3041             }
3042             if (!valid_qr) {
3043               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3044               for (jj=0;jj<size_of_constraint;jj++) {
3045                 for (ii=0;ii<primal_dofs;ii++) {
3046                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3047                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3048                   }
3049                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3050                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3051                   }
3052                 }
3053               }
3054             } else {
3055               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3056             }
3057           }
3058         } else { /* simple transformation block */
3059           PetscInt    row,col;
3060           PetscScalar val,norm;
3061 
3062           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3063           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3064           for (j=0;j<size_of_constraint;j++) {
3065             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3066             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3067             if (!PetscBTLookup(is_primal,row_B)) {
3068               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3069               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3070               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3071             } else {
3072               for (k=0;k<size_of_constraint;k++) {
3073                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3074                 if (row != col) {
3075                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3076                 } else {
3077                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3078                 }
3079                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3080               }
3081             }
3082           }
3083           if (pcbddc->dbg_flag) {
3084             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3085           }
3086         }
3087       } else {
3088         if (pcbddc->dbg_flag) {
3089           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3090         }
3091       }
3092     }
3093 
3094     /* free workspace */
3095     if (qr_needed) {
3096       if (pcbddc->dbg_flag) {
3097         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3098       }
3099       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3100       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3101       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3102       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3103       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3104     }
3105     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3106     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3107     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3108 
3109     /* assembling of global change of variable */
3110     {
3111       Mat      tmat;
3112       PetscInt bs;
3113 
3114       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3115       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3116       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3117       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3118       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3119       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3120       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3121       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3122       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3123       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3124       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3125       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3126       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3127       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3128       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3129       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3130       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3131       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3132     }
3133     /* check */
3134     if (pcbddc->dbg_flag) {
3135       PetscReal error;
3136       Vec       x,x_change;
3137 
3138       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3139       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3140       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3141       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3142       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3143       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3144       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3145       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3146       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3147       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3148       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3149       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3150       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3151       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3152       ierr = VecDestroy(&x);CHKERRQ(ierr);
3153       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3154     }
3155 
3156     /* adapt sub_schurs computed (if any) */
3157     if (pcbddc->use_deluxe_scaling) {
3158       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3159       if (sub_schurs->S_Ej_all) {
3160         Mat S_new,tmat;
3161         IS is_all_N;
3162 
3163         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3164         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3165         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3166         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3167         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3168         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3169         sub_schurs->S_Ej_all = S_new;
3170         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3171         if (sub_schurs->sum_S_Ej_all) {
3172           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3173           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3174           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3175           sub_schurs->sum_S_Ej_all = S_new;
3176           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3177         }
3178         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3179       }
3180     }
3181     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3182   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3183     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3184     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3185   }
3186 
3187   /* set up change of basis context */
3188   if (pcbddc->ChangeOfBasisMatrix) {
3189     PCBDDCChange_ctx change_ctx;
3190 
3191     if (!pcbddc->new_global_mat) {
3192       PetscInt global_size,local_size;
3193 
3194       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3195       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3196       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3197       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3198       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3199       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3200       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3201       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3202       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3203     } else {
3204       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3205       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3206       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3207     }
3208     if (!pcbddc->user_ChangeOfBasisMatrix) {
3209       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3210       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3211     } else {
3212       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3213       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3214     }
3215     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3216     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3217     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3218     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3219   }
3220 
3221   /* add pressure dof to set of primal nodes for numbering purposes */
3222   if (pcbddc->benign_p0_lidx >= 0) {
3223     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx;
3224     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx;
3225     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3226     pcbddc->local_primal_size_cc++;
3227     pcbddc->local_primal_size++;
3228   }
3229 
3230   /* check if a new primal space has been introduced (also take into account benign trick) */
3231   pcbddc->new_primal_space_local = PETSC_TRUE;
3232   if (olocal_primal_size == pcbddc->local_primal_size) {
3233     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3234     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3235     if (!pcbddc->new_primal_space_local) {
3236       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3237       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3238     }
3239   }
3240   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3241   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3242   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3243 
3244   /* flush dbg viewer */
3245   if (pcbddc->dbg_flag) {
3246     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3247   }
3248 
3249   /* free workspace */
3250   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3251   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3252   if (!pcbddc->adaptive_selection) {
3253     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3254     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3255   } else {
3256     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3257                       pcbddc->adaptive_constraints_idxs_ptr,
3258                       pcbddc->adaptive_constraints_data_ptr,
3259                       pcbddc->adaptive_constraints_idxs,
3260                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3261     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3262     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3263   }
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 #undef __FUNCT__
3268 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3269 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3270 {
3271   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3272   PC_IS       *pcis = (PC_IS*)pc->data;
3273   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3274   PetscInt    ierr,i,vertex_size,N;
3275   PetscViewer viewer=pcbddc->dbg_viewer;
3276 
3277   PetscFunctionBegin;
3278   /* Reset previously computed graph */
3279   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3280   /* Init local Graph struct */
3281   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3282   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3283 
3284   /* Check validity of the csr graph passed in by the user */
3285   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3286     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3287   }
3288 
3289   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3290   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3291     PetscInt  *xadj,*adjncy;
3292     PetscInt  nvtxs;
3293     PetscBool flg_row=PETSC_FALSE;
3294 
3295     if (pcbddc->use_local_adj) {
3296 
3297       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3298       if (flg_row) {
3299         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3300         pcbddc->computed_rowadj = PETSC_TRUE;
3301       }
3302       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3303     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3304       IS                     is_dummy;
3305       ISLocalToGlobalMapping l2gmap_dummy;
3306       PetscInt               j,sum;
3307       PetscInt               *cxadj,*cadjncy;
3308       const PetscInt         *idxs;
3309       PCBDDCGraph            graph;
3310       PetscBT                is_on_boundary;
3311 
3312       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3313       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3314       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3315       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3316       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3317       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3318       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3319       if (flg_row) {
3320         graph->xadj = xadj;
3321         graph->adjncy = adjncy;
3322       }
3323       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3324       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3325       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3326 
3327       if (pcbddc->dbg_flag) {
3328         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3329         for (i=0;i<graph->ncc;i++) {
3330           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3331         }
3332       }
3333 
3334       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3335       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3336       for (i=0;i<pcis->n_B;i++) {
3337         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3338       }
3339       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3340 
3341       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3342       sum = 0;
3343       for (i=0;i<graph->ncc;i++) {
3344         PetscInt sizecc = 0;
3345         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3346           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3347             sizecc++;
3348           }
3349         }
3350         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3351           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3352             cxadj[graph->queue[j]] = sizecc;
3353           }
3354         }
3355         sum += sizecc*sizecc;
3356       }
3357       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3358       sum = 0;
3359       for (i=0;i<pcis->n;i++) {
3360         PetscInt temp = cxadj[i];
3361         cxadj[i] = sum;
3362         sum += temp;
3363       }
3364       cxadj[pcis->n] = sum;
3365       for (i=0;i<graph->ncc;i++) {
3366         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3367           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3368             PetscInt k,sizecc = 0;
3369             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3370               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3371                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3372                 sizecc++;
3373               }
3374             }
3375           }
3376         }
3377       }
3378       if (sum) {
3379         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3380       } else {
3381         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3382         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3383       }
3384       graph->xadj = 0;
3385       graph->adjncy = 0;
3386       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3387       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3388     }
3389   }
3390   if (pcbddc->dbg_flag) {
3391     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3392   }
3393 
3394   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3395   vertex_size = 1;
3396   if (pcbddc->user_provided_isfordofs) {
3397     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3398       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3399       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3400         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3401         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3402       }
3403       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3404       pcbddc->n_ISForDofs = 0;
3405       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3406     }
3407     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3408     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3409   } else {
3410     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3411       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3412       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3413       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3414         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3415       }
3416     }
3417   }
3418 
3419   /* Setup of Graph */
3420   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3421     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3422   }
3423   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3424     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3425   }
3426   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3427 
3428   /* Graph's connected components analysis */
3429   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3430 
3431   /* print some info to stdout */
3432   if (pcbddc->dbg_flag) {
3433     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3434   }
3435 
3436   /* mark topography has done */
3437   pcbddc->recompute_topography = PETSC_FALSE;
3438   PetscFunctionReturn(0);
3439 }
3440 
3441 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3442 #undef __FUNCT__
3443 #define __FUNCT__ "PCBDDCSubsetNumbering"
3444 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3445 {
3446   PetscSF        sf;
3447   PetscLayout    map;
3448   const PetscInt *idxs;
3449   PetscInt       *leaf_data,*root_data,*gidxs;
3450   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3451   PetscInt       n_n,nlocals,start,first_index;
3452   PetscMPIInt    commsize;
3453   PetscBool      first_found;
3454   PetscErrorCode ierr;
3455 
3456   PetscFunctionBegin;
3457   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3458   if (subset_mult) {
3459     PetscCheckSameComm(subset,1,subset_mult,2);
3460     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3461     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3462   }
3463   /* create workspace layout for computing global indices of subset */
3464   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3465   lbounds[0] = lbounds[1] = 0;
3466   for (i=0;i<n;i++) {
3467     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3468     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3469   }
3470   lbounds[0] = -lbounds[0];
3471   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3472   gbounds[0] = -gbounds[0];
3473   N = gbounds[1] - gbounds[0] + 1;
3474   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3475   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3476   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3477   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3478   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3479 
3480   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3481   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3482   if (subset_mult) {
3483     const PetscInt* idxs_mult;
3484 
3485     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3486     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3487     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3488   } else {
3489     for (i=0;i<n;i++) leaf_data[i] = 1;
3490   }
3491   /* local size of new subset */
3492   n_n = 0;
3493   for (i=0;i<n;i++) n_n += leaf_data[i];
3494 
3495   /* global indexes in layout */
3496   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3497   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3498   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3499   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3500   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3501   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3502 
3503   /* reduce from leaves to roots */
3504   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3505   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3506   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3507 
3508   /* count indexes in local part of layout */
3509   nlocals = 0;
3510   first_index = -1;
3511   first_found = PETSC_FALSE;
3512   for (i=0;i<Nl;i++) {
3513     if (!first_found && root_data[i]) {
3514       first_found = PETSC_TRUE;
3515       first_index = i;
3516     }
3517     nlocals += root_data[i];
3518   }
3519 
3520   /* cumulative of number of indexes and size of subset without holes */
3521 #if defined(PETSC_HAVE_MPI_EXSCAN)
3522   start = 0;
3523   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3524 #else
3525   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3526   start = start-nlocals;
3527 #endif
3528 
3529   if (N_n) { /* compute total size of new subset if requested */
3530     *N_n = start + nlocals;
3531     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3532     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3533   }
3534 
3535   /* adapt root data with cumulative */
3536   if (first_found) {
3537     PetscInt old_index;
3538 
3539     root_data[first_index] += start;
3540     old_index = first_index;
3541     for (i=first_index+1;i<Nl;i++) {
3542       if (root_data[i]) {
3543         root_data[i] += root_data[old_index];
3544         old_index = i;
3545       }
3546     }
3547   }
3548 
3549   /* from roots to leaves */
3550   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3551   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3552   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3553 
3554   /* create new IS with global indexes without holes */
3555   if (subset_mult) {
3556     const PetscInt* idxs_mult;
3557     PetscInt        cum;
3558 
3559     cum = 0;
3560     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3561     for (i=0;i<n;i++) {
3562       PetscInt j;
3563       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3564     }
3565     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3566   } else {
3567     for (i=0;i<n;i++) {
3568       gidxs[i] = leaf_data[i]-1;
3569     }
3570   }
3571   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3572   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3573   PetscFunctionReturn(0);
3574 }
3575 
3576 #undef __FUNCT__
3577 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3578 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3579 {
3580   PetscInt       i,j;
3581   PetscScalar    *alphas;
3582   PetscErrorCode ierr;
3583 
3584   PetscFunctionBegin;
3585   /* this implements stabilized Gram-Schmidt */
3586   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3587   for (i=0;i<n;i++) {
3588     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3589     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3590     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3591   }
3592   ierr = PetscFree(alphas);CHKERRQ(ierr);
3593   PetscFunctionReturn(0);
3594 }
3595 
3596 #undef __FUNCT__
3597 #define __FUNCT__ "MatISGetSubassemblingPattern"
3598 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3599 {
3600   IS             ranks_send_to;
3601   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3602   PetscMPIInt    size,rank,color;
3603   PetscInt       *xadj,*adjncy;
3604   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3605   PetscInt       i,local_size,threshold=0;
3606   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3607   PetscSubcomm   subcomm;
3608   PetscErrorCode ierr;
3609 
3610   PetscFunctionBegin;
3611   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3612   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3613   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3614 
3615   /* Get info on mapping */
3616   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
3617   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3618 
3619   /* build local CSR graph of subdomains' connectivity */
3620   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3621   xadj[0] = 0;
3622   xadj[1] = PetscMax(n_neighs-1,0);
3623   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3624   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3625 
3626   if (threshold) {
3627     PetscInt xadj_count = 0;
3628     for (i=1;i<n_neighs;i++) {
3629       if (n_shared[i] > threshold) {
3630         adjncy[xadj_count] = neighs[i];
3631         adjncy_wgt[xadj_count] = n_shared[i];
3632         xadj_count++;
3633       }
3634     }
3635     xadj[1] = xadj_count;
3636   } else {
3637     if (xadj[1]) {
3638       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3639       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3640     }
3641   }
3642   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3643   if (use_square) {
3644     for (i=0;i<xadj[1];i++) {
3645       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3646     }
3647   }
3648   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3649 
3650   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3651 
3652   /*
3653     Restrict work on active processes only.
3654   */
3655   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3656   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3657   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3658   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3659   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3660   if (color) {
3661     ierr = PetscFree(xadj);CHKERRQ(ierr);
3662     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3663     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3664   } else {
3665     Mat             subdomain_adj;
3666     IS              new_ranks,new_ranks_contig;
3667     MatPartitioning partitioner;
3668     PetscInt        prank,rstart=0,rend=0;
3669     PetscInt        *is_indices,*oldranks;
3670     PetscBool       aggregate;
3671 
3672     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3673     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3674     prank = rank;
3675     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3676     /*
3677     for (i=0;i<size;i++) {
3678       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3679     }
3680     */
3681     for (i=0;i<xadj[1];i++) {
3682       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3683     }
3684     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3685     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3686     if (aggregate) {
3687       PetscInt    lrows,row,ncols,*cols;
3688       PetscMPIInt nrank;
3689       PetscScalar *vals;
3690 
3691       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3692       lrows = 0;
3693       if (nrank<redprocs) {
3694         lrows = size/redprocs;
3695         if (nrank<size%redprocs) lrows++;
3696       }
3697       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3698       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3699       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3700       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3701       row = nrank;
3702       ncols = xadj[1]-xadj[0];
3703       cols = adjncy;
3704       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3705       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3706       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3707       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3709       ierr = PetscFree(xadj);CHKERRQ(ierr);
3710       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3711       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3712       ierr = PetscFree(vals);CHKERRQ(ierr);
3713     } else {
3714       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3715     }
3716     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3717 
3718     /* Partition */
3719     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3720     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3721     if (use_vwgt) {
3722       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3723       v_wgt[0] = local_size;
3724       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3725     }
3726     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3727     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3728     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3729     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3730     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3731 
3732     /* renumber new_ranks to avoid "holes" in new set of processors */
3733     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3734     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3735     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3736     if (!redprocs) {
3737       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3738     } else {
3739       PetscInt    idxs[1];
3740       PetscMPIInt tag;
3741       MPI_Request *reqs;
3742 
3743       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3744       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3745       for (i=rstart;i<rend;i++) {
3746         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3747       }
3748       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3749       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3750       ierr = PetscFree(reqs);CHKERRQ(ierr);
3751       ranks_send_to_idx[0] = oldranks[idxs[0]];
3752     }
3753     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3754     /* clean up */
3755     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3756     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3757     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3758     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3759   }
3760   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3761 
3762   /* assemble parallel IS for sends */
3763   i = 1;
3764   if (color) i=0;
3765   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3766   /* get back IS */
3767   *is_sends = ranks_send_to;
3768   PetscFunctionReturn(0);
3769 }
3770 
3771 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3772 
3773 #undef __FUNCT__
3774 #define __FUNCT__ "MatISSubassemble"
3775 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3776 {
3777   Mat                    local_mat;
3778   IS                     is_sends_internal;
3779   PetscInt               rows,cols,new_local_rows;
3780   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3781   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3782   ISLocalToGlobalMapping l2gmap;
3783   PetscInt*              l2gmap_indices;
3784   const PetscInt*        is_indices;
3785   MatType                new_local_type;
3786   /* buffers */
3787   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3788   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3789   PetscInt               *recv_buffer_idxs_local;
3790   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3791   /* MPI */
3792   MPI_Comm               comm,comm_n;
3793   PetscSubcomm           subcomm;
3794   PetscMPIInt            n_sends,n_recvs,commsize;
3795   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3796   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3797   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3798   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3799   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3800   PetscErrorCode         ierr;
3801 
3802   PetscFunctionBegin;
3803   /* TODO: add missing checks */
3804   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3805   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3806   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3807   PetscValidLogicalCollectiveInt(mat,nis,7);
3808   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3809   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3810   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3811   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3812   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3813   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3814   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3815   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3816     PetscInt mrows,mcols,mnrows,mncols;
3817     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3818     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3819     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3820     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3821     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3822     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3823   }
3824   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3825   PetscValidLogicalCollectiveInt(mat,bs,0);
3826   /* prepare IS for sending if not provided */
3827   if (!is_sends) {
3828     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3829     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
3830   } else {
3831     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3832     is_sends_internal = is_sends;
3833   }
3834 
3835   /* get comm */
3836   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3837 
3838   /* compute number of sends */
3839   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3840   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3841 
3842   /* compute number of receives */
3843   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3844   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3845   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3846   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3847   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3848   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3849   ierr = PetscFree(iflags);CHKERRQ(ierr);
3850 
3851   /* restrict comm if requested */
3852   subcomm = 0;
3853   destroy_mat = PETSC_FALSE;
3854   if (restrict_comm) {
3855     PetscMPIInt color,subcommsize;
3856 
3857     color = 0;
3858     if (restrict_full) {
3859       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3860     } else {
3861       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3862     }
3863     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3864     subcommsize = commsize - subcommsize;
3865     /* check if reuse has been requested */
3866     if (reuse == MAT_REUSE_MATRIX) {
3867       if (*mat_n) {
3868         PetscMPIInt subcommsize2;
3869         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3870         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3871         comm_n = PetscObjectComm((PetscObject)*mat_n);
3872       } else {
3873         comm_n = PETSC_COMM_SELF;
3874       }
3875     } else { /* MAT_INITIAL_MATRIX */
3876       PetscMPIInt rank;
3877 
3878       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3879       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3880       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3881       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3882       comm_n = PetscSubcommChild(subcomm);
3883     }
3884     /* flag to destroy *mat_n if not significative */
3885     if (color) destroy_mat = PETSC_TRUE;
3886   } else {
3887     comm_n = comm;
3888   }
3889 
3890   /* prepare send/receive buffers */
3891   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3892   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3893   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3894   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3895   if (nis) {
3896     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3897   }
3898 
3899   /* Get data from local matrices */
3900   if (!isdense) {
3901     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3902     /* TODO: See below some guidelines on how to prepare the local buffers */
3903     /*
3904        send_buffer_vals should contain the raw values of the local matrix
3905        send_buffer_idxs should contain:
3906        - MatType_PRIVATE type
3907        - PetscInt        size_of_l2gmap
3908        - PetscInt        global_row_indices[size_of_l2gmap]
3909        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3910     */
3911   } else {
3912     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3913     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
3914     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3915     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3916     send_buffer_idxs[1] = i;
3917     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3918     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3919     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3920     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3921     for (i=0;i<n_sends;i++) {
3922       ilengths_vals[is_indices[i]] = len*len;
3923       ilengths_idxs[is_indices[i]] = len+2;
3924     }
3925   }
3926   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3927   /* additional is (if any) */
3928   if (nis) {
3929     PetscMPIInt psum;
3930     PetscInt j;
3931     for (j=0,psum=0;j<nis;j++) {
3932       PetscInt plen;
3933       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3934       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3935       psum += len+1; /* indices + lenght */
3936     }
3937     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3938     for (j=0,psum=0;j<nis;j++) {
3939       PetscInt plen;
3940       const PetscInt *is_array_idxs;
3941       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3942       send_buffer_idxs_is[psum] = plen;
3943       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3944       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3945       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3946       psum += plen+1; /* indices + lenght */
3947     }
3948     for (i=0;i<n_sends;i++) {
3949       ilengths_idxs_is[is_indices[i]] = psum;
3950     }
3951     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3952   }
3953 
3954   buf_size_idxs = 0;
3955   buf_size_vals = 0;
3956   buf_size_idxs_is = 0;
3957   for (i=0;i<n_recvs;i++) {
3958     buf_size_idxs += (PetscInt)olengths_idxs[i];
3959     buf_size_vals += (PetscInt)olengths_vals[i];
3960     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3961   }
3962   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3963   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3964   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3965 
3966   /* get new tags for clean communications */
3967   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3968   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3969   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3970 
3971   /* allocate for requests */
3972   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3973   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3974   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3975   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3976   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3977   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3978 
3979   /* communications */
3980   ptr_idxs = recv_buffer_idxs;
3981   ptr_vals = recv_buffer_vals;
3982   ptr_idxs_is = recv_buffer_idxs_is;
3983   for (i=0;i<n_recvs;i++) {
3984     source_dest = onodes[i];
3985     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3986     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3987     ptr_idxs += olengths_idxs[i];
3988     ptr_vals += olengths_vals[i];
3989     if (nis) {
3990       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3991       ptr_idxs_is += olengths_idxs_is[i];
3992     }
3993   }
3994   for (i=0;i<n_sends;i++) {
3995     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3996     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3997     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3998     if (nis) {
3999       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4000     }
4001   }
4002   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4003   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4004 
4005   /* assemble new l2g map */
4006   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4007   ptr_idxs = recv_buffer_idxs;
4008   new_local_rows = 0;
4009   for (i=0;i<n_recvs;i++) {
4010     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4011     ptr_idxs += olengths_idxs[i];
4012   }
4013   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4014   ptr_idxs = recv_buffer_idxs;
4015   new_local_rows = 0;
4016   for (i=0;i<n_recvs;i++) {
4017     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4018     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4019     ptr_idxs += olengths_idxs[i];
4020   }
4021   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4022   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4023   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4024 
4025   /* infer new local matrix type from received local matrices type */
4026   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4027   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4028   if (n_recvs) {
4029     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4030     ptr_idxs = recv_buffer_idxs;
4031     for (i=0;i<n_recvs;i++) {
4032       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4033         new_local_type_private = MATAIJ_PRIVATE;
4034         break;
4035       }
4036       ptr_idxs += olengths_idxs[i];
4037     }
4038     switch (new_local_type_private) {
4039       case MATDENSE_PRIVATE:
4040         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4041           new_local_type = MATSEQAIJ;
4042           bs = 1;
4043         } else { /* if I receive only 1 dense matrix */
4044           new_local_type = MATSEQDENSE;
4045           bs = 1;
4046         }
4047         break;
4048       case MATAIJ_PRIVATE:
4049         new_local_type = MATSEQAIJ;
4050         bs = 1;
4051         break;
4052       case MATBAIJ_PRIVATE:
4053         new_local_type = MATSEQBAIJ;
4054         break;
4055       case MATSBAIJ_PRIVATE:
4056         new_local_type = MATSEQSBAIJ;
4057         break;
4058       default:
4059         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4060         break;
4061     }
4062   } else { /* by default, new_local_type is seqdense */
4063     new_local_type = MATSEQDENSE;
4064     bs = 1;
4065   }
4066 
4067   /* create MATIS object if needed */
4068   if (reuse == MAT_INITIAL_MATRIX) {
4069     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4070     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4071   } else {
4072     /* it also destroys the local matrices */
4073     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4074   }
4075   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4076   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4077 
4078   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4079 
4080   /* Global to local map of received indices */
4081   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4082   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4083   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4084 
4085   /* restore attributes -> type of incoming data and its size */
4086   buf_size_idxs = 0;
4087   for (i=0;i<n_recvs;i++) {
4088     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4089     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4090     buf_size_idxs += (PetscInt)olengths_idxs[i];
4091   }
4092   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4093 
4094   /* set preallocation */
4095   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4096   if (!newisdense) {
4097     PetscInt *new_local_nnz=0;
4098 
4099     ptr_vals = recv_buffer_vals;
4100     ptr_idxs = recv_buffer_idxs_local;
4101     if (n_recvs) {
4102       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4103     }
4104     for (i=0;i<n_recvs;i++) {
4105       PetscInt j;
4106       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4107         for (j=0;j<*(ptr_idxs+1);j++) {
4108           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4109         }
4110       } else {
4111         /* TODO */
4112       }
4113       ptr_idxs += olengths_idxs[i];
4114     }
4115     if (new_local_nnz) {
4116       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4117       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4118       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4119       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4120       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4121       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4122     } else {
4123       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4124     }
4125     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4126   } else {
4127     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4128   }
4129 
4130   /* set values */
4131   ptr_vals = recv_buffer_vals;
4132   ptr_idxs = recv_buffer_idxs_local;
4133   for (i=0;i<n_recvs;i++) {
4134     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4135       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4136       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4137       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4138       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4139       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4140     } else {
4141       /* TODO */
4142     }
4143     ptr_idxs += olengths_idxs[i];
4144     ptr_vals += olengths_vals[i];
4145   }
4146   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4147   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4148   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4149   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4150   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4151   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4152 
4153 #if 0
4154   if (!restrict_comm) { /* check */
4155     Vec       lvec,rvec;
4156     PetscReal infty_error;
4157 
4158     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4159     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4160     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4161     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4162     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4163     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4164     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4165     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4166     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4167   }
4168 #endif
4169 
4170   /* assemble new additional is (if any) */
4171   if (nis) {
4172     PetscInt **temp_idxs,*count_is,j,psum;
4173 
4174     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4175     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4176     ptr_idxs = recv_buffer_idxs_is;
4177     psum = 0;
4178     for (i=0;i<n_recvs;i++) {
4179       for (j=0;j<nis;j++) {
4180         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4181         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4182         psum += plen;
4183         ptr_idxs += plen+1; /* shift pointer to received data */
4184       }
4185     }
4186     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4187     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4188     for (i=1;i<nis;i++) {
4189       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4190     }
4191     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4192     ptr_idxs = recv_buffer_idxs_is;
4193     for (i=0;i<n_recvs;i++) {
4194       for (j=0;j<nis;j++) {
4195         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4196         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4197         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4198         ptr_idxs += plen+1; /* shift pointer to received data */
4199       }
4200     }
4201     for (i=0;i<nis;i++) {
4202       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4203       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4204       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4205     }
4206     ierr = PetscFree(count_is);CHKERRQ(ierr);
4207     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4208     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4209   }
4210   /* free workspace */
4211   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4212   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4213   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4214   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4215   if (isdense) {
4216     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4217     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4218   } else {
4219     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4220   }
4221   if (nis) {
4222     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4223     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4224   }
4225   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4226   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4227   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4228   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4229   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4230   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4231   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4232   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4233   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4234   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4235   ierr = PetscFree(onodes);CHKERRQ(ierr);
4236   if (nis) {
4237     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4238     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4239     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4240   }
4241   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4242   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4243     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4244     for (i=0;i<nis;i++) {
4245       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4246     }
4247     *mat_n = NULL;
4248   }
4249   PetscFunctionReturn(0);
4250 }
4251 
4252 /* temporary hack into ksp private data structure */
4253 #include <petsc/private/kspimpl.h>
4254 
4255 #undef __FUNCT__
4256 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4257 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4258 {
4259   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4260   PC_IS                  *pcis = (PC_IS*)pc->data;
4261   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4262   MatNullSpace           CoarseNullSpace=NULL;
4263   ISLocalToGlobalMapping coarse_islg;
4264   IS                     coarse_is,*isarray;
4265   PetscInt               i,im_active=-1,active_procs=-1;
4266   PetscInt               nis,nisdofs,nisneu;
4267   PC                     pc_temp;
4268   PCType                 coarse_pc_type;
4269   KSPType                coarse_ksp_type;
4270   PetscBool              multilevel_requested,multilevel_allowed;
4271   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4272   Mat                    t_coarse_mat_is;
4273   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4274   PetscMPIInt            all_procs;
4275   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4276   PetscBool              compute_vecs = PETSC_FALSE;
4277   PetscScalar            *array;
4278   PetscErrorCode         ierr;
4279 
4280   PetscFunctionBegin;
4281   /* Assign global numbering to coarse dofs */
4282   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4283     PetscInt ocoarse_size;
4284     compute_vecs = PETSC_TRUE;
4285     ocoarse_size = pcbddc->coarse_size;
4286     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4287     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4288     /* see if we can avoid some work */
4289     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4290       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4291       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4292         PC        pc;
4293         PetscBool isbddc;
4294 
4295         /* temporary workaround since PCBDDC does not have a reset method so far */
4296         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4297         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4298         if (isbddc) {
4299           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4300         }
4301         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4302         coarse_reuse = PETSC_FALSE;
4303       } else { /* we can safely reuse already computed coarse matrix */
4304         coarse_reuse = PETSC_TRUE;
4305       }
4306     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4307       coarse_reuse = PETSC_FALSE;
4308     }
4309     /* reset any subassembling information */
4310     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4311     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4312   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4313     coarse_reuse = PETSC_TRUE;
4314   }
4315 
4316   /* count "active" (i.e. with positive local size) and "void" processes */
4317   im_active = !!(pcis->n);
4318   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4319   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4320   void_procs = all_procs-active_procs;
4321   csin_type_simple = PETSC_TRUE;
4322   redist = PETSC_FALSE;
4323   if (pcbddc->current_level && void_procs) {
4324     csin_ml = PETSC_TRUE;
4325     ncoarse_ml = void_procs;
4326     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4327     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4328       csin_ds = PETSC_TRUE;
4329       ncoarse_ds = pcbddc->redistribute_coarse;
4330       redist = PETSC_TRUE;
4331     } else {
4332       csin_ds = PETSC_TRUE;
4333       ncoarse_ds = active_procs;
4334       redist = PETSC_TRUE;
4335     }
4336   } else {
4337     csin_ml = PETSC_FALSE;
4338     ncoarse_ml = all_procs;
4339     if (void_procs) {
4340       csin_ds = PETSC_TRUE;
4341       ncoarse_ds = void_procs;
4342       csin_type_simple = PETSC_FALSE;
4343     } else {
4344       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4345         csin_ds = PETSC_TRUE;
4346         ncoarse_ds = pcbddc->redistribute_coarse;
4347         redist = PETSC_TRUE;
4348       } else {
4349         csin_ds = PETSC_FALSE;
4350         ncoarse_ds = all_procs;
4351       }
4352     }
4353   }
4354 
4355   /*
4356     test if we can go multilevel: three conditions must be satisfied:
4357     - we have not exceeded the number of levels requested
4358     - we can actually subassemble the active processes
4359     - we can find a suitable number of MPI processes where we can place the subassembled problem
4360   */
4361   multilevel_allowed = PETSC_FALSE;
4362   multilevel_requested = PETSC_FALSE;
4363   if (pcbddc->current_level < pcbddc->max_levels) {
4364     multilevel_requested = PETSC_TRUE;
4365     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4366       multilevel_allowed = PETSC_FALSE;
4367     } else {
4368       multilevel_allowed = PETSC_TRUE;
4369     }
4370   }
4371   /* determine number of process partecipating to coarse solver */
4372   if (multilevel_allowed) {
4373     ncoarse = ncoarse_ml;
4374     csin = csin_ml;
4375     redist = PETSC_FALSE;
4376   } else {
4377     ncoarse = ncoarse_ds;
4378     csin = csin_ds;
4379   }
4380 
4381   /* creates temporary l2gmap and IS for coarse indexes */
4382   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4383   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4384 
4385   /* creates temporary MATIS object for coarse matrix */
4386   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4387   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4388   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4389   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4390 #if 0
4391   {
4392     PetscViewer viewer;
4393     char filename[256];
4394     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4395     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4396     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4397     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4398     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4399   }
4400 #endif
4401   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4402   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4403   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4404   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4405   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4406 
4407   /* compute dofs splitting and neumann boundaries for coarse dofs */
4408   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4409     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4410     const PetscInt         *idxs;
4411     ISLocalToGlobalMapping tmap;
4412 
4413     /* create map between primal indices (in local representative ordering) and local primal numbering */
4414     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4415     /* allocate space for temporary storage */
4416     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4417     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4418     /* allocate for IS array */
4419     nisdofs = pcbddc->n_ISForDofsLocal;
4420     nisneu = !!pcbddc->NeumannBoundariesLocal;
4421     nis = nisdofs + nisneu;
4422     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4423     /* dofs splitting */
4424     for (i=0;i<nisdofs;i++) {
4425       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4426       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4427       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4428       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4429       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4430       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4431       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4432       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4433     }
4434     /* neumann boundaries */
4435     if (pcbddc->NeumannBoundariesLocal) {
4436       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4437       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4438       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4439       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4440       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4441       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4442       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4443       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4444     }
4445     /* free memory */
4446     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4447     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4448     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4449   } else {
4450     nis = 0;
4451     nisdofs = 0;
4452     nisneu = 0;
4453     isarray = NULL;
4454   }
4455   /* destroy no longer needed map */
4456   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4457 
4458   /* restrict on coarse candidates (if needed) */
4459   coarse_mat_is = NULL;
4460   if (csin) {
4461     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4462       if (redist) {
4463         PetscMPIInt rank;
4464         PetscInt    spc,n_spc_p1,dest[1],destsize;
4465 
4466         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4467         spc = active_procs/ncoarse;
4468         n_spc_p1 = active_procs%ncoarse;
4469         if (im_active) {
4470           destsize = 1;
4471           if (rank > n_spc_p1*(spc+1)-1) {
4472             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4473           } else {
4474             dest[0] = rank/(spc+1);
4475           }
4476         } else {
4477           destsize = 0;
4478         }
4479         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4480       } else if (csin_type_simple) {
4481         PetscMPIInt rank;
4482         PetscInt    issize,isidx;
4483 
4484         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4485         if (im_active) {
4486           issize = 1;
4487           isidx = (PetscInt)rank;
4488         } else {
4489           issize = 0;
4490           isidx = -1;
4491         }
4492         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4493       } else { /* get a suitable subassembling pattern from MATIS code */
4494         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4495       }
4496 
4497       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4498       if (!redist || ncoarse <= void_procs) {
4499         PetscInt ncoarse_cand,tissize,*nisindices;
4500         PetscInt *coarse_candidates;
4501         const PetscInt* tisindices;
4502 
4503         /* get coarse candidates' ranks in pc communicator */
4504         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4505         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4506         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4507           if (!coarse_candidates[i]) {
4508             coarse_candidates[ncoarse_cand++]=i;
4509           }
4510         }
4511         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4512 
4513 
4514         if (pcbddc->dbg_flag) {
4515           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4516           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4517           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4518           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4519           for (i=0;i<ncoarse_cand;i++) {
4520             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4521           }
4522           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4523           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4524         }
4525         /* shift the pattern on coarse candidates */
4526         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4527         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4528         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4529         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4530         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4531         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4532         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4533       }
4534       if (pcbddc->dbg_flag) {
4535         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4536         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4537         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4538         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4539       }
4540     }
4541     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4542     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4543       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4544     } else { /* this is the last level, so use just receiving processes in subcomm */
4545       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4546     }
4547   } else {
4548     if (pcbddc->dbg_flag) {
4549       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4550       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4551       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4552     }
4553     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4554     coarse_mat_is = t_coarse_mat_is;
4555   }
4556 
4557   /* create local to global scatters for coarse problem */
4558   if (compute_vecs) {
4559     PetscInt lrows;
4560     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4561     if (coarse_mat_is) {
4562       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4563     } else {
4564       lrows = 0;
4565     }
4566     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4567     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4568     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4569     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4570     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4571   }
4572   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4573   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4574 
4575   /* set defaults for coarse KSP and PC */
4576   if (multilevel_allowed) {
4577     coarse_ksp_type = KSPRICHARDSON;
4578     coarse_pc_type = PCBDDC;
4579   } else {
4580     coarse_ksp_type = KSPPREONLY;
4581     coarse_pc_type = PCREDUNDANT;
4582   }
4583 
4584   /* print some info if requested */
4585   if (pcbddc->dbg_flag) {
4586     if (!multilevel_allowed) {
4587       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4588       if (multilevel_requested) {
4589         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4590       } else if (pcbddc->max_levels) {
4591         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4592       }
4593       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4594     }
4595   }
4596 
4597   /* create the coarse KSP object only once with defaults */
4598   if (coarse_mat_is) {
4599     MatReuse coarse_mat_reuse;
4600     PetscViewer dbg_viewer = NULL;
4601     if (pcbddc->dbg_flag) {
4602       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4603       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4604     }
4605     if (!pcbddc->coarse_ksp) {
4606       char prefix[256],str_level[16];
4607       size_t len;
4608       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4609       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4610       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4611       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4612       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4613       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4614       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4615       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4616       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4617       /* prefix */
4618       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4619       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4620       if (!pcbddc->current_level) {
4621         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4622         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4623       } else {
4624         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4625         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4626         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4627         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4628         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4629         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4630       }
4631       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4632       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4633       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4634       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4635       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4636       /* allow user customization */
4637       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4638     }
4639     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4640     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4641     if (nisdofs) {
4642       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4643       for (i=0;i<nisdofs;i++) {
4644         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4645       }
4646     }
4647     if (nisneu) {
4648       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4649       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4650     }
4651 
4652     /* get some info after set from options */
4653     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4654     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4655     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4656     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4657       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4658       isbddc = PETSC_FALSE;
4659     }
4660     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4661     if (isredundant) {
4662       KSP inner_ksp;
4663       PC  inner_pc;
4664       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4665       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4666       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4667     }
4668 
4669     /* assemble coarse matrix */
4670     if (coarse_reuse) {
4671       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4672       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4673       coarse_mat_reuse = MAT_REUSE_MATRIX;
4674     } else {
4675       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4676     }
4677     if (isbddc || isnn) {
4678       if (pcbddc->coarsening_ratio > 1) {
4679         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4680           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4681           if (pcbddc->dbg_flag) {
4682             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4683             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4684             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4685             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4686           }
4687         }
4688         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4689       } else {
4690         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4691         coarse_mat = coarse_mat_is;
4692       }
4693     } else {
4694       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4695     }
4696     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4697 
4698     /* propagate symmetry info of coarse matrix */
4699     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4700     if (pc->pmat->symmetric_set) {
4701       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4702     }
4703     if (pc->pmat->hermitian_set) {
4704       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4705     }
4706     if (pc->pmat->spd_set) {
4707       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4708     }
4709     /* set operators */
4710     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4711     if (pcbddc->dbg_flag) {
4712       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4713     }
4714   } else { /* processes non partecipating to coarse solver (if any) */
4715     coarse_mat = 0;
4716   }
4717   ierr = PetscFree(isarray);CHKERRQ(ierr);
4718 #if 0
4719   {
4720     PetscViewer viewer;
4721     char filename[256];
4722     sprintf(filename,"coarse_mat.m");
4723     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4724     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4725     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4726     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4727   }
4728 #endif
4729 
4730   /* Compute coarse null space (special handling by BDDC only) */
4731 #if 0
4732   if (pcbddc->NullSpace) {
4733     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4734   }
4735 #endif
4736 
4737   if (pcbddc->coarse_ksp) {
4738     Vec crhs,csol;
4739     PetscBool ispreonly;
4740 
4741     if (CoarseNullSpace) {
4742       if (isbddc) {
4743         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4744       } else {
4745         ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
4746       }
4747     }
4748     /* setup coarse ksp */
4749     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4750     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4751     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4752     /* hack */
4753     if (!csol) {
4754       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4755     }
4756     if (!crhs) {
4757       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4758     }
4759     /* Check coarse problem if in debug mode or if solving with an iterative method */
4760     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4761     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4762       KSP       check_ksp;
4763       KSPType   check_ksp_type;
4764       PC        check_pc;
4765       Vec       check_vec,coarse_vec;
4766       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4767       PetscInt  its;
4768       PetscBool compute_eigs;
4769       PetscReal *eigs_r,*eigs_c;
4770       PetscInt  neigs;
4771       const char *prefix;
4772 
4773       /* Create ksp object suitable for estimation of extreme eigenvalues */
4774       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4775       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4776       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4777       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4778       if (ispreonly) {
4779         check_ksp_type = KSPPREONLY;
4780         compute_eigs = PETSC_FALSE;
4781       } else {
4782         check_ksp_type = KSPGMRES;
4783         compute_eigs = PETSC_TRUE;
4784       }
4785       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4786       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4787       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4788       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4789       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4790       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4791       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4792       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4793       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4794       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4795       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4796       /* create random vec */
4797       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4798       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4799       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4800       if (CoarseNullSpace) {
4801         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4802       }
4803       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4804       /* solve coarse problem */
4805       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4806       if (CoarseNullSpace) {
4807         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4808       }
4809       /* set eigenvalue estimation if preonly has not been requested */
4810       if (compute_eigs) {
4811         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4812         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4813         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4814         lambda_max = eigs_r[neigs-1];
4815         lambda_min = eigs_r[0];
4816         if (pcbddc->use_coarse_estimates) {
4817           if (lambda_max>lambda_min) {
4818             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4819             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4820           }
4821         }
4822       }
4823 
4824       /* check coarse problem residual error */
4825       if (pcbddc->dbg_flag) {
4826         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4827         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4828         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4829         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4830         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4831         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4832         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4833         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4834         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4835         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4836         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4837         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4838         if (compute_eigs) {
4839           PetscReal lambda_max_s,lambda_min_s;
4840           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4841           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4842           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4843           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4844           for (i=0;i<neigs;i++) {
4845             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4846           }
4847         }
4848         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4849         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4850       }
4851       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4852       if (compute_eigs) {
4853         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4854         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4855       }
4856     }
4857   }
4858   /* print additional info */
4859   if (pcbddc->dbg_flag) {
4860     /* waits until all processes reaches this point */
4861     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4862     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4863     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4864   }
4865 
4866   /* free memory */
4867   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4868   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4869   PetscFunctionReturn(0);
4870 }
4871 
4872 #undef __FUNCT__
4873 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4874 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4875 {
4876   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4877   PC_IS*         pcis = (PC_IS*)pc->data;
4878   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4879   IS             subset,subset_mult,subset_n;
4880   PetscInt       local_size,coarse_size=0;
4881   PetscInt       *local_primal_indices=NULL;
4882   const PetscInt *t_local_primal_indices;
4883   PetscErrorCode ierr;
4884 
4885   PetscFunctionBegin;
4886   /* Compute global number of coarse dofs */
4887   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4888     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4889   }
4890   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
4891   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
4892   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4893   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
4894   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
4895   ierr = ISDestroy(&subset);CHKERRQ(ierr);
4896   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
4897   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
4898   if (local_size != pcbddc->local_primal_size) {
4899     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4900   }
4901   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
4902   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4903   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
4904   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4905   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4906 
4907   /* check numbering */
4908   if (pcbddc->dbg_flag) {
4909     PetscScalar coarsesum,*array,*array2;
4910     PetscInt    i;
4911     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4912 
4913     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4914     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4915     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4916     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4917     /* counter */
4918     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4919     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
4920     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4921     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4922     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4923     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4924 
4925     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4926     for (i=0;i<pcbddc->local_primal_size;i++) {
4927       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4928     }
4929     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4930     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4931     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4932     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4933     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4934     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4935     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4936     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4937     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
4938     for (i=0;i<pcis->n;i++) {
4939       if (array[i] != 0.0 && array[i] != array2[i]) {
4940         PetscInt owned = (PetscInt)(array[i]);
4941         PetscInt neigh = (PetscInt)(array2[i]);
4942         set_error = PETSC_TRUE;
4943         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
4944       }
4945     }
4946     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
4947     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4948     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4949     for (i=0;i<pcis->n;i++) {
4950       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4951     }
4952     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4953     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4954     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4955     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4956     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4957     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4958     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4959       PetscInt *gidxs;
4960 
4961       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4962       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4963       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4964       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4965       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4966       for (i=0;i<pcbddc->local_primal_size;i++) {
4967         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4968       }
4969       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4970       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4971     }
4972     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4973     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4974   }
4975   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4976   /* get back data */
4977   *coarse_size_n = coarse_size;
4978   *local_primal_indices_n = local_primal_indices;
4979   PetscFunctionReturn(0);
4980 }
4981 
4982 #undef __FUNCT__
4983 #define __FUNCT__ "PCBDDCGlobalToLocal"
4984 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4985 {
4986   IS             localis_t;
4987   PetscInt       i,lsize,*idxs,n;
4988   PetscScalar    *vals;
4989   PetscErrorCode ierr;
4990 
4991   PetscFunctionBegin;
4992   /* get indices in local ordering exploiting local to global map */
4993   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4994   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4995   for (i=0;i<lsize;i++) vals[i] = 1.0;
4996   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4997   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4998   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4999   if (idxs) { /* multilevel guard */
5000     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5001   }
5002   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5003   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5004   ierr = PetscFree(vals);CHKERRQ(ierr);
5005   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5006   /* now compute set in local ordering */
5007   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5008   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5009   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5010   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5011   for (i=0,lsize=0;i<n;i++) {
5012     if (PetscRealPart(vals[i]) > 0.5) {
5013       lsize++;
5014     }
5015   }
5016   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5017   for (i=0,lsize=0;i<n;i++) {
5018     if (PetscRealPart(vals[i]) > 0.5) {
5019       idxs[lsize++] = i;
5020     }
5021   }
5022   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5023   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5024   *localis = localis_t;
5025   PetscFunctionReturn(0);
5026 }
5027 
5028 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5029 #undef __FUNCT__
5030 #define __FUNCT__ "PCBDDCMatMult_Private"
5031 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5032 {
5033   PCBDDCChange_ctx change_ctx;
5034   PetscErrorCode   ierr;
5035 
5036   PetscFunctionBegin;
5037   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5038   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5039   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5040   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5041   PetscFunctionReturn(0);
5042 }
5043 
5044 #undef __FUNCT__
5045 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5046 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5047 {
5048   PCBDDCChange_ctx change_ctx;
5049   PetscErrorCode   ierr;
5050 
5051   PetscFunctionBegin;
5052   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5053   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5054   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5055   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5056   PetscFunctionReturn(0);
5057 }
5058 
5059 #undef __FUNCT__
5060 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5061 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5062 {
5063   PC_IS               *pcis=(PC_IS*)pc->data;
5064   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5065   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5066   Mat                 S_j;
5067   PetscInt            *used_xadj,*used_adjncy;
5068   PetscBool           free_used_adj;
5069   PetscErrorCode      ierr;
5070 
5071   PetscFunctionBegin;
5072   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5073   free_used_adj = PETSC_FALSE;
5074   if (pcbddc->sub_schurs_layers == -1) {
5075     used_xadj = NULL;
5076     used_adjncy = NULL;
5077   } else {
5078     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5079       used_xadj = pcbddc->mat_graph->xadj;
5080       used_adjncy = pcbddc->mat_graph->adjncy;
5081     } else if (pcbddc->computed_rowadj) {
5082       used_xadj = pcbddc->mat_graph->xadj;
5083       used_adjncy = pcbddc->mat_graph->adjncy;
5084     } else {
5085       PetscBool      flg_row=PETSC_FALSE;
5086       const PetscInt *xadj,*adjncy;
5087       PetscInt       nvtxs;
5088 
5089       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5090       if (flg_row) {
5091         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5092         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5093         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5094         free_used_adj = PETSC_TRUE;
5095       } else {
5096         pcbddc->sub_schurs_layers = -1;
5097         used_xadj = NULL;
5098         used_adjncy = NULL;
5099       }
5100       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5101     }
5102   }
5103 
5104   /* setup sub_schurs data */
5105   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5106   if (!sub_schurs->use_mumps) {
5107     /* pcbddc->ksp_D up to date only if not using MUMPS */
5108     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5109     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5110   } else {
5111     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5112     PetscBool isseqaij;
5113     if (!pcbddc->use_vertices && reuse_solvers) {
5114       PetscInt n_vertices;
5115 
5116       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5117       reuse_solvers = (PetscBool)!n_vertices;
5118     }
5119     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5120     if (!isseqaij) {
5121       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5122       if (matis->A == pcbddc->local_mat) {
5123         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5124         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5125       } else {
5126         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5127       }
5128     }
5129     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5130   }
5131   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5132 
5133   /* free adjacency */
5134   if (free_used_adj) {
5135     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5136   }
5137   PetscFunctionReturn(0);
5138 }
5139 
5140 #undef __FUNCT__
5141 #define __FUNCT__ "PCBDDCInitSubSchurs"
5142 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5143 {
5144   PC_IS               *pcis=(PC_IS*)pc->data;
5145   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5146   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5147   PCBDDCGraph         graph;
5148   PetscErrorCode      ierr;
5149 
5150   PetscFunctionBegin;
5151   /* attach interface graph for determining subsets */
5152   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5153     IS       verticesIS,verticescomm;
5154     PetscInt vsize,*idxs;
5155 
5156     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5157     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5158     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5159     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5160     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5161     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5162     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5163     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5164     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5165     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5166     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5167 /*
5168     if (pcbddc->dbg_flag) {
5169       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5170     }
5171 */
5172   } else {
5173     graph = pcbddc->mat_graph;
5174   }
5175 
5176   /* sub_schurs init */
5177   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5178 
5179   /* free graph struct */
5180   if (pcbddc->sub_schurs_rebuild) {
5181     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5182   }
5183   PetscFunctionReturn(0);
5184 }
5185