xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 7b026bddef162fc88d957ead1c55e55eee935bb7)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
10 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   const PetscInt *idxs;
14   PetscInt       nz = 0;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   if (!pcbddc->zerodiag) {
19     PetscFunctionReturn(0);
20   } else {
21     ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr);
22     if (!nz) PetscFunctionReturn(0);
23   }
24   ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr);
25   /* TODO: add error checking
26     - avoid nested pop (or push) calls.
27     - cannot push before pop.
28   */
29   if (pop) {
30     const PetscInt    *cB0_cols;
31     PetscInt          cB0_ncol;
32     const PetscScalar *cB0_vals;
33 
34     /* extract B_0 */
35     ierr = MatGetRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
36     pcbddc->B0_ncol = cB0_ncol;
37     ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
38     ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr);
39     ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr);
40     ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr);
41     ierr = MatRestoreRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
42     /* remove rows and cols from local problem */
43     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
44     ierr = MatZeroRowsColumns(pcbddc->local_mat,1,idxs+nz-1,1.,NULL,NULL);CHKERRQ(ierr);
45   } else { /* push */
46     ierr = MatSetValues(pcbddc->local_mat,1,&idxs[nz-1],pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
47     ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&idxs[nz-1],pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
48     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
49     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
50   }
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "PCBDDCAdaptiveSelection"
56 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
57 {
58   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
59   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
60   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
61   PetscBLASInt    *B_iwork,*B_ifail;
62   PetscScalar     *work,lwork;
63   PetscScalar     *St,*S,*eigv;
64   PetscScalar     *Sarray,*Starray;
65   PetscReal       *eigs,thresh;
66   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
67   PetscBool       allocated_S_St;
68 #if defined(PETSC_USE_COMPLEX)
69   PetscReal       *rwork;
70 #endif
71   PetscErrorCode  ierr;
72 
73   PetscFunctionBegin;
74   if (!sub_schurs->use_mumps) {
75     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
76   }
77 
78   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
79     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
80   }
81 
82   if (pcbddc->dbg_flag) {
83     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
84     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
85     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
86     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
87   }
88 
89   if (pcbddc->dbg_flag) {
90     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
91   }
92 
93   /* max size of subsets */
94   mss = 0;
95   for (i=0;i<sub_schurs->n_subs;i++) {
96     PetscInt subset_size;
97 
98     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
99     mss = PetscMax(mss,subset_size);
100   }
101 
102   /* min/max and threshold */
103   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
104   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
105   nmax = PetscMax(nmin,nmax);
106   allocated_S_St = PETSC_FALSE;
107   if (nmin) {
108     allocated_S_St = PETSC_TRUE;
109   }
110 
111   /* allocate lapack workspace */
112   cum = cum2 = 0;
113   maxneigs = 0;
114   for (i=0;i<sub_schurs->n_subs;i++) {
115     PetscInt n,subset_size;
116 
117     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
118     n = PetscMin(subset_size,nmax);
119     cum += subset_size;
120     cum2 += subset_size*n;
121     maxneigs = PetscMax(maxneigs,n);
122   }
123   if (mss) {
124     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
125       PetscBLASInt B_itype = 1;
126       PetscBLASInt B_N = mss;
127       PetscReal    zero = 0.0;
128       PetscReal    eps = 0.0; /* dlamch? */
129 
130       B_lwork = -1;
131       S = NULL;
132       St = NULL;
133       eigs = NULL;
134       eigv = NULL;
135       B_iwork = NULL;
136       B_ifail = NULL;
137 #if defined(PETSC_USE_COMPLEX)
138       rwork = NULL;
139 #endif
140       thresh = 1.0;
141       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
142 #if defined(PETSC_USE_COMPLEX)
143       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
144 #else
145       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
146 #endif
147       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
148       ierr = PetscFPTrapPop();CHKERRQ(ierr);
149     } else {
150         /* TODO */
151     }
152   } else {
153     lwork = 0;
154   }
155 
156   nv = 0;
157   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
158     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
159   }
160   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
161   if (allocated_S_St) {
162     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
163   }
164   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
165 #if defined(PETSC_USE_COMPLEX)
166   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
167 #endif
168   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
169                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
170                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
171                       nv+cum,&pcbddc->adaptive_constraints_idxs,
172                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
173   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
174 
175   maxneigs = 0;
176   cum = cum2 = cumarray = 0;
177   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
178   pcbddc->adaptive_constraints_data_ptr[0] = 0;
179   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
180     const PetscInt *idxs;
181 
182     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
183     for (cum=0;cum<nv;cum++) {
184       pcbddc->adaptive_constraints_n[cum] = 1;
185       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
186       pcbddc->adaptive_constraints_data[cum] = 1.0;
187       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
188       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
189     }
190     cum2 = cum;
191     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
192   }
193 
194   if (mss) { /* multilevel */
195     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
196     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
197   }
198 
199   for (i=0;i<sub_schurs->n_subs;i++) {
200 
201     const PetscInt *idxs;
202     PetscReal      infty = PETSC_MAX_REAL;
203     PetscInt       j,subset_size,eigs_start = 0;
204     PetscBLASInt   B_N;
205     PetscBool      same_data = PETSC_FALSE;
206 
207     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
208     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
209     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
210       if (sub_schurs->is_hermitian) {
211         PetscInt j,k;
212         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
213           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
214           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
215         }
216         for (j=0;j<subset_size;j++) {
217           for (k=j;k<subset_size;k++) {
218             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
219             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
220           }
221         }
222       } else {
223         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
224         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
225       }
226     } else {
227       S = Sarray + cumarray;
228       St = Starray + cumarray;
229     }
230 
231     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
232     /* see if we can save some work */
233     if (sub_schurs->n_subs == 1) {
234       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
235     }
236 
237     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
238       B_neigs = 0;
239     } else {
240       /* Threshold: this is an heuristic for edges */
241       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
242 
243       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
244         PetscBLASInt B_itype = 1;
245         PetscBLASInt B_IL, B_IU;
246         PetscReal    eps = -1.0; /* dlamch? */
247         PetscInt     nmin_s;
248 
249         /* ask for eigenvalues larger than thresh */
250         if (pcbddc->dbg_flag) {
251           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
252         }
253         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
254 #if defined(PETSC_USE_COMPLEX)
255         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
256 #else
257         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
258 #endif
259         ierr = PetscFPTrapPop();CHKERRQ(ierr);
260         if (B_ierr) {
261           if (B_ierr < 0 ) {
262             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
263           } else if (B_ierr <= B_N) {
264             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
265           } else {
266             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
267           }
268         }
269 
270         if (B_neigs > nmax) {
271           if (pcbddc->dbg_flag) {
272             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
273           }
274           eigs_start = B_neigs -nmax;
275           B_neigs = nmax;
276         }
277 
278         nmin_s = PetscMin(nmin,B_N);
279         if (B_neigs < nmin_s) {
280           PetscBLASInt B_neigs2;
281 
282           B_IU = B_N - B_neigs;
283           B_IL = B_N - nmin_s + 1;
284           if (pcbddc->dbg_flag) {
285             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
286           }
287           if (sub_schurs->is_hermitian) {
288             PetscInt j;
289             for (j=0;j<subset_size;j++) {
290               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
291             }
292             for (j=0;j<subset_size;j++) {
293               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
294             }
295           } else {
296             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
297             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
298           }
299           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
300 #if defined(PETSC_USE_COMPLEX)
301           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
302 #else
303           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
304 #endif
305           ierr = PetscFPTrapPop();CHKERRQ(ierr);
306           B_neigs += B_neigs2;
307         }
308         if (B_ierr) {
309           if (B_ierr < 0 ) {
310             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
311           } else if (B_ierr <= B_N) {
312             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
313           } else {
314             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
315           }
316         }
317         if (pcbddc->dbg_flag) {
318           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
319           for (j=0;j<B_neigs;j++) {
320             if (eigs[j] == 0.0) {
321               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
322             } else {
323               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
324             }
325           }
326         }
327       } else {
328           /* TODO */
329       }
330     }
331     maxneigs = PetscMax(B_neigs,maxneigs);
332     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
333     if (B_neigs) {
334       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
335 
336       if (pcbddc->dbg_flag > 1) {
337         PetscInt ii;
338         for (ii=0;ii<B_neigs;ii++) {
339           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
340           for (j=0;j<B_N;j++) {
341 #if defined(PETSC_USE_COMPLEX)
342             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
343             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
344             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
345 #else
346             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
347 #endif
348           }
349         }
350       }
351 #if 0
352       for (j=0;j<B_neigs;j++) {
353         PetscBLASInt Blas_N,Blas_one = 1.0;
354         PetscScalar norm;
355         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
356         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
357                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
358         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
359           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
360         } else {
361           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
362         }
363         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
364       }
365 #endif
366       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
367       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
368       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
369       cum++;
370     }
371     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
372     /* shift for next computation */
373     cumarray += subset_size*subset_size;
374   }
375   if (pcbddc->dbg_flag) {
376     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
377   }
378 
379   if (mss) {
380     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
381     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
382     /* destroy matrices (junk) */
383     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
384     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
385   }
386   if (allocated_S_St) {
387     ierr = PetscFree2(S,St);CHKERRQ(ierr);
388   }
389   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
390 #if defined(PETSC_USE_COMPLEX)
391   ierr = PetscFree(rwork);CHKERRQ(ierr);
392 #endif
393   if (pcbddc->dbg_flag) {
394     PetscInt maxneigs_r;
395     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
396     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
397   }
398   PetscFunctionReturn(0);
399 }
400 
401 #undef __FUNCT__
402 #define __FUNCT__ "PCBDDCSetUpSolvers"
403 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
404 {
405   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
406   PetscScalar    *coarse_submat_vals;
407   PetscErrorCode ierr;
408 
409   PetscFunctionBegin;
410   /* Setup local scatters R_to_B and (optionally) R_to_D */
411   /* PCBDDCSetUpLocalWorkVectors should be called first! */
412   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
413 
414   /* Setup local neumann solver ksp_R */
415   /* PCBDDCSetUpLocalScatters should be called first! */
416   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
417 
418   /* Change global null space passed in by the user if change of basis has been requested */
419   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
420     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
421   }
422 
423   /*
424      Setup local correction and local part of coarse basis.
425      Gives back the dense local part of the coarse matrix in column major ordering
426   */
427   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
428 
429   /* Compute total number of coarse nodes and setup coarse solver */
430   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
431 
432   /* free */
433   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
434   PetscFunctionReturn(0);
435 }
436 
437 #undef __FUNCT__
438 #define __FUNCT__ "PCBDDCResetCustomization"
439 PetscErrorCode PCBDDCResetCustomization(PC pc)
440 {
441   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
442   PetscErrorCode ierr;
443 
444   PetscFunctionBegin;
445   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
446   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
447   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
449   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
450   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
451   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
452   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
453   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
454   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
455   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "PCBDDCResetTopography"
461 PetscErrorCode PCBDDCResetTopography(PC pc)
462 {
463   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
464   PetscErrorCode ierr;
465 
466   PetscFunctionBegin;
467   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
468   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
469   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
470   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
471   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "PCBDDCResetSolvers"
477 PetscErrorCode PCBDDCResetSolvers(PC pc)
478 {
479   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
480   PetscScalar    *array;
481   PetscErrorCode ierr;
482 
483   PetscFunctionBegin;
484   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
485   if (pcbddc->coarse_phi_B) {
486     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
487     ierr = PetscFree(array);CHKERRQ(ierr);
488   }
489   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
490   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
491   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
492   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
493   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
494   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
495   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
496   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
497   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
498   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
499   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
500   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
501   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
502   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
503   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
504   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
505   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
506   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
507   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
508   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
509   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
510   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
511   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
512   ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr);
513   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
514   ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 #undef __FUNCT__
519 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
520 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
521 {
522   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
523   PC_IS          *pcis = (PC_IS*)pc->data;
524   VecType        impVecType;
525   PetscInt       n_constraints,n_R,old_size;
526   PetscErrorCode ierr;
527 
528   PetscFunctionBegin;
529   if (!pcbddc->ConstraintMatrix) {
530     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
531   }
532   /* get sizes */
533   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
534   n_R = pcis->n-pcbddc->n_vertices;
535   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
536   /* local work vectors (try to avoid unneeded work)*/
537   /* R nodes */
538   old_size = -1;
539   if (pcbddc->vec1_R) {
540     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
541   }
542   if (n_R != old_size) {
543     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
544     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
545     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
546     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
547     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
548     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
549   }
550   /* local primal dofs */
551   old_size = -1;
552   if (pcbddc->vec1_P) {
553     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
554   }
555   if (pcbddc->local_primal_size != old_size) {
556     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
557     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
558     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
559     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
560   }
561   /* local explicit constraints */
562   old_size = -1;
563   if (pcbddc->vec1_C) {
564     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
565   }
566   if (n_constraints && n_constraints != old_size) {
567     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
568     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
569     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
570     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
571   }
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "PCBDDCSetUpCorrection"
577 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
578 {
579   PetscErrorCode  ierr;
580   /* pointers to pcis and pcbddc */
581   PC_IS*          pcis = (PC_IS*)pc->data;
582   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
583   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
584   /* submatrices of local problem */
585   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
586   /* submatrices of local coarse problem */
587   Mat             S_VV,S_CV,S_VC,S_CC;
588   /* working matrices */
589   Mat             C_CR;
590   /* additional working stuff */
591   PC              pc_R;
592   Mat             F;
593   PetscBool       isLU,isCHOL,isILU;
594 
595   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
596   PetscScalar     *work;
597   PetscInt        *idx_V_B;
598   PetscInt        n,n_vertices,n_constraints;
599   PetscInt        i,n_R,n_D,n_B;
600   PetscBool       unsymmetric_check;
601   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
602   MatType         impMatType;
603   /* some shortcuts to scalars */
604   PetscScalar     one=1.0,m_one=-1.0;
605 
606   PetscFunctionBegin;
607   n_vertices = pcbddc->n_vertices;
608   n_constraints = pcbddc->local_primal_size-n_vertices;
609   /* Set Non-overlapping dimensions */
610   n_B = pcis->n_B;
611   n_D = pcis->n - n_B;
612   n_R = pcis->n - n_vertices;
613 
614   /* Set types for local objects needed by BDDC precondtioner */
615   impMatType = MATSEQDENSE;
616 
617   /* vertices in boundary numbering */
618   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
619   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
620   if (i != n_vertices) {
621     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
622   }
623 
624   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
625   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
626   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
627   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
628   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
629   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
630   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
631   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
632   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
633   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
634 
635   unsymmetric_check = PETSC_FALSE;
636   /* allocate workspace */
637   n = 0;
638   if (n_constraints) {
639     n += n_R*n_constraints;
640   }
641   if (n_vertices) {
642     n = PetscMax(2*n_R*n_vertices,n);
643     n = PetscMax((n_R+n_B)*n_vertices,n);
644   }
645   if (!pcbddc->symmetric_primal) {
646     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
647     unsymmetric_check = PETSC_TRUE;
648   }
649   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
650 
651   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
652   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
653   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
654   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
655   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
656   if (isLU || isILU || isCHOL) {
657     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
658   } else if (sub_schurs->reuse_mumps) {
659     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
660     MatFactorType type;
661 
662     F = reuse_mumps->F;
663     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
664     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
665   } else {
666     F = NULL;
667   }
668 
669   /* Precompute stuffs needed for preprocessing and application of BDDC*/
670   if (n_constraints) {
671     Mat         M1,M2,M3;
672     Mat         auxmat;
673     IS          is_aux;
674     PetscScalar *array,*array2;
675 
676     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
677     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
678 
679     /* Extract constraints on R nodes: C_{CR}  */
680     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
681     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
682     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
683 
684     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
685     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
686     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
687     for (i=0;i<n_constraints;i++) {
688       const PetscScalar *row_cmat_values;
689       const PetscInt    *row_cmat_indices;
690       PetscInt          size_of_constraint,j;
691 
692       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
693       for (j=0;j<size_of_constraint;j++) {
694         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
695       }
696       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
697     }
698     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
699     if (F) {
700       Mat B;
701 
702       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
703       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
704       ierr = MatDestroy(&B);CHKERRQ(ierr);
705     } else {
706       PetscScalar *marr;
707 
708       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
709       for (i=0;i<n_constraints;i++) {
710         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
711         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
712         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
713         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
714         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
715       }
716       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
717     }
718     if (!pcbddc->switch_static) {
719       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
720       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
721       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
722       for (i=0;i<n_constraints;i++) {
723         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
724         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
725         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
726         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
728         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
729       }
730       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
731       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
732       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
733     } else {
734       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
735       pcbddc->local_auxmat2 = local_auxmat2_R;
736       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
737     }
738     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
739     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
740     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
741     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
742     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
743     if (isCHOL) {
744       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
745     } else {
746       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
747     }
748     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
749     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
750     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
751     ierr = MatDestroy(&M2);CHKERRQ(ierr);
752     ierr = MatDestroy(&M3);CHKERRQ(ierr);
753     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
754     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
755     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
756     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
757     ierr = MatDestroy(&M1);CHKERRQ(ierr);
758   }
759   /* Get submatrices from subdomain matrix */
760   if (n_vertices) {
761     IS is_aux;
762 
763     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
764       IS tis;
765 
766       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
767       ierr = ISSort(tis);CHKERRQ(ierr);
768       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
769       ierr = ISDestroy(&tis);CHKERRQ(ierr);
770     } else {
771       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
772     }
773     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
774     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
775     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
776     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
777   }
778 
779   /* Matrix of coarse basis functions (local) */
780   if (pcbddc->coarse_phi_B) {
781     PetscInt on_B,on_primal,on_D=n_D;
782     if (pcbddc->coarse_phi_D) {
783       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
784     }
785     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
786     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
787       PetscScalar *marray;
788 
789       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
790       ierr = PetscFree(marray);CHKERRQ(ierr);
791       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
792       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
793       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
794       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
795     }
796   }
797 
798   if (!pcbddc->coarse_phi_B) {
799     PetscScalar *marray;
800 
801     n = n_B*pcbddc->local_primal_size;
802     if (pcbddc->switch_static || pcbddc->dbg_flag) {
803       n += n_D*pcbddc->local_primal_size;
804     }
805     if (!pcbddc->symmetric_primal) {
806       n *= 2;
807     }
808     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
809     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
810     n = n_B*pcbddc->local_primal_size;
811     if (pcbddc->switch_static || pcbddc->dbg_flag) {
812       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
813       n += n_D*pcbddc->local_primal_size;
814     }
815     if (!pcbddc->symmetric_primal) {
816       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
817       if (pcbddc->switch_static || pcbddc->dbg_flag) {
818         n = n_B*pcbddc->local_primal_size;
819         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
820       }
821     } else {
822       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
823       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
824       if (pcbddc->switch_static || pcbddc->dbg_flag) {
825         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
826         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
827       }
828     }
829   }
830   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
831   /* vertices */
832   if (n_vertices) {
833 
834     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
835 
836     if (n_R) {
837       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
838       PetscBLASInt B_N,B_one = 1;
839       PetscScalar  *x,*y;
840       PetscBool    isseqaij;
841 
842       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
843       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
844       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
845       if (F) { /* TODO could be optimized for symmetric problems */
846         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
847       } else {
848         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
849         for (i=0;i<n_vertices;i++) {
850           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
851           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
852           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
853           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
854           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
855         }
856         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
857       }
858       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
859       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
860       if (n_constraints) {
861         Mat B;
862 
863         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
864         for (i=0;i<n_vertices;i++) {
865           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
866           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
867           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
870           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
871         }
872         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
873         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
874         ierr = MatDestroy(&B);CHKERRQ(ierr);
875         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
876         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
877         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
878         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
879         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
880         ierr = MatDestroy(&B);CHKERRQ(ierr);
881       }
882       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
883       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
884         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
885       }
886       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
887       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
888       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
889       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
890       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
891       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
892       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
893       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
894       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
895       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
896     } else {
897       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
898     }
899     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
900     /* coarse basis functions */
901     for (i=0;i<n_vertices;i++) {
902       PetscScalar *y;
903 
904       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
905       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
906       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
907       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909       y[n_B*i+idx_V_B[i]] = 1.0;
910       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
911       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
912 
913       if (pcbddc->switch_static || pcbddc->dbg_flag) {
914         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
915         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
916         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
919         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
920       }
921       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
922     }
923     /* if n_R == 0 the object is not destroyed */
924     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
925   }
926 
927   if (n_constraints) {
928     Mat B;
929 
930     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
931     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
932     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
933     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
934     if (n_vertices) {
935       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
936         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
937       } else {
938         Mat S_VCt;
939 
940         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
941         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
942         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
943       }
944     }
945     ierr = MatDestroy(&B);CHKERRQ(ierr);
946     /* coarse basis functions */
947     for (i=0;i<n_constraints;i++) {
948       PetscScalar *y;
949 
950       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
951       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
952       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
953       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
956       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
957       if (pcbddc->switch_static || pcbddc->dbg_flag) {
958         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
959         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
960         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
963         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
964       }
965       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
966     }
967   }
968   if (n_constraints) {
969     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
970   }
971 
972   /* compute other basis functions for non-symmetric problems */
973   if (!pcbddc->symmetric_primal) {
974 
975     if (n_constraints) {
976       Mat S_CCT,B_C;
977 
978       /* this is a lazy thing */
979       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
980       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
981       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
982       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
983       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
984       if (n_vertices) {
985         Mat B_V,S_VCT;
986 
987         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
988         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
989         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
990         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
991         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
992       }
993       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
994     } else { /* if there are no constraints, reset work */
995       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
996     }
997     if (n_vertices && n_R) {
998       Mat          A_VRT;
999       PetscScalar  *marray;
1000       PetscBLASInt B_N,B_one = 1;
1001 
1002       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1003       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1004       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1005       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1006       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1007       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1008       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1009     }
1010 
1011     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1012       for (i=0;i<pcbddc->local_primal_size;i++) {
1013         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1014         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1015         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1016         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1017         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1018       }
1019     } else {
1020       for (i=0;i<pcbddc->local_primal_size;i++) {
1021         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1022         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1023         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1024         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1025         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1026       }
1027     }
1028     /* coarse basis functions */
1029     for (i=0;i<pcbddc->local_primal_size;i++) {
1030       PetscScalar *y;
1031 
1032       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1033       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1034       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1035       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037       if (i<n_vertices) {
1038         y[n_B*i+idx_V_B[i]] = 1.0;
1039       }
1040       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1041       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1042 
1043       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1044         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1045         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1046         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1047         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1048         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1049         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1050       }
1051       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1052     }
1053   }
1054   /* free memory */
1055   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1056   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1057   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1058   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1059   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1060   ierr = PetscFree(work);CHKERRQ(ierr);
1061   if (n_vertices) {
1062     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1063   }
1064   if (n_constraints) {
1065     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1066   }
1067   /* Checking coarse_sub_mat and coarse basis functios */
1068   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1069   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1070   if (pcbddc->dbg_flag) {
1071     Mat         coarse_sub_mat;
1072     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1073     Mat         coarse_phi_D,coarse_phi_B;
1074     Mat         coarse_psi_D,coarse_psi_B;
1075     Mat         A_II,A_BB,A_IB,A_BI;
1076     Mat         C_B,CPHI;
1077     IS          is_dummy;
1078     Vec         mones;
1079     MatType     checkmattype=MATSEQAIJ;
1080     PetscReal   real_value;
1081 
1082     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1083     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1084     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1085     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1086     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1087     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1088     if (unsymmetric_check) {
1089       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1090       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1091     }
1092     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1093 
1094     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1095     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1096     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1097     if (unsymmetric_check) {
1098       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1099       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1100       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1101       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1102       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1103       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1104       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1105       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1106       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1107       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1108       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1109       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1110     } else {
1111       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1112       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1113       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1114       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1115       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1116       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1117       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1118       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1119     }
1120     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1121     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1122     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1123     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1124     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1125     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1126     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1127     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1128 
1129     /* check constraints */
1130     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1131     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1132     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1133     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1134     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1135     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1136     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1137     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1138     if (unsymmetric_check) {
1139       ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1140       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1141       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1142       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1143       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1144     }
1145     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1146     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1147     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1148     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1149 
1150     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1151     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1152     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1153     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1154     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1155     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1156     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1157     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1158     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1159     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1160     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1161     if (unsymmetric_check) {
1162       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1163       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1164     }
1165     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1166   }
1167   /* get back data */
1168   *coarse_submat_vals_n = coarse_submat_vals;
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1174 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1175 {
1176   Mat            *work_mat;
1177   IS             isrow_s,iscol_s;
1178   PetscBool      rsorted,csorted;
1179   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1184   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1185   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1186   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1187 
1188   if (!rsorted) {
1189     const PetscInt *idxs;
1190     PetscInt *idxs_sorted,i;
1191 
1192     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1193     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1194     for (i=0;i<rsize;i++) {
1195       idxs_perm_r[i] = i;
1196     }
1197     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1198     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1199     for (i=0;i<rsize;i++) {
1200       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1201     }
1202     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1203     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1204   } else {
1205     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1206     isrow_s = isrow;
1207   }
1208 
1209   if (!csorted) {
1210     if (isrow == iscol) {
1211       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1212       iscol_s = isrow_s;
1213     } else {
1214       const PetscInt *idxs;
1215       PetscInt *idxs_sorted,i;
1216 
1217       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1218       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1219       for (i=0;i<csize;i++) {
1220         idxs_perm_c[i] = i;
1221       }
1222       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1223       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1224       for (i=0;i<csize;i++) {
1225         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1226       }
1227       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1228       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1229     }
1230   } else {
1231     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1232     iscol_s = iscol;
1233   }
1234 
1235   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1236 
1237   if (!rsorted || !csorted) {
1238     Mat      new_mat;
1239     IS       is_perm_r,is_perm_c;
1240 
1241     if (!rsorted) {
1242       PetscInt *idxs_r,i;
1243       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1244       for (i=0;i<rsize;i++) {
1245         idxs_r[idxs_perm_r[i]] = i;
1246       }
1247       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1248       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1249     } else {
1250       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1251     }
1252     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1253 
1254     if (!csorted) {
1255       if (isrow_s == iscol_s) {
1256         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1257         is_perm_c = is_perm_r;
1258       } else {
1259         PetscInt *idxs_c,i;
1260         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1261         for (i=0;i<csize;i++) {
1262           idxs_c[idxs_perm_c[i]] = i;
1263         }
1264         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1265         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1266       }
1267     } else {
1268       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1269     }
1270     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1271 
1272     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1273     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1274     work_mat[0] = new_mat;
1275     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1276     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1277   }
1278 
1279   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1280   *B = work_mat[0];
1281   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1282   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1283   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1289 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1290 {
1291   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1292   PC_IS*         pcis = (PC_IS*)pc->data;
1293   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1294   Mat            new_mat;
1295   IS             is_local,is_global;
1296   PetscInt       local_size;
1297   PetscBool      isseqaij;
1298   PetscErrorCode ierr;
1299 
1300   PetscFunctionBegin;
1301   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1302   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1303   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1304   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr);
1305   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1306   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1307   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1308 
1309   /* check */
1310   if (pcbddc->dbg_flag) {
1311     Vec       x,x_change;
1312     PetscReal error;
1313 
1314     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1315     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1316     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1317     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1318     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1320     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1321     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1322     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1323     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1324     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1325     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1326     ierr = VecDestroy(&x);CHKERRQ(ierr);
1327     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1328   }
1329 
1330   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1331   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1332   if (isseqaij) {
1333     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1334   } else {
1335     Mat work_mat;
1336     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1337     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1338     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1339   }
1340   if (matis->A->symmetric_set) {
1341     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1342 #if !defined(PETSC_USE_COMPLEX)
1343     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1344 #endif
1345   }
1346   /*
1347   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1348   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1349   */
1350   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1356 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1357 {
1358   PC_IS*          pcis = (PC_IS*)(pc->data);
1359   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1360   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1361   PetscInt        *idx_R_local=NULL;
1362   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1363   PetscInt        vbs,bs;
1364   PetscBT         bitmask=NULL;
1365   PetscErrorCode  ierr;
1366 
1367   PetscFunctionBegin;
1368   /*
1369     No need to setup local scatters if
1370       - primal space is unchanged
1371         AND
1372       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1373         AND
1374       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1375   */
1376   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1377     PetscFunctionReturn(0);
1378   }
1379   /* destroy old objects */
1380   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1381   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1382   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1383   /* Set Non-overlapping dimensions */
1384   n_B = pcis->n_B;
1385   n_D = pcis->n - n_B;
1386   n_vertices = pcbddc->n_vertices;
1387 
1388   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1389 
1390   /* create auxiliary bitmask and allocate workspace */
1391   if (!sub_schurs->reuse_mumps) {
1392     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1393     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1394     for (i=0;i<n_vertices;i++) {
1395       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1396     }
1397 
1398     for (i=0, n_R=0; i<pcis->n; i++) {
1399       if (!PetscBTLookup(bitmask,i)) {
1400         idx_R_local[n_R++] = i;
1401       }
1402     }
1403   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1404     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1405 
1406     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1407     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1408   }
1409 
1410   /* Block code */
1411   vbs = 1;
1412   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1413   if (bs>1 && !(n_vertices%bs)) {
1414     PetscBool is_blocked = PETSC_TRUE;
1415     PetscInt  *vary;
1416     if (!sub_schurs->reuse_mumps) {
1417       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1418       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1419       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1420       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1421       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1422       for (i=0; i<pcis->n/bs; i++) {
1423         if (vary[i]!=0 && vary[i]!=bs) {
1424           is_blocked = PETSC_FALSE;
1425           break;
1426         }
1427       }
1428       ierr = PetscFree(vary);CHKERRQ(ierr);
1429     } else {
1430       /* Verify directly the R set */
1431       for (i=0; i<n_R/bs; i++) {
1432         PetscInt j,node=idx_R_local[bs*i];
1433         for (j=1; j<bs; j++) {
1434           if (node != idx_R_local[bs*i+j]-j) {
1435             is_blocked = PETSC_FALSE;
1436             break;
1437           }
1438         }
1439       }
1440     }
1441     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1442       vbs = bs;
1443       for (i=0;i<n_R/vbs;i++) {
1444         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1445       }
1446     }
1447   }
1448   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1449   if (sub_schurs->reuse_mumps) {
1450     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1451 
1452     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1453     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1454     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1455     reuse_mumps->is_R = pcbddc->is_R_local;
1456   } else {
1457     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1458   }
1459 
1460   /* print some info if requested */
1461   if (pcbddc->dbg_flag) {
1462     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1463     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1464     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1465     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1466     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1467     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1468     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1469   }
1470 
1471   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1472   if (!sub_schurs->reuse_mumps) {
1473     IS       is_aux1,is_aux2;
1474     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1475 
1476     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1477     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1478     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1479     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1480     for (i=0; i<n_D; i++) {
1481       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1482     }
1483     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1484     for (i=0, j=0; i<n_R; i++) {
1485       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1486         aux_array1[j++] = i;
1487       }
1488     }
1489     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1490     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1491     for (i=0, j=0; i<n_B; i++) {
1492       if (!PetscBTLookup(bitmask,is_indices[i])) {
1493         aux_array2[j++] = i;
1494       }
1495     }
1496     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1497     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1498     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1499     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1500     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1501 
1502     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1503       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1504       for (i=0, j=0; i<n_R; i++) {
1505         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1506           aux_array1[j++] = i;
1507         }
1508       }
1509       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1510       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1511       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1512     }
1513     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1514     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1515   } else {
1516     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1517     IS               tis;
1518     PetscInt         schur_size;
1519 
1520     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1521     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1522     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1523     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1524     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1525       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1526       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1527       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1528     }
1529   }
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1536 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1537 {
1538   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1539   PC_IS          *pcis = (PC_IS*)pc->data;
1540   PC             pc_temp;
1541   Mat            A_RR;
1542   MatReuse       reuse;
1543   PetscScalar    m_one = -1.0;
1544   PetscReal      value;
1545   PetscInt       n_D,n_R;
1546   PetscBool      use_exact,use_exact_reduced,issbaij;
1547   PetscErrorCode ierr;
1548   /* prefixes stuff */
1549   char           dir_prefix[256],neu_prefix[256],str_level[16];
1550   size_t         len;
1551 
1552   PetscFunctionBegin;
1553 
1554   /* compute prefixes */
1555   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1556   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1557   if (!pcbddc->current_level) {
1558     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1559     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1560     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1561     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1562   } else {
1563     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1564     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1565     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1566     len -= 15; /* remove "pc_bddc_coarse_" */
1567     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1568     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1569     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1570     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1571     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1572     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1573     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1574     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1575   }
1576 
1577   /* DIRICHLET PROBLEM */
1578   if (dirichlet) {
1579     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1580     if (pcbddc->local_mat->symmetric_set) {
1581       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1582     }
1583     /* Matrix for Dirichlet problem is pcis->A_II */
1584     n_D = pcis->n - pcis->n_B;
1585     if (!pcbddc->ksp_D) { /* create object if not yet build */
1586       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1587       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1588       /* default */
1589       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1590       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1591       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1592       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1593       if (issbaij) {
1594         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1595       } else {
1596         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1597       }
1598       /* Allow user's customization */
1599       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1600       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1601     }
1602     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1603     if (sub_schurs->reuse_mumps) {
1604       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1605 
1606       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1607     }
1608     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1609     if (!n_D) {
1610       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1611       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1612     }
1613     /* Set Up KSP for Dirichlet problem of BDDC */
1614     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1615     /* set ksp_D into pcis data */
1616     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1617     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1618     pcis->ksp_D = pcbddc->ksp_D;
1619   }
1620 
1621   /* NEUMANN PROBLEM */
1622   A_RR = 0;
1623   if (neumann) {
1624     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1625     PetscInt        ibs,mbs;
1626     PetscBool       issbaij;
1627     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1628     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1629     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1630     if (pcbddc->ksp_R) { /* already created ksp */
1631       PetscInt nn_R;
1632       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1633       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1634       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1635       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1636         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1637         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1638         reuse = MAT_INITIAL_MATRIX;
1639       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1640         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1641           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1642           reuse = MAT_INITIAL_MATRIX;
1643         } else { /* safe to reuse the matrix */
1644           reuse = MAT_REUSE_MATRIX;
1645         }
1646       }
1647       /* last check */
1648       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1649         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1650         reuse = MAT_INITIAL_MATRIX;
1651       }
1652     } else { /* first time, so we need to create the matrix */
1653       reuse = MAT_INITIAL_MATRIX;
1654     }
1655     /* extract A_RR */
1656     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1657     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1658     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1659     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1660       if (matis->A == pcbddc->local_mat) {
1661         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1662         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1663       } else {
1664         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1665       }
1666     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1667       if (matis->A == pcbddc->local_mat) {
1668         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1669         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1670       } else {
1671         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1672       }
1673     }
1674     if (!sub_schurs->reuse_mumps) {
1675       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1676       if (pcbddc->local_mat->symmetric_set) {
1677         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1678       }
1679     } else {
1680       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1681 
1682       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1683       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1684       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1685     }
1686     if (!pcbddc->ksp_R) { /* create object if not present */
1687       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1688       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1689       /* default */
1690       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1691       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1692       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1693       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1694       if (issbaij) {
1695         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1696       } else {
1697         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1698       }
1699       /* Allow user's customization */
1700       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1701       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1702     }
1703     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1704     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1705     if (!n_R) {
1706       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1707       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1708     }
1709     /* Reuse MUMPS solver if it is present */
1710     if (sub_schurs->reuse_mumps) {
1711       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1712 
1713       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1714     }
1715     /* Set Up KSP for Neumann problem of BDDC */
1716     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1717   }
1718   /* free Neumann problem's matrix */
1719   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1720 
1721   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1722   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1723     if (pcbddc->dbg_flag) {
1724       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1725       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1726       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1727     }
1728     if (dirichlet) { /* Dirichlet */
1729       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1730       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1731       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1732       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1733       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1734       /* need to be adapted? */
1735       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1736       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1737       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1738       /* print info */
1739       if (pcbddc->dbg_flag) {
1740         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1741         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1742       }
1743       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1744         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
1745       }
1746     }
1747     if (neumann) { /* Neumann */
1748       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1749       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1750       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1751       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1752       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1753       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1754       /* need to be adapted? */
1755       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1756       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1757       /* print info */
1758       if (pcbddc->dbg_flag) {
1759         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1760         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1761       }
1762       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1763         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
1764       }
1765     }
1766   }
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 #undef __FUNCT__
1771 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1772 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1773 {
1774   PetscErrorCode  ierr;
1775   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1776   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1777 
1778   PetscFunctionBegin;
1779   if (!sub_schurs->reuse_mumps) {
1780     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1781   }
1782   if (!pcbddc->switch_static) {
1783     if (applytranspose && pcbddc->local_auxmat1) {
1784       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1785       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1786     }
1787     if (!sub_schurs->reuse_mumps) {
1788       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1789       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1790     } else {
1791       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1792 
1793       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1794       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1795     }
1796   } else {
1797     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1798     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1799     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1800     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1801     if (applytranspose && pcbddc->local_auxmat1) {
1802       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1803       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1804       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1805       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1806     }
1807   }
1808   if (!sub_schurs->reuse_mumps) {
1809     if (applytranspose) {
1810       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1811     } else {
1812       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1813     }
1814 #if defined(PETSC_HAVE_MUMPS)
1815   } else {
1816     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1817 
1818     if (applytranspose) {
1819       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1820     } else {
1821       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1822     }
1823 #endif
1824   }
1825   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1826   if (!pcbddc->switch_static) {
1827     if (!sub_schurs->reuse_mumps) {
1828       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1829       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1830     } else {
1831       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1832 
1833       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1834       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1835     }
1836     if (!applytranspose && pcbddc->local_auxmat1) {
1837       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1838       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1839     }
1840   } else {
1841     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1842     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1843     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1844     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1845     if (!applytranspose && pcbddc->local_auxmat1) {
1846       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1847       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1848     }
1849     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1850     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1851     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1852     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1853   }
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1858 #undef __FUNCT__
1859 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1860 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1861 {
1862   PetscErrorCode ierr;
1863   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1864   PC_IS*            pcis = (PC_IS*)  (pc->data);
1865   const PetscScalar zero = 0.0;
1866 
1867   PetscFunctionBegin;
1868   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1869   if (applytranspose) {
1870     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1871     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1872   } else {
1873     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1874     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1875   }
1876   /* start communications from local primal nodes to rhs of coarse solver */
1877   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1878   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1879   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1880 
1881   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1882   /* TODO remove null space when doing multilevel */
1883   if (pcbddc->coarse_ksp) {
1884     Vec rhs,sol;
1885 
1886     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1887     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1888     if (applytranspose) {
1889       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1890     } else {
1891       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1892     }
1893   }
1894 
1895   /* Local solution on R nodes */
1896   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1897     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1898   }
1899 
1900   /* communications from coarse sol to local primal nodes */
1901   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1902   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1903 
1904   /* Sum contributions from two levels */
1905   if (applytranspose) {
1906     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1907     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1908   } else {
1909     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1910     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1917 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1918 {
1919   PetscErrorCode ierr;
1920   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1921   PetscScalar    *array;
1922   Vec            from,to;
1923 
1924   PetscFunctionBegin;
1925   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1926     from = pcbddc->coarse_vec;
1927     to = pcbddc->vec1_P;
1928     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1929       Vec tvec;
1930 
1931       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1932       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1933       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1934       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1935       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1936       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1937     }
1938   } else { /* from local to global -> put data in coarse right hand side */
1939     from = pcbddc->vec1_P;
1940     to = pcbddc->coarse_vec;
1941   }
1942   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1948 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1949 {
1950   PetscErrorCode ierr;
1951   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1952   PetscScalar    *array;
1953   Vec            from,to;
1954 
1955   PetscFunctionBegin;
1956   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1957     from = pcbddc->coarse_vec;
1958     to = pcbddc->vec1_P;
1959   } else { /* from local to global -> put data in coarse right hand side */
1960     from = pcbddc->vec1_P;
1961     to = pcbddc->coarse_vec;
1962   }
1963   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1964   if (smode == SCATTER_FORWARD) {
1965     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1966       Vec tvec;
1967 
1968       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1969       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1970       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1971       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1972     }
1973   } else {
1974     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1975      ierr = VecResetArray(from);CHKERRQ(ierr);
1976     }
1977   }
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 /* uncomment for testing purposes */
1982 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1983 #undef __FUNCT__
1984 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1985 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1986 {
1987   PetscErrorCode    ierr;
1988   PC_IS*            pcis = (PC_IS*)(pc->data);
1989   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1990   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1991   /* one and zero */
1992   PetscScalar       one=1.0,zero=0.0;
1993   /* space to store constraints and their local indices */
1994   PetscScalar       *constraints_data;
1995   PetscInt          *constraints_idxs,*constraints_idxs_B;
1996   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1997   PetscInt          *constraints_n;
1998   /* iterators */
1999   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2000   /* BLAS integers */
2001   PetscBLASInt      lwork,lierr;
2002   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2003   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2004   /* reuse */
2005   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2006   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2007   /* change of basis */
2008   PetscBool         qr_needed;
2009   PetscBT           change_basis,qr_needed_idx;
2010   /* auxiliary stuff */
2011   PetscInt          *nnz,*is_indices;
2012   PetscInt          ncc;
2013   /* some quantities */
2014   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2015   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2016 
2017   PetscFunctionBegin;
2018   /* Destroy Mat objects computed previously */
2019   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2020   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2021   /* save info on constraints from previous setup (if any) */
2022   olocal_primal_size = pcbddc->local_primal_size;
2023   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2024   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2025   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2026   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2027   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2028   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2029 
2030   /* print some info */
2031   if (pcbddc->dbg_flag) {
2032     IS       vertices;
2033     PetscInt nv,nedges,nfaces;
2034     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2035     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2036     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2037     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2038     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2039     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2040     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2041     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2042     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2043   }
2044 
2045   if (!pcbddc->adaptive_selection) {
2046     IS           ISForVertices,*ISForFaces,*ISForEdges;
2047     MatNullSpace nearnullsp;
2048     const Vec    *nearnullvecs;
2049     Vec          *localnearnullsp;
2050     PetscScalar  *array;
2051     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2052     PetscBool    nnsp_has_cnst;
2053     /* LAPACK working arrays for SVD or POD */
2054     PetscBool    skip_lapack,boolforchange;
2055     PetscScalar  *work;
2056     PetscReal    *singular_vals;
2057 #if defined(PETSC_USE_COMPLEX)
2058     PetscReal    *rwork;
2059 #endif
2060 #if defined(PETSC_MISSING_LAPACK_GESVD)
2061     PetscScalar  *temp_basis,*correlation_mat;
2062 #else
2063     PetscBLASInt dummy_int=1;
2064     PetscScalar  dummy_scalar=1.;
2065 #endif
2066 
2067     /* Get index sets for faces, edges and vertices from graph */
2068     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2069     /* free unneeded index sets */
2070     if (!pcbddc->use_vertices) {
2071       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2072     }
2073     if (!pcbddc->use_edges) {
2074       for (i=0;i<n_ISForEdges;i++) {
2075         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2076       }
2077       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2078       n_ISForEdges = 0;
2079     }
2080     if (!pcbddc->use_faces) {
2081       for (i=0;i<n_ISForFaces;i++) {
2082         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2083       }
2084       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2085       n_ISForFaces = 0;
2086     }
2087 
2088 #if defined(PETSC_USE_DEBUG)
2089     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2090        Also use_change_of_basis should be consistent among processors */
2091     if (pcbddc->NullSpace) {
2092       PetscBool tbool[2],gbool[2];
2093 
2094       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2095         pcbddc->use_change_of_basis = PETSC_TRUE;
2096         if (!ISForEdges) {
2097           pcbddc->use_change_on_faces = PETSC_TRUE;
2098         }
2099       }
2100       tbool[0] = pcbddc->use_change_of_basis;
2101       tbool[1] = pcbddc->use_change_on_faces;
2102       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2103       pcbddc->use_change_of_basis = gbool[0];
2104       pcbddc->use_change_on_faces = gbool[1];
2105     }
2106 #endif
2107 
2108     /* check if near null space is attached to global mat */
2109     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2110     if (nearnullsp) {
2111       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2112       /* remove any stored info */
2113       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2114       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2115       /* store information for BDDC solver reuse */
2116       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2117       pcbddc->onearnullspace = nearnullsp;
2118       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2119       for (i=0;i<nnsp_size;i++) {
2120         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2121       }
2122     } else { /* if near null space is not provided BDDC uses constants by default */
2123       nnsp_size = 0;
2124       nnsp_has_cnst = PETSC_TRUE;
2125     }
2126     /* get max number of constraints on a single cc */
2127     max_constraints = nnsp_size;
2128     if (nnsp_has_cnst) max_constraints++;
2129 
2130     /*
2131          Evaluate maximum storage size needed by the procedure
2132          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2133          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2134          There can be multiple constraints per connected component
2135                                                                                                                                                            */
2136     n_vertices = 0;
2137     if (ISForVertices) {
2138       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2139     }
2140     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2141     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2142 
2143     total_counts = n_ISForFaces+n_ISForEdges;
2144     total_counts *= max_constraints;
2145     total_counts += n_vertices;
2146     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2147 
2148     total_counts = 0;
2149     max_size_of_constraint = 0;
2150     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2151       IS used_is;
2152       if (i<n_ISForEdges) {
2153         used_is = ISForEdges[i];
2154       } else {
2155         used_is = ISForFaces[i-n_ISForEdges];
2156       }
2157       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2158       total_counts += j;
2159       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2160     }
2161     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2162 
2163     /* get local part of global near null space vectors */
2164     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2165     for (k=0;k<nnsp_size;k++) {
2166       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2167       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2168       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2169     }
2170 
2171     /* whether or not to skip lapack calls */
2172     skip_lapack = PETSC_TRUE;
2173     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2174 
2175     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2176     if (!skip_lapack) {
2177       PetscScalar temp_work;
2178 
2179 #if defined(PETSC_MISSING_LAPACK_GESVD)
2180       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2181       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2182       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2183       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2184 #if defined(PETSC_USE_COMPLEX)
2185       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2186 #endif
2187       /* now we evaluate the optimal workspace using query with lwork=-1 */
2188       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2189       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2190       lwork = -1;
2191       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2192 #if !defined(PETSC_USE_COMPLEX)
2193       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2194 #else
2195       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2196 #endif
2197       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2198       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2199 #else /* on missing GESVD */
2200       /* SVD */
2201       PetscInt max_n,min_n;
2202       max_n = max_size_of_constraint;
2203       min_n = max_constraints;
2204       if (max_size_of_constraint < max_constraints) {
2205         min_n = max_size_of_constraint;
2206         max_n = max_constraints;
2207       }
2208       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2209 #if defined(PETSC_USE_COMPLEX)
2210       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2211 #endif
2212       /* now we evaluate the optimal workspace using query with lwork=-1 */
2213       lwork = -1;
2214       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2215       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2216       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2217       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2218 #if !defined(PETSC_USE_COMPLEX)
2219       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2220 #else
2221       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2222 #endif
2223       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2224       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2225 #endif /* on missing GESVD */
2226       /* Allocate optimal workspace */
2227       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2228       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2229     }
2230     /* Now we can loop on constraining sets */
2231     total_counts = 0;
2232     constraints_idxs_ptr[0] = 0;
2233     constraints_data_ptr[0] = 0;
2234     /* vertices */
2235     if (n_vertices) {
2236       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2237       if (nnsp_has_cnst) { /* it considers all possible vertices */
2238         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2239         for (i=0;i<n_vertices;i++) {
2240           constraints_n[total_counts] = 1;
2241           constraints_data[total_counts] = 1.0;
2242           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2243           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2244           total_counts++;
2245         }
2246       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2247         PetscBool used_vertex;
2248         for (i=0;i<n_vertices;i++) {
2249           used_vertex = PETSC_FALSE;
2250           k = 0;
2251           while (!used_vertex && k<nnsp_size) {
2252             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2253             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2254               constraints_n[total_counts] = 1;
2255               constraints_idxs[total_counts] = is_indices[i];
2256               constraints_data[total_counts] = 1.0;
2257               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2258               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2259               total_counts++;
2260               used_vertex = PETSC_TRUE;
2261             }
2262             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2263             k++;
2264           }
2265         }
2266       }
2267       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2268       n_vertices = total_counts;
2269     }
2270 
2271     /* edges and faces */
2272     total_counts_cc = total_counts;
2273     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2274       IS        used_is;
2275       PetscBool idxs_copied = PETSC_FALSE;
2276 
2277       if (ncc<n_ISForEdges) {
2278         used_is = ISForEdges[ncc];
2279         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2280       } else {
2281         used_is = ISForFaces[ncc-n_ISForEdges];
2282         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2283       }
2284       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2285 
2286       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2287       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2288       /* change of basis should not be performed on local periodic nodes */
2289       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2290       if (nnsp_has_cnst) {
2291         PetscScalar quad_value;
2292 
2293         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2294         idxs_copied = PETSC_TRUE;
2295 
2296         if (!pcbddc->use_nnsp_true) {
2297           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2298         } else {
2299           quad_value = 1.0;
2300         }
2301         for (j=0;j<size_of_constraint;j++) {
2302           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2303         }
2304         temp_constraints++;
2305         total_counts++;
2306       }
2307       for (k=0;k<nnsp_size;k++) {
2308         PetscReal real_value;
2309         PetscScalar *ptr_to_data;
2310 
2311         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2312         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2313         for (j=0;j<size_of_constraint;j++) {
2314           ptr_to_data[j] = array[is_indices[j]];
2315         }
2316         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2317         /* check if array is null on the connected component */
2318         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2319         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2320         if (real_value > 0.0) { /* keep indices and values */
2321           temp_constraints++;
2322           total_counts++;
2323           if (!idxs_copied) {
2324             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2325             idxs_copied = PETSC_TRUE;
2326           }
2327         }
2328       }
2329       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2330       valid_constraints = temp_constraints;
2331       if (!pcbddc->use_nnsp_true && temp_constraints) {
2332         if (temp_constraints == 1) { /* just normalize the constraint */
2333           PetscScalar norm,*ptr_to_data;
2334 
2335           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2336           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2337           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2338           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2339           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2340         } else { /* perform SVD */
2341           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2342           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2343 
2344 #if defined(PETSC_MISSING_LAPACK_GESVD)
2345           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2346              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2347              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2348                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2349                 from that computed using LAPACKgesvd
2350              -> This is due to a different computation of eigenvectors in LAPACKheev
2351              -> The quality of the POD-computed basis will be the same */
2352           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2353           /* Store upper triangular part of correlation matrix */
2354           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2355           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2356           for (j=0;j<temp_constraints;j++) {
2357             for (k=0;k<j+1;k++) {
2358               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2359             }
2360           }
2361           /* compute eigenvalues and eigenvectors of correlation matrix */
2362           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2363           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2364 #if !defined(PETSC_USE_COMPLEX)
2365           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2366 #else
2367           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2368 #endif
2369           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2370           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2371           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2372           j = 0;
2373           while (j < temp_constraints && singular_vals[j] < tol) j++;
2374           total_counts = total_counts-j;
2375           valid_constraints = temp_constraints-j;
2376           /* scale and copy POD basis into used quadrature memory */
2377           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2378           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2379           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2380           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2381           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2382           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2383           if (j<temp_constraints) {
2384             PetscInt ii;
2385             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2386             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2387             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2388             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2389             for (k=0;k<temp_constraints-j;k++) {
2390               for (ii=0;ii<size_of_constraint;ii++) {
2391                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2392               }
2393             }
2394           }
2395 #else  /* on missing GESVD */
2396           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2397           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2398           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2399           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2400 #if !defined(PETSC_USE_COMPLEX)
2401           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2402 #else
2403           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2404 #endif
2405           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2406           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2407           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2408           k = temp_constraints;
2409           if (k > size_of_constraint) k = size_of_constraint;
2410           j = 0;
2411           while (j < k && singular_vals[k-j-1] < tol) j++;
2412           valid_constraints = k-j;
2413           total_counts = total_counts-temp_constraints+valid_constraints;
2414 #endif /* on missing GESVD */
2415         }
2416       }
2417       /* update pointers information */
2418       if (valid_constraints) {
2419         constraints_n[total_counts_cc] = valid_constraints;
2420         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2421         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2422         /* set change_of_basis flag */
2423         if (boolforchange) {
2424           PetscBTSet(change_basis,total_counts_cc);
2425         }
2426         total_counts_cc++;
2427       }
2428     }
2429     /* free workspace */
2430     if (!skip_lapack) {
2431       ierr = PetscFree(work);CHKERRQ(ierr);
2432 #if defined(PETSC_USE_COMPLEX)
2433       ierr = PetscFree(rwork);CHKERRQ(ierr);
2434 #endif
2435       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2436 #if defined(PETSC_MISSING_LAPACK_GESVD)
2437       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2438       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2439 #endif
2440     }
2441     for (k=0;k<nnsp_size;k++) {
2442       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2443     }
2444     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2445     /* free index sets of faces, edges and vertices */
2446     for (i=0;i<n_ISForFaces;i++) {
2447       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2448     }
2449     if (n_ISForFaces) {
2450       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2451     }
2452     for (i=0;i<n_ISForEdges;i++) {
2453       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2454     }
2455     if (n_ISForEdges) {
2456       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2457     }
2458     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2459   } else {
2460     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2461 
2462     total_counts = 0;
2463     n_vertices = 0;
2464     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2465       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2466     }
2467     max_constraints = 0;
2468     total_counts_cc = 0;
2469     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2470       total_counts += pcbddc->adaptive_constraints_n[i];
2471       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2472       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2473     }
2474     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2475     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2476     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2477     constraints_data = pcbddc->adaptive_constraints_data;
2478     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2479     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2480     total_counts_cc = 0;
2481     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2482       if (pcbddc->adaptive_constraints_n[i]) {
2483         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2484       }
2485     }
2486 #if 0
2487     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2488     for (i=0;i<total_counts_cc;i++) {
2489       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2490       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2491       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2492         printf(" %d",constraints_idxs[j]);
2493       }
2494       printf("\n");
2495       printf("number of cc: %d\n",constraints_n[i]);
2496     }
2497     for (i=0;i<n_vertices;i++) {
2498       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2499     }
2500     for (i=0;i<sub_schurs->n_subs;i++) {
2501       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2502     }
2503 #endif
2504 
2505     max_size_of_constraint = 0;
2506     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2507     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2508     /* Change of basis */
2509     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2510     if (pcbddc->use_change_of_basis) {
2511       for (i=0;i<sub_schurs->n_subs;i++) {
2512         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2513           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2514         }
2515       }
2516     }
2517   }
2518   pcbddc->local_primal_size = total_counts;
2519   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2520 
2521   /* map constraints_idxs in boundary numbering */
2522   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2523   if (i != constraints_idxs_ptr[total_counts_cc]) {
2524     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2525   }
2526 
2527   /* Create constraint matrix */
2528   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2529   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2530   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2531 
2532   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2533   /* determine if a QR strategy is needed for change of basis */
2534   qr_needed = PETSC_FALSE;
2535   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2536   total_primal_vertices=0;
2537   pcbddc->local_primal_size_cc = 0;
2538   for (i=0;i<total_counts_cc;i++) {
2539     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2540     if (size_of_constraint == 1) {
2541       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2542       pcbddc->local_primal_size_cc += 1;
2543     } else if (PetscBTLookup(change_basis,i)) {
2544       for (k=0;k<constraints_n[i];k++) {
2545         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2546       }
2547       pcbddc->local_primal_size_cc += constraints_n[i];
2548       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2549         PetscBTSet(qr_needed_idx,i);
2550         qr_needed = PETSC_TRUE;
2551       }
2552     } else {
2553       pcbddc->local_primal_size_cc += 1;
2554     }
2555   }
2556   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2557   pcbddc->n_vertices = total_primal_vertices;
2558   /* permute indices in order to have a sorted set of vertices */
2559   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2560 
2561   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2562   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2563   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2564 
2565   /* nonzero structure of constraint matrix */
2566   /* and get reference dof for local constraints */
2567   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2568   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2569 
2570   j = total_primal_vertices;
2571   total_counts = total_primal_vertices;
2572   cum = total_primal_vertices;
2573   for (i=n_vertices;i<total_counts_cc;i++) {
2574     if (!PetscBTLookup(change_basis,i)) {
2575       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2576       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2577       cum++;
2578       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2579       for (k=0;k<constraints_n[i];k++) {
2580         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2581         nnz[j+k] = size_of_constraint;
2582       }
2583       j += constraints_n[i];
2584     }
2585   }
2586   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2587   ierr = PetscFree(nnz);CHKERRQ(ierr);
2588 
2589   /* set values in constraint matrix */
2590   for (i=0;i<total_primal_vertices;i++) {
2591     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2592   }
2593   total_counts = total_primal_vertices;
2594   for (i=n_vertices;i<total_counts_cc;i++) {
2595     if (!PetscBTLookup(change_basis,i)) {
2596       PetscInt *cols;
2597 
2598       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2599       cols = constraints_idxs+constraints_idxs_ptr[i];
2600       for (k=0;k<constraints_n[i];k++) {
2601         PetscInt    row = total_counts+k;
2602         PetscScalar *vals;
2603 
2604         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2605         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2606       }
2607       total_counts += constraints_n[i];
2608     }
2609   }
2610   /* assembling */
2611   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2612   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2613 
2614   /*
2615   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2616   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2617   */
2618   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2619   if (pcbddc->use_change_of_basis) {
2620     /* dual and primal dofs on a single cc */
2621     PetscInt     dual_dofs,primal_dofs;
2622     /* working stuff for GEQRF */
2623     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2624     PetscBLASInt lqr_work;
2625     /* working stuff for UNGQR */
2626     PetscScalar  *gqr_work,lgqr_work_t;
2627     PetscBLASInt lgqr_work;
2628     /* working stuff for TRTRS */
2629     PetscScalar  *trs_rhs;
2630     PetscBLASInt Blas_NRHS;
2631     /* pointers for values insertion into change of basis matrix */
2632     PetscInt     *start_rows,*start_cols;
2633     PetscScalar  *start_vals;
2634     /* working stuff for values insertion */
2635     PetscBT      is_primal;
2636     PetscInt     *aux_primal_numbering_B;
2637     /* matrix sizes */
2638     PetscInt     global_size,local_size;
2639     /* temporary change of basis */
2640     Mat          localChangeOfBasisMatrix;
2641     /* extra space for debugging */
2642     PetscScalar  *dbg_work;
2643 
2644     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2645     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2646     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2647     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2648     /* nonzeros for local mat */
2649     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2650     for (i=0;i<pcis->n;i++) nnz[i]=1;
2651     for (i=n_vertices;i<total_counts_cc;i++) {
2652       if (PetscBTLookup(change_basis,i)) {
2653         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2654         if (PetscBTLookup(qr_needed_idx,i)) {
2655           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2656         } else {
2657           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2658           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2659         }
2660       }
2661     }
2662     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2663     ierr = PetscFree(nnz);CHKERRQ(ierr);
2664     /* Set initial identity in the matrix */
2665     for (i=0;i<pcis->n;i++) {
2666       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2667     }
2668 
2669     if (pcbddc->dbg_flag) {
2670       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2671       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2672     }
2673 
2674 
2675     /* Now we loop on the constraints which need a change of basis */
2676     /*
2677        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2678        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2679 
2680        Basic blocks of change of basis matrix T computed by
2681 
2682           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2683 
2684             | 1        0   ...        0         s_1/S |
2685             | 0        1   ...        0         s_2/S |
2686             |              ...                        |
2687             | 0        ...            1     s_{n-1}/S |
2688             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2689 
2690             with S = \sum_{i=1}^n s_i^2
2691             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2692                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2693 
2694           - QR decomposition of constraints otherwise
2695     */
2696     if (qr_needed) {
2697       /* space to store Q */
2698       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2699       /* first we issue queries for optimal work */
2700       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2701       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2702       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2703       lqr_work = -1;
2704       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2705       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2706       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2707       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2708       lgqr_work = -1;
2709       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2710       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2711       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2712       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2713       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2714       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2715       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2716       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2717       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2718       /* array to store scaling factors for reflectors */
2719       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2720       /* array to store rhs and solution of triangular solver */
2721       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2722       /* allocating workspace for check */
2723       if (pcbddc->dbg_flag) {
2724         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2725       }
2726     }
2727     /* array to store whether a node is primal or not */
2728     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2729     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2730     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2731     if (i != total_primal_vertices) {
2732       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2733     }
2734     for (i=0;i<total_primal_vertices;i++) {
2735       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2736     }
2737     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2738 
2739     /* loop on constraints and see whether or not they need a change of basis and compute it */
2740     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2741       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2742       if (PetscBTLookup(change_basis,total_counts)) {
2743         /* get constraint info */
2744         primal_dofs = constraints_n[total_counts];
2745         dual_dofs = size_of_constraint-primal_dofs;
2746 
2747         if (pcbddc->dbg_flag) {
2748           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2749         }
2750 
2751         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2752 
2753           /* copy quadrature constraints for change of basis check */
2754           if (pcbddc->dbg_flag) {
2755             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2756           }
2757           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2758           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2759 
2760           /* compute QR decomposition of constraints */
2761           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2762           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2763           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2764           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2765           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2766           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2767           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2768 
2769           /* explictly compute R^-T */
2770           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2771           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2772           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2773           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2774           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2775           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2776           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2777           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2778           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2779           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2780 
2781           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2782           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2783           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2784           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2785           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2786           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2787           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2788           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2789           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2790 
2791           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2792              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2793              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2794           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2795           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2796           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2797           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2798           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2799           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2800           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2801           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2802           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2803           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2804 
2805           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2806           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2807           /* insert cols for primal dofs */
2808           for (j=0;j<primal_dofs;j++) {
2809             start_vals = &qr_basis[j*size_of_constraint];
2810             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2811             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2812           }
2813           /* insert cols for dual dofs */
2814           for (j=0,k=0;j<dual_dofs;k++) {
2815             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2816               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2817               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2818               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2819               j++;
2820             }
2821           }
2822 
2823           /* check change of basis */
2824           if (pcbddc->dbg_flag) {
2825             PetscInt   ii,jj;
2826             PetscBool valid_qr=PETSC_TRUE;
2827             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2828             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2829             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2830             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2831             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2832             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2833             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2834             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2835             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2836             for (jj=0;jj<size_of_constraint;jj++) {
2837               for (ii=0;ii<primal_dofs;ii++) {
2838                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2839                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2840               }
2841             }
2842             if (!valid_qr) {
2843               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2844               for (jj=0;jj<size_of_constraint;jj++) {
2845                 for (ii=0;ii<primal_dofs;ii++) {
2846                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2847                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2848                   }
2849                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2850                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2851                   }
2852                 }
2853               }
2854             } else {
2855               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2856             }
2857           }
2858         } else { /* simple transformation block */
2859           PetscInt    row,col;
2860           PetscScalar val,norm;
2861 
2862           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2863           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2864           for (j=0;j<size_of_constraint;j++) {
2865             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2866             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2867             if (!PetscBTLookup(is_primal,row_B)) {
2868               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2869               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2870               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2871             } else {
2872               for (k=0;k<size_of_constraint;k++) {
2873                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2874                 if (row != col) {
2875                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2876                 } else {
2877                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2878                 }
2879                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2880               }
2881             }
2882           }
2883           if (pcbddc->dbg_flag) {
2884             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2885           }
2886         }
2887       } else {
2888         if (pcbddc->dbg_flag) {
2889           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2890         }
2891       }
2892     }
2893 
2894     /* free workspace */
2895     if (qr_needed) {
2896       if (pcbddc->dbg_flag) {
2897         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2898       }
2899       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2900       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2901       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2902       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2903       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2904     }
2905     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2906     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2907     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2908 
2909     /* assembling of global change of variable */
2910     {
2911       Mat      tmat;
2912       PetscInt bs;
2913 
2914       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2915       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2916       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2917       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2918       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2919       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2920       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2921       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2922       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2923       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2924       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2925       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2926       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2927       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2928       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2929       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2930       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2931       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2932     }
2933     /* check */
2934     if (pcbddc->dbg_flag) {
2935       PetscReal error;
2936       Vec       x,x_change;
2937 
2938       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2939       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2940       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2941       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2942       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2943       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2944       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2945       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2946       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2947       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2948       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2949       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2950       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2951       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2952       ierr = VecDestroy(&x);CHKERRQ(ierr);
2953       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2954     }
2955 
2956     /* adapt sub_schurs computed (if any) */
2957     if (pcbddc->use_deluxe_scaling) {
2958       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2959       if (sub_schurs->S_Ej_all) {
2960         Mat S_new,tmat;
2961         IS is_all_N;
2962 
2963         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2964         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2965         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2966         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2967         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2968         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2969         sub_schurs->S_Ej_all = S_new;
2970         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2971         if (sub_schurs->sum_S_Ej_all) {
2972           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2973           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2974           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2975           sub_schurs->sum_S_Ej_all = S_new;
2976           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2977         }
2978         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2979       }
2980     }
2981     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2982   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2983     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2984     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2985   }
2986 
2987   /* set up change of basis context */
2988   if (pcbddc->ChangeOfBasisMatrix) {
2989     PCBDDCChange_ctx change_ctx;
2990 
2991     if (!pcbddc->new_global_mat) {
2992       PetscInt global_size,local_size;
2993 
2994       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2995       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2996       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2997       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2998       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2999       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3000       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3001       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3002       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3003     } else {
3004       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3005       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3006       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3007     }
3008     if (!pcbddc->user_ChangeOfBasisMatrix) {
3009       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3010       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3011     } else {
3012       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3013       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3014     }
3015     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3016     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3017     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3018     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3019   }
3020 
3021   /* check if a new primal space has been introduced */
3022   pcbddc->new_primal_space_local = PETSC_TRUE;
3023   if (olocal_primal_size == pcbddc->local_primal_size) {
3024     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3025     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3026     if (!pcbddc->new_primal_space_local) {
3027       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3028       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3029     }
3030   }
3031   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3032   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3033   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3034 
3035   /* flush dbg viewer */
3036   if (pcbddc->dbg_flag) {
3037     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3038   }
3039 
3040   /* free workspace */
3041   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3042   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3043   if (!pcbddc->adaptive_selection) {
3044     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3045     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3046   } else {
3047     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3048                       pcbddc->adaptive_constraints_idxs_ptr,
3049                       pcbddc->adaptive_constraints_data_ptr,
3050                       pcbddc->adaptive_constraints_idxs,
3051                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3052     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3053     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3054   }
3055   PetscFunctionReturn(0);
3056 }
3057 
3058 #undef __FUNCT__
3059 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3060 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3061 {
3062   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3063   PC_IS       *pcis = (PC_IS*)pc->data;
3064   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3065   PetscInt    ierr,i,vertex_size,N;
3066   PetscViewer viewer=pcbddc->dbg_viewer;
3067 
3068   PetscFunctionBegin;
3069   /* Reset previously computed graph */
3070   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3071   /* Init local Graph struct */
3072   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3073   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3074 
3075   /* Check validity of the csr graph passed in by the user */
3076   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3077     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3078   }
3079 
3080   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3081   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3082     PetscInt  *xadj,*adjncy;
3083     PetscInt  nvtxs;
3084     PetscBool flg_row=PETSC_FALSE;
3085 
3086     if (pcbddc->use_local_adj) {
3087 
3088       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3089       if (flg_row) {
3090         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3091         pcbddc->computed_rowadj = PETSC_TRUE;
3092       }
3093       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3094     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3095       IS                     is_dummy;
3096       ISLocalToGlobalMapping l2gmap_dummy;
3097       PetscInt               j,sum;
3098       PetscInt               *cxadj,*cadjncy;
3099       const PetscInt         *idxs;
3100       PCBDDCGraph            graph;
3101       PetscBT                is_on_boundary;
3102 
3103       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3104       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3105       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3106       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3107       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3108       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3109       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3110       if (flg_row) {
3111         graph->xadj = xadj;
3112         graph->adjncy = adjncy;
3113       }
3114       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3115       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3116       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3117 
3118       if (pcbddc->dbg_flag) {
3119         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3120         for (i=0;i<graph->ncc;i++) {
3121           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3122         }
3123       }
3124 
3125       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3126       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3127       for (i=0;i<pcis->n_B;i++) {
3128         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3129       }
3130       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3131 
3132       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3133       sum = 0;
3134       for (i=0;i<graph->ncc;i++) {
3135         PetscInt sizecc = 0;
3136         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3137           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3138             sizecc++;
3139           }
3140         }
3141         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3142           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3143             cxadj[graph->queue[j]] = sizecc;
3144           }
3145         }
3146         sum += sizecc*sizecc;
3147       }
3148       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3149       sum = 0;
3150       for (i=0;i<pcis->n;i++) {
3151         PetscInt temp = cxadj[i];
3152         cxadj[i] = sum;
3153         sum += temp;
3154       }
3155       cxadj[pcis->n] = sum;
3156       for (i=0;i<graph->ncc;i++) {
3157         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3158           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3159             PetscInt k,sizecc = 0;
3160             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3161               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3162                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3163                 sizecc++;
3164               }
3165             }
3166           }
3167         }
3168       }
3169       if (sum) {
3170         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3171       } else {
3172         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3173         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3174       }
3175       graph->xadj = 0;
3176       graph->adjncy = 0;
3177       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3178       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3179     }
3180   }
3181   if (pcbddc->dbg_flag) {
3182     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3183   }
3184 
3185   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3186   vertex_size = 1;
3187   if (pcbddc->user_provided_isfordofs) {
3188     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3189       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3190       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3191         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3192         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3193       }
3194       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3195       pcbddc->n_ISForDofs = 0;
3196       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3197     }
3198     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3199     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3200   } else {
3201     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3202       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3203       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3204       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3205         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3206       }
3207     }
3208   }
3209 
3210   /* Setup of Graph */
3211   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3212     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3213   }
3214   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3215     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3216   }
3217   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3218 
3219   /* Graph's connected components analysis */
3220   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3221 
3222   /* print some info to stdout */
3223   if (pcbddc->dbg_flag) {
3224     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3225   }
3226 
3227   /* mark topography has done */
3228   pcbddc->recompute_topography = PETSC_FALSE;
3229   PetscFunctionReturn(0);
3230 }
3231 
3232 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3233 #undef __FUNCT__
3234 #define __FUNCT__ "PCBDDCSubsetNumbering"
3235 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3236 {
3237   PetscSF        sf;
3238   PetscLayout    map;
3239   const PetscInt *idxs;
3240   PetscInt       *leaf_data,*root_data,*gidxs;
3241   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3242   PetscInt       n_n,nlocals,start,first_index;
3243   PetscMPIInt    commsize;
3244   PetscBool      first_found;
3245   PetscErrorCode ierr;
3246 
3247   PetscFunctionBegin;
3248   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3249   if (subset_mult) {
3250     PetscCheckSameComm(subset,1,subset_mult,2);
3251     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3252     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3253   }
3254   /* create workspace layout for computing global indices of subset */
3255   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3256   lbounds[0] = lbounds[1] = 0;
3257   for (i=0;i<n;i++) {
3258     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3259     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3260   }
3261   lbounds[0] = -lbounds[0];
3262   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3263   gbounds[0] = -gbounds[0];
3264   N = gbounds[1] - gbounds[0] + 1;
3265   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3266   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3267   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3268   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3269   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3270 
3271   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3272   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3273   if (subset_mult) {
3274     const PetscInt* idxs_mult;
3275 
3276     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3277     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3278     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3279   } else {
3280     for (i=0;i<n;i++) leaf_data[i] = 1;
3281   }
3282   /* local size of new subset */
3283   n_n = 0;
3284   for (i=0;i<n;i++) n_n += leaf_data[i];
3285 
3286   /* global indexes in layout */
3287   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3288   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3289   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3290   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3291   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3292   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3293 
3294   /* reduce from leaves to roots */
3295   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3296   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3297   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3298 
3299   /* count indexes in local part of layout */
3300   nlocals = 0;
3301   first_index = -1;
3302   first_found = PETSC_FALSE;
3303   for (i=0;i<Nl;i++) {
3304     if (!first_found && root_data[i]) {
3305       first_found = PETSC_TRUE;
3306       first_index = i;
3307     }
3308     nlocals += root_data[i];
3309   }
3310 
3311   /* cumulative of number of indexes and size of subset without holes */
3312 #if defined(PETSC_HAVE_MPI_EXSCAN)
3313   start = 0;
3314   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3315 #else
3316   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3317   start = start-nlocals;
3318 #endif
3319 
3320   if (N_n) { /* compute total size of new subset if requested */
3321     *N_n = start + nlocals;
3322     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3323     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3324   }
3325 
3326   /* adapt root data with cumulative */
3327   if (first_found) {
3328     PetscInt old_index;
3329 
3330     root_data[first_index] += start;
3331     old_index = first_index;
3332     for (i=first_index+1;i<Nl;i++) {
3333       if (root_data[i]) {
3334         root_data[i] += root_data[old_index];
3335         old_index = i;
3336       }
3337     }
3338   }
3339 
3340   /* from roots to leaves */
3341   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3342   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3343   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3344 
3345   /* create new IS with global indexes without holes */
3346   if (subset_mult) {
3347     const PetscInt* idxs_mult;
3348     PetscInt        cum;
3349 
3350     cum = 0;
3351     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3352     for (i=0;i<n;i++) {
3353       PetscInt j;
3354       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3355     }
3356     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3357   } else {
3358     for (i=0;i<n;i++) {
3359       gidxs[i] = leaf_data[i]-1;
3360     }
3361   }
3362   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3363   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3364   PetscFunctionReturn(0);
3365 }
3366 
3367 #undef __FUNCT__
3368 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3369 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3370 {
3371   PetscInt       i,j;
3372   PetscScalar    *alphas;
3373   PetscErrorCode ierr;
3374 
3375   PetscFunctionBegin;
3376   /* this implements stabilized Gram-Schmidt */
3377   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3378   for (i=0;i<n;i++) {
3379     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3380     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3381     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3382   }
3383   ierr = PetscFree(alphas);CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386 
3387 #undef __FUNCT__
3388 #define __FUNCT__ "MatISGetSubassemblingPattern"
3389 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3390 {
3391   IS             ranks_send_to;
3392   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3393   PetscMPIInt    size,rank,color;
3394   PetscInt       *xadj,*adjncy;
3395   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3396   PetscInt       i,local_size,threshold=0;
3397   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3398   PetscSubcomm   subcomm;
3399   PetscErrorCode ierr;
3400 
3401   PetscFunctionBegin;
3402   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3403   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3404   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3405 
3406   /* Get info on mapping */
3407   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
3408   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3409 
3410   /* build local CSR graph of subdomains' connectivity */
3411   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3412   xadj[0] = 0;
3413   xadj[1] = PetscMax(n_neighs-1,0);
3414   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3415   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3416 
3417   if (threshold) {
3418     PetscInt xadj_count = 0;
3419     for (i=1;i<n_neighs;i++) {
3420       if (n_shared[i] > threshold) {
3421         adjncy[xadj_count] = neighs[i];
3422         adjncy_wgt[xadj_count] = n_shared[i];
3423         xadj_count++;
3424       }
3425     }
3426     xadj[1] = xadj_count;
3427   } else {
3428     if (xadj[1]) {
3429       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3430       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3431     }
3432   }
3433   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3434   if (use_square) {
3435     for (i=0;i<xadj[1];i++) {
3436       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3437     }
3438   }
3439   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3440 
3441   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3442 
3443   /*
3444     Restrict work on active processes only.
3445   */
3446   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3447   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3448   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3449   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3450   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3451   if (color) {
3452     ierr = PetscFree(xadj);CHKERRQ(ierr);
3453     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3454     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3455   } else {
3456     Mat             subdomain_adj;
3457     IS              new_ranks,new_ranks_contig;
3458     MatPartitioning partitioner;
3459     PetscInt        prank,rstart=0,rend=0;
3460     PetscInt        *is_indices,*oldranks;
3461     PetscBool       aggregate;
3462 
3463     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3464     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3465     prank = rank;
3466     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3467     /*
3468     for (i=0;i<size;i++) {
3469       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3470     }
3471     */
3472     for (i=0;i<xadj[1];i++) {
3473       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3474     }
3475     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3476     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3477     if (aggregate) {
3478       PetscInt    lrows,row,ncols,*cols;
3479       PetscMPIInt nrank;
3480       PetscScalar *vals;
3481 
3482       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3483       lrows = 0;
3484       if (nrank<redprocs) {
3485         lrows = size/redprocs;
3486         if (nrank<size%redprocs) lrows++;
3487       }
3488       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3489       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3490       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3491       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3492       row = nrank;
3493       ncols = xadj[1]-xadj[0];
3494       cols = adjncy;
3495       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3496       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3497       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3498       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3499       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3500       ierr = PetscFree(xadj);CHKERRQ(ierr);
3501       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3502       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3503       ierr = PetscFree(vals);CHKERRQ(ierr);
3504     } else {
3505       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3506     }
3507     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3508 
3509     /* Partition */
3510     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3511     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3512     if (use_vwgt) {
3513       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3514       v_wgt[0] = local_size;
3515       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3516     }
3517     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3518     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3519     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3520     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3521     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3522 
3523     /* renumber new_ranks to avoid "holes" in new set of processors */
3524     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3525     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3526     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3527     if (!redprocs) {
3528       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3529     } else {
3530       PetscInt    idxs[1];
3531       PetscMPIInt tag;
3532       MPI_Request *reqs;
3533 
3534       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3535       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3536       for (i=rstart;i<rend;i++) {
3537         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3538       }
3539       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3540       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3541       ierr = PetscFree(reqs);CHKERRQ(ierr);
3542       ranks_send_to_idx[0] = oldranks[idxs[0]];
3543     }
3544     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3545     /* clean up */
3546     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3547     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3548     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3549     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3550   }
3551   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3552 
3553   /* assemble parallel IS for sends */
3554   i = 1;
3555   if (color) i=0;
3556   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3557   /* get back IS */
3558   *is_sends = ranks_send_to;
3559   PetscFunctionReturn(0);
3560 }
3561 
3562 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3563 
3564 #undef __FUNCT__
3565 #define __FUNCT__ "MatISSubassemble"
3566 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3567 {
3568   Mat                    local_mat;
3569   IS                     is_sends_internal;
3570   PetscInt               rows,cols,new_local_rows;
3571   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3572   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3573   ISLocalToGlobalMapping l2gmap;
3574   PetscInt*              l2gmap_indices;
3575   const PetscInt*        is_indices;
3576   MatType                new_local_type;
3577   /* buffers */
3578   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3579   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3580   PetscInt               *recv_buffer_idxs_local;
3581   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3582   /* MPI */
3583   MPI_Comm               comm,comm_n;
3584   PetscSubcomm           subcomm;
3585   PetscMPIInt            n_sends,n_recvs,commsize;
3586   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3587   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3588   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3589   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3590   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3591   PetscErrorCode         ierr;
3592 
3593   PetscFunctionBegin;
3594   /* TODO: add missing checks */
3595   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3596   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3597   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3598   PetscValidLogicalCollectiveInt(mat,nis,7);
3599   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3600   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3601   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3602   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3603   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3604   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3605   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3606   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3607     PetscInt mrows,mcols,mnrows,mncols;
3608     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3609     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3610     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3611     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3612     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3613     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3614   }
3615   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3616   PetscValidLogicalCollectiveInt(mat,bs,0);
3617   /* prepare IS for sending if not provided */
3618   if (!is_sends) {
3619     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3620     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
3621   } else {
3622     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3623     is_sends_internal = is_sends;
3624   }
3625 
3626   /* get comm */
3627   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3628 
3629   /* compute number of sends */
3630   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3631   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3632 
3633   /* compute number of receives */
3634   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3635   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3636   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3637   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3638   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3639   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3640   ierr = PetscFree(iflags);CHKERRQ(ierr);
3641 
3642   /* restrict comm if requested */
3643   subcomm = 0;
3644   destroy_mat = PETSC_FALSE;
3645   if (restrict_comm) {
3646     PetscMPIInt color,subcommsize;
3647 
3648     color = 0;
3649     if (restrict_full) {
3650       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3651     } else {
3652       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3653     }
3654     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3655     subcommsize = commsize - subcommsize;
3656     /* check if reuse has been requested */
3657     if (reuse == MAT_REUSE_MATRIX) {
3658       if (*mat_n) {
3659         PetscMPIInt subcommsize2;
3660         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3661         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3662         comm_n = PetscObjectComm((PetscObject)*mat_n);
3663       } else {
3664         comm_n = PETSC_COMM_SELF;
3665       }
3666     } else { /* MAT_INITIAL_MATRIX */
3667       PetscMPIInt rank;
3668 
3669       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3670       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3671       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3672       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3673       comm_n = PetscSubcommChild(subcomm);
3674     }
3675     /* flag to destroy *mat_n if not significative */
3676     if (color) destroy_mat = PETSC_TRUE;
3677   } else {
3678     comm_n = comm;
3679   }
3680 
3681   /* prepare send/receive buffers */
3682   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3683   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3684   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3685   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3686   if (nis) {
3687     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3688   }
3689 
3690   /* Get data from local matrices */
3691   if (!isdense) {
3692     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3693     /* TODO: See below some guidelines on how to prepare the local buffers */
3694     /*
3695        send_buffer_vals should contain the raw values of the local matrix
3696        send_buffer_idxs should contain:
3697        - MatType_PRIVATE type
3698        - PetscInt        size_of_l2gmap
3699        - PetscInt        global_row_indices[size_of_l2gmap]
3700        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3701     */
3702   } else {
3703     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3704     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
3705     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3706     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3707     send_buffer_idxs[1] = i;
3708     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3709     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3710     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3711     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3712     for (i=0;i<n_sends;i++) {
3713       ilengths_vals[is_indices[i]] = len*len;
3714       ilengths_idxs[is_indices[i]] = len+2;
3715     }
3716   }
3717   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3718   /* additional is (if any) */
3719   if (nis) {
3720     PetscMPIInt psum;
3721     PetscInt j;
3722     for (j=0,psum=0;j<nis;j++) {
3723       PetscInt plen;
3724       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3725       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3726       psum += len+1; /* indices + lenght */
3727     }
3728     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3729     for (j=0,psum=0;j<nis;j++) {
3730       PetscInt plen;
3731       const PetscInt *is_array_idxs;
3732       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3733       send_buffer_idxs_is[psum] = plen;
3734       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3735       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3736       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3737       psum += plen+1; /* indices + lenght */
3738     }
3739     for (i=0;i<n_sends;i++) {
3740       ilengths_idxs_is[is_indices[i]] = psum;
3741     }
3742     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3743   }
3744 
3745   buf_size_idxs = 0;
3746   buf_size_vals = 0;
3747   buf_size_idxs_is = 0;
3748   for (i=0;i<n_recvs;i++) {
3749     buf_size_idxs += (PetscInt)olengths_idxs[i];
3750     buf_size_vals += (PetscInt)olengths_vals[i];
3751     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3752   }
3753   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3754   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3755   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3756 
3757   /* get new tags for clean communications */
3758   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3759   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3760   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3761 
3762   /* allocate for requests */
3763   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3764   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3765   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3766   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3767   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3768   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3769 
3770   /* communications */
3771   ptr_idxs = recv_buffer_idxs;
3772   ptr_vals = recv_buffer_vals;
3773   ptr_idxs_is = recv_buffer_idxs_is;
3774   for (i=0;i<n_recvs;i++) {
3775     source_dest = onodes[i];
3776     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3777     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3778     ptr_idxs += olengths_idxs[i];
3779     ptr_vals += olengths_vals[i];
3780     if (nis) {
3781       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3782       ptr_idxs_is += olengths_idxs_is[i];
3783     }
3784   }
3785   for (i=0;i<n_sends;i++) {
3786     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3787     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3788     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3789     if (nis) {
3790       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3791     }
3792   }
3793   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3794   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3795 
3796   /* assemble new l2g map */
3797   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3798   ptr_idxs = recv_buffer_idxs;
3799   new_local_rows = 0;
3800   for (i=0;i<n_recvs;i++) {
3801     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3802     ptr_idxs += olengths_idxs[i];
3803   }
3804   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3805   ptr_idxs = recv_buffer_idxs;
3806   new_local_rows = 0;
3807   for (i=0;i<n_recvs;i++) {
3808     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3809     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3810     ptr_idxs += olengths_idxs[i];
3811   }
3812   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3813   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3814   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3815 
3816   /* infer new local matrix type from received local matrices type */
3817   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3818   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3819   if (n_recvs) {
3820     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3821     ptr_idxs = recv_buffer_idxs;
3822     for (i=0;i<n_recvs;i++) {
3823       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3824         new_local_type_private = MATAIJ_PRIVATE;
3825         break;
3826       }
3827       ptr_idxs += olengths_idxs[i];
3828     }
3829     switch (new_local_type_private) {
3830       case MATDENSE_PRIVATE:
3831         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3832           new_local_type = MATSEQAIJ;
3833           bs = 1;
3834         } else { /* if I receive only 1 dense matrix */
3835           new_local_type = MATSEQDENSE;
3836           bs = 1;
3837         }
3838         break;
3839       case MATAIJ_PRIVATE:
3840         new_local_type = MATSEQAIJ;
3841         bs = 1;
3842         break;
3843       case MATBAIJ_PRIVATE:
3844         new_local_type = MATSEQBAIJ;
3845         break;
3846       case MATSBAIJ_PRIVATE:
3847         new_local_type = MATSEQSBAIJ;
3848         break;
3849       default:
3850         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3851         break;
3852     }
3853   } else { /* by default, new_local_type is seqdense */
3854     new_local_type = MATSEQDENSE;
3855     bs = 1;
3856   }
3857 
3858   /* create MATIS object if needed */
3859   if (reuse == MAT_INITIAL_MATRIX) {
3860     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3861     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
3862   } else {
3863     /* it also destroys the local matrices */
3864     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3865   }
3866   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3867   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3868 
3869   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3870 
3871   /* Global to local map of received indices */
3872   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3873   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3874   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3875 
3876   /* restore attributes -> type of incoming data and its size */
3877   buf_size_idxs = 0;
3878   for (i=0;i<n_recvs;i++) {
3879     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3880     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3881     buf_size_idxs += (PetscInt)olengths_idxs[i];
3882   }
3883   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3884 
3885   /* set preallocation */
3886   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3887   if (!newisdense) {
3888     PetscInt *new_local_nnz=0;
3889 
3890     ptr_vals = recv_buffer_vals;
3891     ptr_idxs = recv_buffer_idxs_local;
3892     if (n_recvs) {
3893       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3894     }
3895     for (i=0;i<n_recvs;i++) {
3896       PetscInt j;
3897       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3898         for (j=0;j<*(ptr_idxs+1);j++) {
3899           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3900         }
3901       } else {
3902         /* TODO */
3903       }
3904       ptr_idxs += olengths_idxs[i];
3905     }
3906     if (new_local_nnz) {
3907       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3908       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3909       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3910       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3911       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3912       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3913     } else {
3914       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3915     }
3916     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3917   } else {
3918     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3919   }
3920 
3921   /* set values */
3922   ptr_vals = recv_buffer_vals;
3923   ptr_idxs = recv_buffer_idxs_local;
3924   for (i=0;i<n_recvs;i++) {
3925     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3926       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3927       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3928       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3929       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3930       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3931     } else {
3932       /* TODO */
3933     }
3934     ptr_idxs += olengths_idxs[i];
3935     ptr_vals += olengths_vals[i];
3936   }
3937   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3938   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3939   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3940   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3941   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3942   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3943 
3944 #if 0
3945   if (!restrict_comm) { /* check */
3946     Vec       lvec,rvec;
3947     PetscReal infty_error;
3948 
3949     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3950     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3951     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3952     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3953     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3954     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3955     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3956     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3957     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3958   }
3959 #endif
3960 
3961   /* assemble new additional is (if any) */
3962   if (nis) {
3963     PetscInt **temp_idxs,*count_is,j,psum;
3964 
3965     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3966     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3967     ptr_idxs = recv_buffer_idxs_is;
3968     psum = 0;
3969     for (i=0;i<n_recvs;i++) {
3970       for (j=0;j<nis;j++) {
3971         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3972         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3973         psum += plen;
3974         ptr_idxs += plen+1; /* shift pointer to received data */
3975       }
3976     }
3977     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3978     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3979     for (i=1;i<nis;i++) {
3980       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3981     }
3982     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3983     ptr_idxs = recv_buffer_idxs_is;
3984     for (i=0;i<n_recvs;i++) {
3985       for (j=0;j<nis;j++) {
3986         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3987         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3988         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3989         ptr_idxs += plen+1; /* shift pointer to received data */
3990       }
3991     }
3992     for (i=0;i<nis;i++) {
3993       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3994       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3995       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3996     }
3997     ierr = PetscFree(count_is);CHKERRQ(ierr);
3998     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3999     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4000   }
4001   /* free workspace */
4002   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4003   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4004   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4005   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4006   if (isdense) {
4007     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4008     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4009   } else {
4010     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4011   }
4012   if (nis) {
4013     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4014     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4015   }
4016   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4017   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4018   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4019   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4020   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4021   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4022   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4023   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4024   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4025   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4026   ierr = PetscFree(onodes);CHKERRQ(ierr);
4027   if (nis) {
4028     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4029     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4030     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4031   }
4032   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4033   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4034     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4035     for (i=0;i<nis;i++) {
4036       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4037     }
4038     *mat_n = NULL;
4039   }
4040   PetscFunctionReturn(0);
4041 }
4042 
4043 /* temporary hack into ksp private data structure */
4044 #include <petsc/private/kspimpl.h>
4045 
4046 #undef __FUNCT__
4047 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4048 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4049 {
4050   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4051   PC_IS                  *pcis = (PC_IS*)pc->data;
4052   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4053   MatNullSpace           CoarseNullSpace=NULL;
4054   ISLocalToGlobalMapping coarse_islg;
4055   IS                     coarse_is,*isarray;
4056   PetscInt               i,im_active=-1,active_procs=-1;
4057   PetscInt               nis,nisdofs,nisneu;
4058   PC                     pc_temp;
4059   PCType                 coarse_pc_type;
4060   KSPType                coarse_ksp_type;
4061   PetscBool              multilevel_requested,multilevel_allowed;
4062   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4063   Mat                    t_coarse_mat_is;
4064   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4065   PetscMPIInt            all_procs;
4066   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4067   PetscBool              compute_vecs = PETSC_FALSE;
4068   PetscScalar            *array;
4069   PetscErrorCode         ierr;
4070 
4071   PetscFunctionBegin;
4072   /* Assign global numbering to coarse dofs */
4073   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4074     PetscInt ocoarse_size;
4075     compute_vecs = PETSC_TRUE;
4076     ocoarse_size = pcbddc->coarse_size;
4077     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4078     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4079     /* see if we can avoid some work */
4080     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4081       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4082       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4083         PC        pc;
4084         PetscBool isbddc;
4085 
4086         /* temporary workaround since PCBDDC does not have a reset method so far */
4087         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4088         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4089         if (isbddc) {
4090           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4091         }
4092         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4093         coarse_reuse = PETSC_FALSE;
4094       } else { /* we can safely reuse already computed coarse matrix */
4095         coarse_reuse = PETSC_TRUE;
4096       }
4097     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4098       coarse_reuse = PETSC_FALSE;
4099     }
4100     /* reset any subassembling information */
4101     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4102     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4103   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4104     coarse_reuse = PETSC_TRUE;
4105   }
4106 
4107   /* count "active" (i.e. with positive local size) and "void" processes */
4108   im_active = !!(pcis->n);
4109   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4110   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4111   void_procs = all_procs-active_procs;
4112   csin_type_simple = PETSC_TRUE;
4113   redist = PETSC_FALSE;
4114   if (pcbddc->current_level && void_procs) {
4115     csin_ml = PETSC_TRUE;
4116     ncoarse_ml = void_procs;
4117     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4118     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4119       csin_ds = PETSC_TRUE;
4120       ncoarse_ds = pcbddc->redistribute_coarse;
4121       redist = PETSC_TRUE;
4122     } else {
4123       csin_ds = PETSC_TRUE;
4124       ncoarse_ds = active_procs;
4125       redist = PETSC_TRUE;
4126     }
4127   } else {
4128     csin_ml = PETSC_FALSE;
4129     ncoarse_ml = all_procs;
4130     if (void_procs) {
4131       csin_ds = PETSC_TRUE;
4132       ncoarse_ds = void_procs;
4133       csin_type_simple = PETSC_FALSE;
4134     } else {
4135       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4136         csin_ds = PETSC_TRUE;
4137         ncoarse_ds = pcbddc->redistribute_coarse;
4138         redist = PETSC_TRUE;
4139       } else {
4140         csin_ds = PETSC_FALSE;
4141         ncoarse_ds = all_procs;
4142       }
4143     }
4144   }
4145 
4146   /*
4147     test if we can go multilevel: three conditions must be satisfied:
4148     - we have not exceeded the number of levels requested
4149     - we can actually subassemble the active processes
4150     - we can find a suitable number of MPI processes where we can place the subassembled problem
4151   */
4152   multilevel_allowed = PETSC_FALSE;
4153   multilevel_requested = PETSC_FALSE;
4154   if (pcbddc->current_level < pcbddc->max_levels) {
4155     multilevel_requested = PETSC_TRUE;
4156     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4157       multilevel_allowed = PETSC_FALSE;
4158     } else {
4159       multilevel_allowed = PETSC_TRUE;
4160     }
4161   }
4162   /* determine number of process partecipating to coarse solver */
4163   if (multilevel_allowed) {
4164     ncoarse = ncoarse_ml;
4165     csin = csin_ml;
4166     redist = PETSC_FALSE;
4167   } else {
4168     ncoarse = ncoarse_ds;
4169     csin = csin_ds;
4170   }
4171 
4172   /* creates temporary l2gmap and IS for coarse indexes */
4173   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4174   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4175 
4176   /* creates temporary MATIS object for coarse matrix */
4177   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4178   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4179   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4180   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4181 #if 0
4182   {
4183     PetscViewer viewer;
4184     char filename[256];
4185     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4186     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4187     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4188     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4189     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4190   }
4191 #endif
4192   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4193   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4194   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4195   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4196   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4197 
4198   /* compute dofs splitting and neumann boundaries for coarse dofs */
4199   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4200     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4201     const PetscInt         *idxs;
4202     ISLocalToGlobalMapping tmap;
4203 
4204     /* create map between primal indices (in local representative ordering) and local primal numbering */
4205     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4206     /* allocate space for temporary storage */
4207     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4208     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4209     /* allocate for IS array */
4210     nisdofs = pcbddc->n_ISForDofsLocal;
4211     nisneu = !!pcbddc->NeumannBoundariesLocal;
4212     nis = nisdofs + nisneu;
4213     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4214     /* dofs splitting */
4215     for (i=0;i<nisdofs;i++) {
4216       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4217       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4218       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4219       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4220       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4221       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4222       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4223       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4224     }
4225     /* neumann boundaries */
4226     if (pcbddc->NeumannBoundariesLocal) {
4227       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4228       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4229       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4230       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4231       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4232       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4233       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4234       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4235     }
4236     /* free memory */
4237     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4238     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4239     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4240   } else {
4241     nis = 0;
4242     nisdofs = 0;
4243     nisneu = 0;
4244     isarray = NULL;
4245   }
4246   /* destroy no longer needed map */
4247   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4248 
4249   /* restrict on coarse candidates (if needed) */
4250   coarse_mat_is = NULL;
4251   if (csin) {
4252     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4253       if (redist) {
4254         PetscMPIInt rank;
4255         PetscInt    spc,n_spc_p1,dest[1],destsize;
4256 
4257         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4258         spc = active_procs/ncoarse;
4259         n_spc_p1 = active_procs%ncoarse;
4260         if (im_active) {
4261           destsize = 1;
4262           if (rank > n_spc_p1*(spc+1)-1) {
4263             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4264           } else {
4265             dest[0] = rank/(spc+1);
4266           }
4267         } else {
4268           destsize = 0;
4269         }
4270         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4271       } else if (csin_type_simple) {
4272         PetscMPIInt rank;
4273         PetscInt    issize,isidx;
4274 
4275         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4276         if (im_active) {
4277           issize = 1;
4278           isidx = (PetscInt)rank;
4279         } else {
4280           issize = 0;
4281           isidx = -1;
4282         }
4283         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4284       } else { /* get a suitable subassembling pattern from MATIS code */
4285         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4286       }
4287 
4288       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4289       if (!redist || ncoarse <= void_procs) {
4290         PetscInt ncoarse_cand,tissize,*nisindices;
4291         PetscInt *coarse_candidates;
4292         const PetscInt* tisindices;
4293 
4294         /* get coarse candidates' ranks in pc communicator */
4295         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4296         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4297         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4298           if (!coarse_candidates[i]) {
4299             coarse_candidates[ncoarse_cand++]=i;
4300           }
4301         }
4302         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4303 
4304 
4305         if (pcbddc->dbg_flag) {
4306           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4307           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4308           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4309           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4310           for (i=0;i<ncoarse_cand;i++) {
4311             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4312           }
4313           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4314           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4315         }
4316         /* shift the pattern on coarse candidates */
4317         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4318         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4319         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4320         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4321         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4322         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4323         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4324       }
4325       if (pcbddc->dbg_flag) {
4326         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4327         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4328         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4329         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4330       }
4331     }
4332     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4333     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4334       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4335     } else { /* this is the last level, so use just receiving processes in subcomm */
4336       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4337     }
4338   } else {
4339     if (pcbddc->dbg_flag) {
4340       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4341       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4342       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4343     }
4344     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4345     coarse_mat_is = t_coarse_mat_is;
4346   }
4347 
4348   /* create local to global scatters for coarse problem */
4349   if (compute_vecs) {
4350     PetscInt lrows;
4351     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4352     if (coarse_mat_is) {
4353       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4354     } else {
4355       lrows = 0;
4356     }
4357     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4358     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4359     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4360     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4361     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4362   }
4363   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4364   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4365 
4366   /* set defaults for coarse KSP and PC */
4367   if (multilevel_allowed) {
4368     coarse_ksp_type = KSPRICHARDSON;
4369     coarse_pc_type = PCBDDC;
4370   } else {
4371     coarse_ksp_type = KSPPREONLY;
4372     coarse_pc_type = PCREDUNDANT;
4373   }
4374 
4375   /* print some info if requested */
4376   if (pcbddc->dbg_flag) {
4377     if (!multilevel_allowed) {
4378       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4379       if (multilevel_requested) {
4380         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4381       } else if (pcbddc->max_levels) {
4382         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4383       }
4384       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4385     }
4386   }
4387 
4388   /* create the coarse KSP object only once with defaults */
4389   if (coarse_mat_is) {
4390     MatReuse coarse_mat_reuse;
4391     PetscViewer dbg_viewer = NULL;
4392     if (pcbddc->dbg_flag) {
4393       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4394       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4395     }
4396     if (!pcbddc->coarse_ksp) {
4397       char prefix[256],str_level[16];
4398       size_t len;
4399       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4400       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4401       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4402       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4403       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4404       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4405       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4406       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4407       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4408       /* prefix */
4409       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4410       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4411       if (!pcbddc->current_level) {
4412         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4413         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4414       } else {
4415         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4416         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4417         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4418         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4419         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4420         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4421       }
4422       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4423       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4424       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4425       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4426       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4427       /* allow user customization */
4428       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4429     }
4430     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4431     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4432     if (nisdofs) {
4433       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4434       for (i=0;i<nisdofs;i++) {
4435         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4436       }
4437     }
4438     if (nisneu) {
4439       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4440       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4441     }
4442 
4443     /* get some info after set from options */
4444     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4445     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4446     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4447     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4448       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4449       isbddc = PETSC_FALSE;
4450     }
4451     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4452     if (isredundant) {
4453       KSP inner_ksp;
4454       PC  inner_pc;
4455       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4456       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4457       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4458     }
4459 
4460     /* assemble coarse matrix */
4461     if (coarse_reuse) {
4462       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4463       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4464       coarse_mat_reuse = MAT_REUSE_MATRIX;
4465     } else {
4466       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4467     }
4468     if (isbddc || isnn) {
4469       if (pcbddc->coarsening_ratio > 1) {
4470         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4471           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4472           if (pcbddc->dbg_flag) {
4473             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4474             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4475             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4476             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4477           }
4478         }
4479         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4480       } else {
4481         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4482         coarse_mat = coarse_mat_is;
4483       }
4484     } else {
4485       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4486     }
4487     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4488 
4489     /* propagate symmetry info of coarse matrix */
4490     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4491     if (pc->pmat->symmetric_set) {
4492       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4493     }
4494     if (pc->pmat->hermitian_set) {
4495       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4496     }
4497     if (pc->pmat->spd_set) {
4498       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4499     }
4500     /* set operators */
4501     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4502     if (pcbddc->dbg_flag) {
4503       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4504     }
4505   } else { /* processes non partecipating to coarse solver (if any) */
4506     coarse_mat = 0;
4507   }
4508   ierr = PetscFree(isarray);CHKERRQ(ierr);
4509 #if 0
4510   {
4511     PetscViewer viewer;
4512     char filename[256];
4513     sprintf(filename,"coarse_mat.m");
4514     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4515     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4516     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4517     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4518   }
4519 #endif
4520 
4521   /* Compute coarse null space (special handling by BDDC only) */
4522 #if 0
4523   if (pcbddc->NullSpace) {
4524     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4525   }
4526 #endif
4527 
4528   if (pcbddc->coarse_ksp) {
4529     Vec crhs,csol;
4530     PetscBool ispreonly;
4531 
4532     if (CoarseNullSpace) {
4533       if (isbddc) {
4534         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4535       } else {
4536         ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
4537       }
4538     }
4539     /* setup coarse ksp */
4540     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4541     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4542     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4543     /* hack */
4544     if (!csol) {
4545       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4546     }
4547     if (!crhs) {
4548       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4549     }
4550     /* Check coarse problem if in debug mode or if solving with an iterative method */
4551     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4552     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4553       KSP       check_ksp;
4554       KSPType   check_ksp_type;
4555       PC        check_pc;
4556       Vec       check_vec,coarse_vec;
4557       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4558       PetscInt  its;
4559       PetscBool compute_eigs;
4560       PetscReal *eigs_r,*eigs_c;
4561       PetscInt  neigs;
4562       const char *prefix;
4563 
4564       /* Create ksp object suitable for estimation of extreme eigenvalues */
4565       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4566       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4567       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4568       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4569       if (ispreonly) {
4570         check_ksp_type = KSPPREONLY;
4571         compute_eigs = PETSC_FALSE;
4572       } else {
4573         check_ksp_type = KSPGMRES;
4574         compute_eigs = PETSC_TRUE;
4575       }
4576       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4577       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4578       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4579       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4580       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4581       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4582       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4583       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4584       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4585       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4586       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4587       /* create random vec */
4588       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4589       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4590       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4591       if (CoarseNullSpace) {
4592         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4593       }
4594       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4595       /* solve coarse problem */
4596       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4597       if (CoarseNullSpace) {
4598         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4599       }
4600       /* set eigenvalue estimation if preonly has not been requested */
4601       if (compute_eigs) {
4602         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4603         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4604         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4605         lambda_max = eigs_r[neigs-1];
4606         lambda_min = eigs_r[0];
4607         if (pcbddc->use_coarse_estimates) {
4608           if (lambda_max>lambda_min) {
4609             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4610             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4611           }
4612         }
4613       }
4614 
4615       /* check coarse problem residual error */
4616       if (pcbddc->dbg_flag) {
4617         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4618         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4619         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4620         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4621         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4622         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4623         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4624         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4625         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4626         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4627         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4628         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4629         if (compute_eigs) {
4630           PetscReal lambda_max_s,lambda_min_s;
4631           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4632           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4633           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4634           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4635           for (i=0;i<neigs;i++) {
4636             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4637           }
4638         }
4639         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4640         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4641       }
4642       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4643       if (compute_eigs) {
4644         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4645         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4646       }
4647     }
4648   }
4649   /* print additional info */
4650   if (pcbddc->dbg_flag) {
4651     /* waits until all processes reaches this point */
4652     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4653     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4654     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4655   }
4656 
4657   /* free memory */
4658   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4659   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4660   PetscFunctionReturn(0);
4661 }
4662 
4663 #undef __FUNCT__
4664 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4665 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4666 {
4667   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4668   PC_IS*         pcis = (PC_IS*)pc->data;
4669   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4670   IS             subset,subset_mult,subset_n;
4671   PetscInt       local_size,coarse_size=0;
4672   PetscInt       *local_primal_indices=NULL;
4673   const PetscInt *t_local_primal_indices;
4674   PetscErrorCode ierr;
4675 
4676   PetscFunctionBegin;
4677   /* Compute global number of coarse dofs */
4678   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4679     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4680   }
4681   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
4682   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
4683   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4684   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
4685   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
4686   ierr = ISDestroy(&subset);CHKERRQ(ierr);
4687   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
4688   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
4689   if (local_size != pcbddc->local_primal_size) {
4690     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4691   }
4692   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
4693   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4694   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
4695   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4696   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4697 
4698   /* check numbering */
4699   if (pcbddc->dbg_flag) {
4700     PetscScalar coarsesum,*array;
4701     PetscInt    i;
4702     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4703 
4704     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4705     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4706     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4707     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4708     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4709     for (i=0;i<pcbddc->local_primal_size;i++) {
4710       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4711     }
4712     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4713     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4714     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4715     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4716     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4717     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4718     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4719     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4720     for (i=0;i<pcis->n;i++) {
4721       if (array[i] == 1.0) {
4722         set_error = PETSC_TRUE;
4723         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4724       }
4725     }
4726     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4727     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4728     for (i=0;i<pcis->n;i++) {
4729       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4730     }
4731     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4732     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4733     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4734     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4735     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4736     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4737     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4738       PetscInt *gidxs;
4739 
4740       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4741       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4742       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4743       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4744       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4745       for (i=0;i<pcbddc->local_primal_size;i++) {
4746         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4747       }
4748       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4749       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4750     }
4751     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4752     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4753   }
4754   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4755   /* get back data */
4756   *coarse_size_n = coarse_size;
4757   *local_primal_indices_n = local_primal_indices;
4758   PetscFunctionReturn(0);
4759 }
4760 
4761 #undef __FUNCT__
4762 #define __FUNCT__ "PCBDDCGlobalToLocal"
4763 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4764 {
4765   IS             localis_t;
4766   PetscInt       i,lsize,*idxs,n;
4767   PetscScalar    *vals;
4768   PetscErrorCode ierr;
4769 
4770   PetscFunctionBegin;
4771   /* get indices in local ordering exploiting local to global map */
4772   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4773   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4774   for (i=0;i<lsize;i++) vals[i] = 1.0;
4775   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4776   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4777   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4778   if (idxs) { /* multilevel guard */
4779     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4780   }
4781   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4782   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4783   ierr = PetscFree(vals);CHKERRQ(ierr);
4784   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4785   /* now compute set in local ordering */
4786   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4787   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4788   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4789   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4790   for (i=0,lsize=0;i<n;i++) {
4791     if (PetscRealPart(vals[i]) > 0.5) {
4792       lsize++;
4793     }
4794   }
4795   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4796   for (i=0,lsize=0;i<n;i++) {
4797     if (PetscRealPart(vals[i]) > 0.5) {
4798       idxs[lsize++] = i;
4799     }
4800   }
4801   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4802   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4803   *localis = localis_t;
4804   PetscFunctionReturn(0);
4805 }
4806 
4807 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4808 #undef __FUNCT__
4809 #define __FUNCT__ "PCBDDCMatMult_Private"
4810 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4811 {
4812   PCBDDCChange_ctx change_ctx;
4813   PetscErrorCode   ierr;
4814 
4815   PetscFunctionBegin;
4816   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4817   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4818   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4819   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4820   PetscFunctionReturn(0);
4821 }
4822 
4823 #undef __FUNCT__
4824 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4825 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4826 {
4827   PCBDDCChange_ctx change_ctx;
4828   PetscErrorCode   ierr;
4829 
4830   PetscFunctionBegin;
4831   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4832   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4833   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4834   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4835   PetscFunctionReturn(0);
4836 }
4837 
4838 #undef __FUNCT__
4839 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4840 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4841 {
4842   PC_IS               *pcis=(PC_IS*)pc->data;
4843   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4844   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4845   Mat                 S_j;
4846   PetscInt            *used_xadj,*used_adjncy;
4847   PetscBool           free_used_adj;
4848   PetscErrorCode      ierr;
4849 
4850   PetscFunctionBegin;
4851   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4852   free_used_adj = PETSC_FALSE;
4853   if (pcbddc->sub_schurs_layers == -1) {
4854     used_xadj = NULL;
4855     used_adjncy = NULL;
4856   } else {
4857     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4858       used_xadj = pcbddc->mat_graph->xadj;
4859       used_adjncy = pcbddc->mat_graph->adjncy;
4860     } else if (pcbddc->computed_rowadj) {
4861       used_xadj = pcbddc->mat_graph->xadj;
4862       used_adjncy = pcbddc->mat_graph->adjncy;
4863     } else {
4864       PetscBool      flg_row=PETSC_FALSE;
4865       const PetscInt *xadj,*adjncy;
4866       PetscInt       nvtxs;
4867 
4868       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4869       if (flg_row) {
4870         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4871         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4872         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4873         free_used_adj = PETSC_TRUE;
4874       } else {
4875         pcbddc->sub_schurs_layers = -1;
4876         used_xadj = NULL;
4877         used_adjncy = NULL;
4878       }
4879       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4880     }
4881   }
4882 
4883   /* setup sub_schurs data */
4884   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4885   if (!sub_schurs->use_mumps) {
4886     /* pcbddc->ksp_D up to date only if not using MUMPS */
4887     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4888     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
4889   } else {
4890     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4891     PetscBool isseqaij;
4892     if (!pcbddc->use_vertices && reuse_solvers) {
4893       PetscInt n_vertices;
4894 
4895       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
4896       reuse_solvers = (PetscBool)!n_vertices;
4897     }
4898     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
4899     if (!isseqaij) {
4900       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4901       if (matis->A == pcbddc->local_mat) {
4902         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
4903         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
4904       } else {
4905         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
4906       }
4907     }
4908     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
4909   }
4910   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4911 
4912   /* free adjacency */
4913   if (free_used_adj) {
4914     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4915   }
4916   PetscFunctionReturn(0);
4917 }
4918 
4919 #undef __FUNCT__
4920 #define __FUNCT__ "PCBDDCInitSubSchurs"
4921 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4922 {
4923   PC_IS               *pcis=(PC_IS*)pc->data;
4924   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4925   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4926   PCBDDCGraph         graph;
4927   PetscErrorCode      ierr;
4928 
4929   PetscFunctionBegin;
4930   /* attach interface graph for determining subsets */
4931   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4932     IS       verticesIS,verticescomm;
4933     PetscInt vsize,*idxs;
4934 
4935     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4936     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
4937     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4938     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
4939     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4940     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4941     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4942     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4943     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
4944     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
4945     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4946 /*
4947     if (pcbddc->dbg_flag) {
4948       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4949     }
4950 */
4951   } else {
4952     graph = pcbddc->mat_graph;
4953   }
4954 
4955   /* sub_schurs init */
4956   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4957 
4958   /* free graph struct */
4959   if (pcbddc->sub_schurs_rebuild) {
4960     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4961   }
4962   PetscFunctionReturn(0);
4963 }
4964