xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 06a4e24a4ea34074cd1dc021a3bf2cd6abf1fe12)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
10 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   const PetscInt *idxs;
14   PetscInt       nz = 0;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   if (!pcbddc->zerodiag) {
19     PetscFunctionReturn(0);
20   } else {
21     ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr);
22     if (!nz) PetscFunctionReturn(0);
23   }
24   ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr);
25   /* TODO: add error checking
26     - avoid nested pop (or push) calls.
27     - cannot push before pop.
28   */
29   if (pop) {
30     const PetscInt    *cB0_cols;
31     PetscInt          cB0_ncol;
32     const PetscScalar *cB0_vals;
33 
34     /* extract B_0 */
35     ierr = MatGetRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
36     pcbddc->B0_ncol = cB0_ncol;
37     ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
38     ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr);
39     ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr);
40     ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr);
41     ierr = MatRestoreRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
42     /* remove rows and cols from local problem */
43     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
44     ierr = MatZeroRowsColumns(pcbddc->local_mat,1,idxs+nz-1,1.,NULL,NULL);CHKERRQ(ierr);
45   } else { /* push */
46     ierr = MatSetValues(pcbddc->local_mat,1,&idxs[nz-1],pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
47     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
48     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
49   }
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "PCBDDCAdaptiveSelection"
55 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
56 {
57   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
58   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
59   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
60   PetscBLASInt    *B_iwork,*B_ifail;
61   PetscScalar     *work,lwork;
62   PetscScalar     *St,*S,*eigv;
63   PetscScalar     *Sarray,*Starray;
64   PetscReal       *eigs,thresh;
65   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
66   PetscBool       allocated_S_St;
67 #if defined(PETSC_USE_COMPLEX)
68   PetscReal       *rwork;
69 #endif
70   PetscErrorCode  ierr;
71 
72   PetscFunctionBegin;
73   if (!sub_schurs->use_mumps) {
74     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
75   }
76 
77   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
78     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
79   }
80 
81   if (pcbddc->dbg_flag) {
82     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
83     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
84     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
85     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
86   }
87 
88   if (pcbddc->dbg_flag) {
89     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
90   }
91 
92   /* max size of subsets */
93   mss = 0;
94   for (i=0;i<sub_schurs->n_subs;i++) {
95     PetscInt subset_size;
96 
97     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
98     mss = PetscMax(mss,subset_size);
99   }
100 
101   /* min/max and threshold */
102   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
103   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
104   nmax = PetscMax(nmin,nmax);
105   allocated_S_St = PETSC_FALSE;
106   if (nmin) {
107     allocated_S_St = PETSC_TRUE;
108   }
109 
110   /* allocate lapack workspace */
111   cum = cum2 = 0;
112   maxneigs = 0;
113   for (i=0;i<sub_schurs->n_subs;i++) {
114     PetscInt n,subset_size;
115 
116     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
117     n = PetscMin(subset_size,nmax);
118     cum += subset_size;
119     cum2 += subset_size*n;
120     maxneigs = PetscMax(maxneigs,n);
121   }
122   if (mss) {
123     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
124       PetscBLASInt B_itype = 1;
125       PetscBLASInt B_N = mss;
126       PetscReal    zero = 0.0;
127       PetscReal    eps = 0.0; /* dlamch? */
128 
129       B_lwork = -1;
130       S = NULL;
131       St = NULL;
132       eigs = NULL;
133       eigv = NULL;
134       B_iwork = NULL;
135       B_ifail = NULL;
136 #if defined(PETSC_USE_COMPLEX)
137       rwork = NULL;
138 #endif
139       thresh = 1.0;
140       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
141 #if defined(PETSC_USE_COMPLEX)
142       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
143 #else
144       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
145 #endif
146       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
147       ierr = PetscFPTrapPop();CHKERRQ(ierr);
148     } else {
149         /* TODO */
150     }
151   } else {
152     lwork = 0;
153   }
154 
155   nv = 0;
156   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
157     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
158   }
159   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
160   if (allocated_S_St) {
161     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
162   }
163   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
164 #if defined(PETSC_USE_COMPLEX)
165   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
166 #endif
167   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
168                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
169                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
170                       nv+cum,&pcbddc->adaptive_constraints_idxs,
171                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
172   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
173 
174   maxneigs = 0;
175   cum = cum2 = cumarray = 0;
176   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
177   pcbddc->adaptive_constraints_data_ptr[0] = 0;
178   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
179     const PetscInt *idxs;
180 
181     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
182     for (cum=0;cum<nv;cum++) {
183       pcbddc->adaptive_constraints_n[cum] = 1;
184       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
185       pcbddc->adaptive_constraints_data[cum] = 1.0;
186       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
187       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
188     }
189     cum2 = cum;
190     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
191   }
192 
193   if (mss) { /* multilevel */
194     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
195     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
196   }
197 
198   for (i=0;i<sub_schurs->n_subs;i++) {
199 
200     const PetscInt *idxs;
201     PetscReal      infty = PETSC_MAX_REAL;
202     PetscInt       j,subset_size,eigs_start = 0;
203     PetscBLASInt   B_N;
204     PetscBool      same_data = PETSC_FALSE;
205 
206     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
207     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
208     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
209       if (sub_schurs->is_hermitian) {
210         PetscInt j,k;
211         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
212           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
213           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
214         }
215         for (j=0;j<subset_size;j++) {
216           for (k=j;k<subset_size;k++) {
217             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
218             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
219           }
220         }
221       } else {
222         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
223         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
224       }
225     } else {
226       S = Sarray + cumarray;
227       St = Starray + cumarray;
228     }
229 
230     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
231     /* see if we can save some work */
232     if (sub_schurs->n_subs == 1) {
233       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
234     }
235 
236     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
237       B_neigs = 0;
238     } else {
239       /* Threshold: this is an heuristic for edges */
240       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
241 
242       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
243         PetscBLASInt B_itype = 1;
244         PetscBLASInt B_IL, B_IU;
245         PetscReal    eps = -1.0; /* dlamch? */
246         PetscInt     nmin_s;
247 
248         /* ask for eigenvalues larger than thresh */
249         if (pcbddc->dbg_flag) {
250           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
251         }
252         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
253 #if defined(PETSC_USE_COMPLEX)
254         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
255 #else
256         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
257 #endif
258         ierr = PetscFPTrapPop();CHKERRQ(ierr);
259         if (B_ierr) {
260           if (B_ierr < 0 ) {
261             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
262           } else if (B_ierr <= B_N) {
263             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
264           } else {
265             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
266           }
267         }
268 
269         if (B_neigs > nmax) {
270           if (pcbddc->dbg_flag) {
271             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
272           }
273           eigs_start = B_neigs -nmax;
274           B_neigs = nmax;
275         }
276 
277         nmin_s = PetscMin(nmin,B_N);
278         if (B_neigs < nmin_s) {
279           PetscBLASInt B_neigs2;
280 
281           B_IU = B_N - B_neigs;
282           B_IL = B_N - nmin_s + 1;
283           if (pcbddc->dbg_flag) {
284             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
285           }
286           if (sub_schurs->is_hermitian) {
287             PetscInt j;
288             for (j=0;j<subset_size;j++) {
289               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
290             }
291             for (j=0;j<subset_size;j++) {
292               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
293             }
294           } else {
295             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
296             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
297           }
298           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
299 #if defined(PETSC_USE_COMPLEX)
300           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
301 #else
302           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
303 #endif
304           ierr = PetscFPTrapPop();CHKERRQ(ierr);
305           B_neigs += B_neigs2;
306         }
307         if (B_ierr) {
308           if (B_ierr < 0 ) {
309             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
310           } else if (B_ierr <= B_N) {
311             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
312           } else {
313             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
314           }
315         }
316         if (pcbddc->dbg_flag) {
317           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
318           for (j=0;j<B_neigs;j++) {
319             if (eigs[j] == 0.0) {
320               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
321             } else {
322               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
323             }
324           }
325         }
326       } else {
327           /* TODO */
328       }
329     }
330     maxneigs = PetscMax(B_neigs,maxneigs);
331     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
332     if (B_neigs) {
333       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
334 
335       if (pcbddc->dbg_flag > 1) {
336         PetscInt ii;
337         for (ii=0;ii<B_neigs;ii++) {
338           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
339           for (j=0;j<B_N;j++) {
340 #if defined(PETSC_USE_COMPLEX)
341             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
342             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
343             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
344 #else
345             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
346 #endif
347           }
348         }
349       }
350 #if 0
351       for (j=0;j<B_neigs;j++) {
352         PetscBLASInt Blas_N,Blas_one = 1.0;
353         PetscScalar norm;
354         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
355         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
356                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
357         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
358           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
359         } else {
360           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
361         }
362         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
363       }
364 #endif
365       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
366       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
367       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
368       cum++;
369     }
370     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
371     /* shift for next computation */
372     cumarray += subset_size*subset_size;
373   }
374   if (pcbddc->dbg_flag) {
375     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
376   }
377 
378   if (mss) {
379     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
380     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
381     /* destroy matrices (junk) */
382     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
383     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
384   }
385   if (allocated_S_St) {
386     ierr = PetscFree2(S,St);CHKERRQ(ierr);
387   }
388   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
389 #if defined(PETSC_USE_COMPLEX)
390   ierr = PetscFree(rwork);CHKERRQ(ierr);
391 #endif
392   if (pcbddc->dbg_flag) {
393     PetscInt maxneigs_r;
394     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
395     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
396   }
397   PetscFunctionReturn(0);
398 }
399 
400 #undef __FUNCT__
401 #define __FUNCT__ "PCBDDCSetUpSolvers"
402 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
403 {
404   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
405   PetscScalar    *coarse_submat_vals;
406   PetscErrorCode ierr;
407 
408   PetscFunctionBegin;
409   /* Setup local scatters R_to_B and (optionally) R_to_D */
410   /* PCBDDCSetUpLocalWorkVectors should be called first! */
411   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
412 
413   /* Setup local neumann solver ksp_R */
414   /* PCBDDCSetUpLocalScatters should be called first! */
415   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
416 
417   /* Change global null space passed in by the user if change of basis has been requested */
418   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
419     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
420   }
421 
422   /*
423      Setup local correction and local part of coarse basis.
424      Gives back the dense local part of the coarse matrix in column major ordering
425   */
426   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
427 
428   /* Compute total number of coarse nodes and setup coarse solver */
429   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
430 
431   /* free */
432   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "PCBDDCResetCustomization"
438 PetscErrorCode PCBDDCResetCustomization(PC pc)
439 {
440   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
441   PetscErrorCode ierr;
442 
443   PetscFunctionBegin;
444   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
445   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
446   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
449   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
450   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
451   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
452   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
453   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
454   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "PCBDDCResetTopography"
460 PetscErrorCode PCBDDCResetTopography(PC pc)
461 {
462   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
463   PetscErrorCode ierr;
464 
465   PetscFunctionBegin;
466   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
467   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
468   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
469   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
470   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "PCBDDCResetSolvers"
476 PetscErrorCode PCBDDCResetSolvers(PC pc)
477 {
478   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
479   PetscScalar    *array;
480   PetscErrorCode ierr;
481 
482   PetscFunctionBegin;
483   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
484   if (pcbddc->coarse_phi_B) {
485     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
486     ierr = PetscFree(array);CHKERRQ(ierr);
487   }
488   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
489   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
490   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
491   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
492   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
493   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
494   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
495   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
496   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
497   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
498   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
499   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
500   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
501   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
502   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
503   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
504   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
505   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
506   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
507   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
508   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
509   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
510   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
511   ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr);
512   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
513   ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
519 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
520 {
521   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
522   PC_IS          *pcis = (PC_IS*)pc->data;
523   VecType        impVecType;
524   PetscInt       n_constraints,n_R,old_size;
525   PetscErrorCode ierr;
526 
527   PetscFunctionBegin;
528   if (!pcbddc->ConstraintMatrix) {
529     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
530   }
531   /* get sizes */
532   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
533   n_R = pcis->n-pcbddc->n_vertices;
534   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
535   /* local work vectors (try to avoid unneeded work)*/
536   /* R nodes */
537   old_size = -1;
538   if (pcbddc->vec1_R) {
539     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
540   }
541   if (n_R != old_size) {
542     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
543     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
544     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
545     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
546     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
547     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
548   }
549   /* local primal dofs */
550   old_size = -1;
551   if (pcbddc->vec1_P) {
552     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
553   }
554   if (pcbddc->local_primal_size != old_size) {
555     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
556     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
557     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
558     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
559   }
560   /* local explicit constraints */
561   old_size = -1;
562   if (pcbddc->vec1_C) {
563     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
564   }
565   if (n_constraints && n_constraints != old_size) {
566     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
567     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
568     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
569     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
570   }
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "PCBDDCSetUpCorrection"
576 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
577 {
578   PetscErrorCode  ierr;
579   /* pointers to pcis and pcbddc */
580   PC_IS*          pcis = (PC_IS*)pc->data;
581   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
582   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
583   /* submatrices of local problem */
584   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
585   /* submatrices of local coarse problem */
586   Mat             S_VV,S_CV,S_VC,S_CC;
587   /* working matrices */
588   Mat             C_CR;
589   /* additional working stuff */
590   PC              pc_R;
591   Mat             F;
592   PetscBool       isLU,isCHOL,isILU;
593 
594   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
595   PetscScalar     *work;
596   PetscInt        *idx_V_B;
597   PetscInt        n,n_vertices,n_constraints;
598   PetscInt        i,n_R,n_D,n_B;
599   PetscBool       unsymmetric_check;
600   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
601   MatType         impMatType;
602   /* some shortcuts to scalars */
603   PetscScalar     one=1.0,m_one=-1.0;
604 
605   PetscFunctionBegin;
606   n_vertices = pcbddc->n_vertices;
607   n_constraints = pcbddc->local_primal_size-n_vertices;
608   /* Set Non-overlapping dimensions */
609   n_B = pcis->n_B;
610   n_D = pcis->n - n_B;
611   n_R = pcis->n - n_vertices;
612 
613   /* Set types for local objects needed by BDDC precondtioner */
614   impMatType = MATSEQDENSE;
615 
616   /* vertices in boundary numbering */
617   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
618   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
619   if (i != n_vertices) {
620     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
621   }
622 
623   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
624   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
625   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
626   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
627   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
628   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
629   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
630   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
631   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
632   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
633 
634   unsymmetric_check = PETSC_FALSE;
635   /* allocate workspace */
636   n = 0;
637   if (n_constraints) {
638     n += n_R*n_constraints;
639   }
640   if (n_vertices) {
641     n = PetscMax(2*n_R*n_vertices,n);
642     n = PetscMax((n_R+n_B)*n_vertices,n);
643   }
644   if (!pcbddc->symmetric_primal) {
645     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
646     unsymmetric_check = PETSC_TRUE;
647   }
648   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
649 
650   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
651   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
652   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
653   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
654   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
655   if (isLU || isILU || isCHOL) {
656     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
657   } else if (sub_schurs->reuse_mumps) {
658     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
659     MatFactorType type;
660 
661     F = reuse_mumps->F;
662     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
663     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
664   } else {
665     F = NULL;
666   }
667 
668   /* Precompute stuffs needed for preprocessing and application of BDDC*/
669   if (n_constraints) {
670     Mat         M1,M2,M3;
671     Mat         auxmat;
672     IS          is_aux;
673     PetscScalar *array,*array2;
674 
675     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
676     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
677 
678     /* Extract constraints on R nodes: C_{CR}  */
679     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
680     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
681     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
682 
683     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
684     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
685     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
686     for (i=0;i<n_constraints;i++) {
687       const PetscScalar *row_cmat_values;
688       const PetscInt    *row_cmat_indices;
689       PetscInt          size_of_constraint,j;
690 
691       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
692       for (j=0;j<size_of_constraint;j++) {
693         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
694       }
695       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
696     }
697     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
698     if (F) {
699       Mat B;
700 
701       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
702       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
703       ierr = MatDestroy(&B);CHKERRQ(ierr);
704     } else {
705       PetscScalar *marr;
706 
707       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
708       for (i=0;i<n_constraints;i++) {
709         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
710         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
711         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
712         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
713         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
714       }
715       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
716     }
717     if (!pcbddc->switch_static) {
718       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
719       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
720       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
721       for (i=0;i<n_constraints;i++) {
722         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
723         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
724         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
726         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
727         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
728       }
729       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
730       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
731       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
732     } else {
733       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
734       pcbddc->local_auxmat2 = local_auxmat2_R;
735       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
736     }
737     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
738     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
739     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
740     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
741     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
742     if (isCHOL) {
743       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
744     } else {
745       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
746     }
747     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
748     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
749     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
750     ierr = MatDestroy(&M2);CHKERRQ(ierr);
751     ierr = MatDestroy(&M3);CHKERRQ(ierr);
752     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
753     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
754     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
755     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
756     ierr = MatDestroy(&M1);CHKERRQ(ierr);
757   }
758   /* Get submatrices from subdomain matrix */
759   if (n_vertices) {
760     IS is_aux;
761 
762     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
763       IS tis;
764 
765       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
766       ierr = ISSort(tis);CHKERRQ(ierr);
767       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
768       ierr = ISDestroy(&tis);CHKERRQ(ierr);
769     } else {
770       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
771     }
772     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
773     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
774     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
775     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
776   }
777 
778   /* Matrix of coarse basis functions (local) */
779   if (pcbddc->coarse_phi_B) {
780     PetscInt on_B,on_primal,on_D=n_D;
781     if (pcbddc->coarse_phi_D) {
782       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
783     }
784     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
785     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
786       PetscScalar *marray;
787 
788       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
789       ierr = PetscFree(marray);CHKERRQ(ierr);
790       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
791       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
792       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
793       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
794     }
795   }
796 
797   if (!pcbddc->coarse_phi_B) {
798     PetscScalar *marray;
799 
800     n = n_B*pcbddc->local_primal_size;
801     if (pcbddc->switch_static || pcbddc->dbg_flag) {
802       n += n_D*pcbddc->local_primal_size;
803     }
804     if (!pcbddc->symmetric_primal) {
805       n *= 2;
806     }
807     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
808     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
809     n = n_B*pcbddc->local_primal_size;
810     if (pcbddc->switch_static || pcbddc->dbg_flag) {
811       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
812       n += n_D*pcbddc->local_primal_size;
813     }
814     if (!pcbddc->symmetric_primal) {
815       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
816       if (pcbddc->switch_static || pcbddc->dbg_flag) {
817         n = n_B*pcbddc->local_primal_size;
818         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
819       }
820     } else {
821       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
822       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
823       if (pcbddc->switch_static || pcbddc->dbg_flag) {
824         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
825         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
826       }
827     }
828   }
829   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
830   /* vertices */
831   if (n_vertices) {
832 
833     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
834 
835     if (n_R) {
836       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
837       PetscBLASInt B_N,B_one = 1;
838       PetscScalar  *x,*y;
839       PetscBool    isseqaij;
840 
841       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
842       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
843       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
844       if (F) { /* TODO could be optimized for symmetric problems */
845         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
846       } else {
847         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
848         for (i=0;i<n_vertices;i++) {
849           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
850           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
851           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
852           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
853           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
854         }
855         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
856       }
857       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
858       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
859       if (n_constraints) {
860         Mat B;
861 
862         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
863         for (i=0;i<n_vertices;i++) {
864           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
865           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
866           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
869           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
870         }
871         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
872         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
873         ierr = MatDestroy(&B);CHKERRQ(ierr);
874         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
875         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
876         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
877         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
878         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
879         ierr = MatDestroy(&B);CHKERRQ(ierr);
880       }
881       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
882       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
883         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
884       }
885       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
886       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
887       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
888       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
889       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
890       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
891       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
892       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
893       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
894       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
895     } else {
896       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
897     }
898     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
899     /* coarse basis functions */
900     for (i=0;i<n_vertices;i++) {
901       PetscScalar *y;
902 
903       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
904       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
905       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
906       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908       y[n_B*i+idx_V_B[i]] = 1.0;
909       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
910       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
911 
912       if (pcbddc->switch_static || pcbddc->dbg_flag) {
913         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
914         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
915         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
918         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
919       }
920       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
921     }
922     /* if n_R == 0 the object is not destroyed */
923     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
924   }
925 
926   if (n_constraints) {
927     Mat B;
928 
929     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
930     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
931     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
932     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
933     if (n_vertices) {
934       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
935         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
936       } else {
937         Mat S_VCt;
938 
939         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
940         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
941         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
942       }
943     }
944     ierr = MatDestroy(&B);CHKERRQ(ierr);
945     /* coarse basis functions */
946     for (i=0;i<n_constraints;i++) {
947       PetscScalar *y;
948 
949       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
950       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
951       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
952       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
953       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
955       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
956       if (pcbddc->switch_static || pcbddc->dbg_flag) {
957         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
958         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
959         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
962         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
963       }
964       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
965     }
966   }
967   if (n_constraints) {
968     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
969   }
970 
971   /* compute other basis functions for non-symmetric problems */
972   if (!pcbddc->symmetric_primal) {
973 
974     if (n_constraints) {
975       Mat S_CCT,B_C;
976 
977       /* this is a lazy thing */
978       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
979       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
980       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
981       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
982       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
983       if (n_vertices) {
984         Mat B_V,S_VCT;
985 
986         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
987         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
988         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
989         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
990         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
991       }
992       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
993     } else { /* if there are no constraints, reset work */
994       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
995     }
996     if (n_vertices && n_R) {
997       Mat          A_VRT;
998       PetscScalar  *marray;
999       PetscBLASInt B_N,B_one = 1;
1000 
1001       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1002       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1003       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1004       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1005       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1006       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1007       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1008     }
1009 
1010     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1011       for (i=0;i<pcbddc->local_primal_size;i++) {
1012         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1013         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1014         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1015         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1016         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1017       }
1018     } else {
1019       for (i=0;i<pcbddc->local_primal_size;i++) {
1020         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1021         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1022         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1023         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1024         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1025       }
1026     }
1027     /* coarse basis functions */
1028     for (i=0;i<pcbddc->local_primal_size;i++) {
1029       PetscScalar *y;
1030 
1031       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1032       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1033       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1034       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036       if (i<n_vertices) {
1037         y[n_B*i+idx_V_B[i]] = 1.0;
1038       }
1039       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1040       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1041 
1042       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1043         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1044         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1045         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1047         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1048         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1049       }
1050       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1051     }
1052   }
1053   /* free memory */
1054   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1055   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1056   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1057   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1058   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1059   ierr = PetscFree(work);CHKERRQ(ierr);
1060   if (n_vertices) {
1061     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1062   }
1063   if (n_constraints) {
1064     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1065   }
1066   /* Checking coarse_sub_mat and coarse basis functios */
1067   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1068   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1069   if (pcbddc->dbg_flag) {
1070     Mat         coarse_sub_mat;
1071     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1072     Mat         coarse_phi_D,coarse_phi_B;
1073     Mat         coarse_psi_D,coarse_psi_B;
1074     Mat         A_II,A_BB,A_IB,A_BI;
1075     Mat         C_B,CPHI;
1076     IS          is_dummy;
1077     Vec         mones;
1078     MatType     checkmattype=MATSEQAIJ;
1079     PetscReal   real_value;
1080 
1081     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1082     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1083     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1084     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1085     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1086     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1087     if (unsymmetric_check) {
1088       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1089       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1090     }
1091     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1092 
1093     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1094     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1095     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1096     if (unsymmetric_check) {
1097       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1098       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1099       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1100       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1101       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1102       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1103       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1104       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1105       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1106       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1107       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1108       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1109     } else {
1110       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1111       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1112       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1113       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1114       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1115       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1116       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1117       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1118     }
1119     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1120     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1121     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1122     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1123     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1124     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1125     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1126     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1127 
1128     /* check constraints */
1129     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1130     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1131     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1132     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1133     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1134     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1135     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1136     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1137     if (unsymmetric_check) {
1138       ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1139       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1140       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1141       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1142       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1143     }
1144     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1145     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1146     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1147     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1148 
1149     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1150     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1151     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1152     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1153     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1154     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1155     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1156     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1157     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1158     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1159     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1160     if (unsymmetric_check) {
1161       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1162       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1163     }
1164     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1165   }
1166   /* get back data */
1167   *coarse_submat_vals_n = coarse_submat_vals;
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1173 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1174 {
1175   Mat            *work_mat;
1176   IS             isrow_s,iscol_s;
1177   PetscBool      rsorted,csorted;
1178   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1179   PetscErrorCode ierr;
1180 
1181   PetscFunctionBegin;
1182   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1183   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1184   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1185   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1186 
1187   if (!rsorted) {
1188     const PetscInt *idxs;
1189     PetscInt *idxs_sorted,i;
1190 
1191     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1192     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1193     for (i=0;i<rsize;i++) {
1194       idxs_perm_r[i] = i;
1195     }
1196     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1197     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1198     for (i=0;i<rsize;i++) {
1199       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1200     }
1201     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1202     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1203   } else {
1204     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1205     isrow_s = isrow;
1206   }
1207 
1208   if (!csorted) {
1209     if (isrow == iscol) {
1210       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1211       iscol_s = isrow_s;
1212     } else {
1213       const PetscInt *idxs;
1214       PetscInt *idxs_sorted,i;
1215 
1216       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1217       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1218       for (i=0;i<csize;i++) {
1219         idxs_perm_c[i] = i;
1220       }
1221       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1222       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1223       for (i=0;i<csize;i++) {
1224         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1225       }
1226       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1227       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1228     }
1229   } else {
1230     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1231     iscol_s = iscol;
1232   }
1233 
1234   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1235 
1236   if (!rsorted || !csorted) {
1237     Mat      new_mat;
1238     IS       is_perm_r,is_perm_c;
1239 
1240     if (!rsorted) {
1241       PetscInt *idxs_r,i;
1242       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1243       for (i=0;i<rsize;i++) {
1244         idxs_r[idxs_perm_r[i]] = i;
1245       }
1246       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1247       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1248     } else {
1249       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1250     }
1251     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1252 
1253     if (!csorted) {
1254       if (isrow_s == iscol_s) {
1255         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1256         is_perm_c = is_perm_r;
1257       } else {
1258         PetscInt *idxs_c,i;
1259         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1260         for (i=0;i<csize;i++) {
1261           idxs_c[idxs_perm_c[i]] = i;
1262         }
1263         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1264         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1265       }
1266     } else {
1267       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1268     }
1269     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1270 
1271     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1272     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1273     work_mat[0] = new_mat;
1274     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1275     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1276   }
1277 
1278   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1279   *B = work_mat[0];
1280   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1281   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1282   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1288 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1289 {
1290   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1291   PC_IS*         pcis = (PC_IS*)pc->data;
1292   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1293   Mat            new_mat;
1294   IS             is_local,is_global;
1295   PetscInt       local_size;
1296   PetscBool      isseqaij;
1297   PetscErrorCode ierr;
1298 
1299   PetscFunctionBegin;
1300   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1301   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1302   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1303   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr);
1304   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1305   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1306   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1307 
1308   /* check */
1309   if (pcbddc->dbg_flag) {
1310     Vec       x,x_change;
1311     PetscReal error;
1312 
1313     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1314     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1315     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1316     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1317     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1318     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1319     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1320     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1321     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1322     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1323     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1324     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1325     ierr = VecDestroy(&x);CHKERRQ(ierr);
1326     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1327   }
1328 
1329   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1330   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1331   if (isseqaij) {
1332     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1333   } else {
1334     Mat work_mat;
1335     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1336     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1337     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1338   }
1339   if (matis->A->symmetric_set) {
1340     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1341 #if !defined(PETSC_USE_COMPLEX)
1342     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1343 #endif
1344   }
1345   /*
1346   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1347   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1348   */
1349   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1355 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1356 {
1357   PC_IS*          pcis = (PC_IS*)(pc->data);
1358   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1359   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1360   PetscInt        *idx_R_local=NULL;
1361   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1362   PetscInt        vbs,bs;
1363   PetscBT         bitmask=NULL;
1364   PetscErrorCode  ierr;
1365 
1366   PetscFunctionBegin;
1367   /*
1368     No need to setup local scatters if
1369       - primal space is unchanged
1370         AND
1371       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1372         AND
1373       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1374   */
1375   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1376     PetscFunctionReturn(0);
1377   }
1378   /* destroy old objects */
1379   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1380   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1381   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1382   /* Set Non-overlapping dimensions */
1383   n_B = pcis->n_B;
1384   n_D = pcis->n - n_B;
1385   n_vertices = pcbddc->n_vertices;
1386 
1387   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1388 
1389   /* create auxiliary bitmask and allocate workspace */
1390   if (!sub_schurs->reuse_mumps) {
1391     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1392     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1393     for (i=0;i<n_vertices;i++) {
1394       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1395     }
1396 
1397     for (i=0, n_R=0; i<pcis->n; i++) {
1398       if (!PetscBTLookup(bitmask,i)) {
1399         idx_R_local[n_R++] = i;
1400       }
1401     }
1402   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1403     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1404 
1405     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1406     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1407   }
1408 
1409   /* Block code */
1410   vbs = 1;
1411   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1412   if (bs>1 && !(n_vertices%bs)) {
1413     PetscBool is_blocked = PETSC_TRUE;
1414     PetscInt  *vary;
1415     if (!sub_schurs->reuse_mumps) {
1416       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1417       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1418       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1419       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1420       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1421       for (i=0; i<pcis->n/bs; i++) {
1422         if (vary[i]!=0 && vary[i]!=bs) {
1423           is_blocked = PETSC_FALSE;
1424           break;
1425         }
1426       }
1427       ierr = PetscFree(vary);CHKERRQ(ierr);
1428     } else {
1429       /* Verify directly the R set */
1430       for (i=0; i<n_R/bs; i++) {
1431         PetscInt j,node=idx_R_local[bs*i];
1432         for (j=1; j<bs; j++) {
1433           if (node != idx_R_local[bs*i+j]-j) {
1434             is_blocked = PETSC_FALSE;
1435             break;
1436           }
1437         }
1438       }
1439     }
1440     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1441       vbs = bs;
1442       for (i=0;i<n_R/vbs;i++) {
1443         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1444       }
1445     }
1446   }
1447   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1448   if (sub_schurs->reuse_mumps) {
1449     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1450 
1451     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1452     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1453     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1454     reuse_mumps->is_R = pcbddc->is_R_local;
1455   } else {
1456     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1457   }
1458 
1459   /* print some info if requested */
1460   if (pcbddc->dbg_flag) {
1461     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1462     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1463     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1464     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1465     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1466     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1467     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1468   }
1469 
1470   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1471   if (!sub_schurs->reuse_mumps) {
1472     IS       is_aux1,is_aux2;
1473     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1474 
1475     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1476     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1477     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1478     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1479     for (i=0; i<n_D; i++) {
1480       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1481     }
1482     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1483     for (i=0, j=0; i<n_R; i++) {
1484       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1485         aux_array1[j++] = i;
1486       }
1487     }
1488     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1489     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1490     for (i=0, j=0; i<n_B; i++) {
1491       if (!PetscBTLookup(bitmask,is_indices[i])) {
1492         aux_array2[j++] = i;
1493       }
1494     }
1495     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1496     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1497     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1498     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1499     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1500 
1501     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1502       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1503       for (i=0, j=0; i<n_R; i++) {
1504         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1505           aux_array1[j++] = i;
1506         }
1507       }
1508       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1509       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1510       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1511     }
1512     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1513     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1514   } else {
1515     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1516     IS               tis;
1517     PetscInt         schur_size;
1518 
1519     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1520     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1521     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1522     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1523     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1524       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1525       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1526       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1527     }
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1535 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1536 {
1537   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1538   PC_IS          *pcis = (PC_IS*)pc->data;
1539   PC             pc_temp;
1540   Mat            A_RR;
1541   MatReuse       reuse;
1542   PetscScalar    m_one = -1.0;
1543   PetscReal      value;
1544   PetscInt       n_D,n_R;
1545   PetscBool      use_exact,use_exact_reduced,issbaij;
1546   PetscErrorCode ierr;
1547   /* prefixes stuff */
1548   char           dir_prefix[256],neu_prefix[256],str_level[16];
1549   size_t         len;
1550 
1551   PetscFunctionBegin;
1552 
1553   /* compute prefixes */
1554   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1555   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1556   if (!pcbddc->current_level) {
1557     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1558     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1559     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1560     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1561   } else {
1562     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1563     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1564     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1565     len -= 15; /* remove "pc_bddc_coarse_" */
1566     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1567     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1568     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1569     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1570     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1571     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1572     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1573     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1574   }
1575 
1576   /* DIRICHLET PROBLEM */
1577   if (dirichlet) {
1578     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1579     if (pcbddc->local_mat->symmetric_set) {
1580       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1581     }
1582     /* Matrix for Dirichlet problem is pcis->A_II */
1583     n_D = pcis->n - pcis->n_B;
1584     if (!pcbddc->ksp_D) { /* create object if not yet build */
1585       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1586       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1587       /* default */
1588       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1589       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1590       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1591       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1592       if (issbaij) {
1593         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1594       } else {
1595         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1596       }
1597       /* Allow user's customization */
1598       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1599       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1600     }
1601     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1602     if (sub_schurs->reuse_mumps) {
1603       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1604 
1605       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1606     }
1607     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1608     if (!n_D) {
1609       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1610       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1611     }
1612     /* Set Up KSP for Dirichlet problem of BDDC */
1613     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1614     /* set ksp_D into pcis data */
1615     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1616     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1617     pcis->ksp_D = pcbddc->ksp_D;
1618   }
1619 
1620   /* NEUMANN PROBLEM */
1621   A_RR = 0;
1622   if (neumann) {
1623     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1624     PetscInt        ibs,mbs;
1625     PetscBool       issbaij;
1626     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1627     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1628     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1629     if (pcbddc->ksp_R) { /* already created ksp */
1630       PetscInt nn_R;
1631       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1632       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1633       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1634       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1635         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1636         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1637         reuse = MAT_INITIAL_MATRIX;
1638       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1639         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1640           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1641           reuse = MAT_INITIAL_MATRIX;
1642         } else { /* safe to reuse the matrix */
1643           reuse = MAT_REUSE_MATRIX;
1644         }
1645       }
1646       /* last check */
1647       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1648         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1649         reuse = MAT_INITIAL_MATRIX;
1650       }
1651     } else { /* first time, so we need to create the matrix */
1652       reuse = MAT_INITIAL_MATRIX;
1653     }
1654     /* extract A_RR */
1655     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1656     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1657     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1658     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1659       if (matis->A == pcbddc->local_mat) {
1660         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1661         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1662       } else {
1663         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1664       }
1665     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1666       if (matis->A == pcbddc->local_mat) {
1667         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1668         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1669       } else {
1670         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1671       }
1672     }
1673     if (!sub_schurs->reuse_mumps) {
1674       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1675       if (pcbddc->local_mat->symmetric_set) {
1676         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1677       }
1678     } else {
1679       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1680 
1681       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1682       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1683       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1684     }
1685     if (!pcbddc->ksp_R) { /* create object if not present */
1686       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1687       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1688       /* default */
1689       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1690       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1691       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1692       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1693       if (issbaij) {
1694         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1695       } else {
1696         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1697       }
1698       /* Allow user's customization */
1699       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1700       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1701     }
1702     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1703     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1704     if (!n_R) {
1705       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1706       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1707     }
1708     /* Reuse MUMPS solver if it is present */
1709     if (sub_schurs->reuse_mumps) {
1710       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1711 
1712       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1713     }
1714     /* Set Up KSP for Neumann problem of BDDC */
1715     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1716   }
1717   /* free Neumann problem's matrix */
1718   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1719 
1720   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1721   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1722     if (pcbddc->dbg_flag) {
1723       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1724       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1725       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1726     }
1727     if (dirichlet) { /* Dirichlet */
1728       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1729       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1730       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1731       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1732       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1733       /* need to be adapted? */
1734       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1735       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1736       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1737       /* print info */
1738       if (pcbddc->dbg_flag) {
1739         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1740         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1741       }
1742       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1743         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
1744       }
1745     }
1746     if (neumann) { /* Neumann */
1747       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1748       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1749       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1750       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1751       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1752       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1753       /* need to be adapted? */
1754       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1755       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1756       /* print info */
1757       if (pcbddc->dbg_flag) {
1758         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1759         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1760       }
1761       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1762         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
1763       }
1764     }
1765   }
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1771 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1772 {
1773   PetscErrorCode  ierr;
1774   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1775   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1776 
1777   PetscFunctionBegin;
1778   if (!sub_schurs->reuse_mumps) {
1779     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1780   }
1781   if (!pcbddc->switch_static) {
1782     if (applytranspose && pcbddc->local_auxmat1) {
1783       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1784       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1785     }
1786     if (!sub_schurs->reuse_mumps) {
1787       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1788       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1789     } else {
1790       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1791 
1792       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1793       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1794     }
1795   } else {
1796     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1797     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1798     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1799     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1800     if (applytranspose && pcbddc->local_auxmat1) {
1801       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1802       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1803       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1804       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1805     }
1806   }
1807   if (!sub_schurs->reuse_mumps) {
1808     if (applytranspose) {
1809       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1810     } else {
1811       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1812     }
1813 #if defined(PETSC_HAVE_MUMPS)
1814   } else {
1815     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1816 
1817     if (applytranspose) {
1818       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1819     } else {
1820       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1821     }
1822 #endif
1823   }
1824   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1825   if (!pcbddc->switch_static) {
1826     if (!sub_schurs->reuse_mumps) {
1827       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1828       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1829     } else {
1830       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1831 
1832       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1833       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1834     }
1835     if (!applytranspose && pcbddc->local_auxmat1) {
1836       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1837       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1838     }
1839   } else {
1840     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1841     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1842     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1843     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1844     if (!applytranspose && pcbddc->local_auxmat1) {
1845       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1846       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1847     }
1848     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1849     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1850     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1851     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1852   }
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1857 #undef __FUNCT__
1858 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1859 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1860 {
1861   PetscErrorCode ierr;
1862   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1863   PC_IS*            pcis = (PC_IS*)  (pc->data);
1864   const PetscScalar zero = 0.0;
1865 
1866   PetscFunctionBegin;
1867   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1868   if (applytranspose) {
1869     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1870     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1871   } else {
1872     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1873     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1874   }
1875   /* start communications from local primal nodes to rhs of coarse solver */
1876   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1877   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1878   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1879 
1880   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1881   /* TODO remove null space when doing multilevel */
1882   if (pcbddc->coarse_ksp) {
1883     Vec rhs,sol;
1884 
1885     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1886     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1887     if (applytranspose) {
1888       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1889     } else {
1890       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1891     }
1892   }
1893 
1894   /* Local solution on R nodes */
1895   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1896     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1897   }
1898 
1899   /* communications from coarse sol to local primal nodes */
1900   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1901   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1902 
1903   /* Sum contributions from two levels */
1904   if (applytranspose) {
1905     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1906     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1907   } else {
1908     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1909     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1910   }
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1916 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1917 {
1918   PetscErrorCode ierr;
1919   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1920   PetscScalar    *array;
1921   Vec            from,to;
1922 
1923   PetscFunctionBegin;
1924   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1925     from = pcbddc->coarse_vec;
1926     to = pcbddc->vec1_P;
1927     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1928       Vec tvec;
1929 
1930       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1931       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1932       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1933       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1934       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1935       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1936     }
1937   } else { /* from local to global -> put data in coarse right hand side */
1938     from = pcbddc->vec1_P;
1939     to = pcbddc->coarse_vec;
1940   }
1941   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 #undef __FUNCT__
1946 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1947 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1948 {
1949   PetscErrorCode ierr;
1950   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1951   PetscScalar    *array;
1952   Vec            from,to;
1953 
1954   PetscFunctionBegin;
1955   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1956     from = pcbddc->coarse_vec;
1957     to = pcbddc->vec1_P;
1958   } else { /* from local to global -> put data in coarse right hand side */
1959     from = pcbddc->vec1_P;
1960     to = pcbddc->coarse_vec;
1961   }
1962   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1963   if (smode == SCATTER_FORWARD) {
1964     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1965       Vec tvec;
1966 
1967       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1968       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1969       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1970       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1971     }
1972   } else {
1973     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1974      ierr = VecResetArray(from);CHKERRQ(ierr);
1975     }
1976   }
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 /* uncomment for testing purposes */
1981 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1982 #undef __FUNCT__
1983 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1984 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1985 {
1986   PetscErrorCode    ierr;
1987   PC_IS*            pcis = (PC_IS*)(pc->data);
1988   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1989   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1990   /* one and zero */
1991   PetscScalar       one=1.0,zero=0.0;
1992   /* space to store constraints and their local indices */
1993   PetscScalar       *constraints_data;
1994   PetscInt          *constraints_idxs,*constraints_idxs_B;
1995   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1996   PetscInt          *constraints_n;
1997   /* iterators */
1998   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1999   /* BLAS integers */
2000   PetscBLASInt      lwork,lierr;
2001   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2002   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2003   /* reuse */
2004   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2005   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2006   /* change of basis */
2007   PetscBool         qr_needed;
2008   PetscBT           change_basis,qr_needed_idx;
2009   /* auxiliary stuff */
2010   PetscInt          *nnz,*is_indices;
2011   PetscInt          ncc;
2012   /* some quantities */
2013   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2014   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2015 
2016   PetscFunctionBegin;
2017   /* Destroy Mat objects computed previously */
2018   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2019   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2020   /* save info on constraints from previous setup (if any) */
2021   olocal_primal_size = pcbddc->local_primal_size;
2022   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2023   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2024   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2025   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2026   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2027   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2028 
2029   /* print some info */
2030   if (pcbddc->dbg_flag) {
2031     IS       vertices;
2032     PetscInt nv,nedges,nfaces;
2033     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2034     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2035     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2036     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2037     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2038     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2039     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2040     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2041     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2042   }
2043 
2044   if (!pcbddc->adaptive_selection) {
2045     IS           ISForVertices,*ISForFaces,*ISForEdges;
2046     MatNullSpace nearnullsp;
2047     const Vec    *nearnullvecs;
2048     Vec          *localnearnullsp;
2049     PetscScalar  *array;
2050     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2051     PetscBool    nnsp_has_cnst;
2052     /* LAPACK working arrays for SVD or POD */
2053     PetscBool    skip_lapack,boolforchange;
2054     PetscScalar  *work;
2055     PetscReal    *singular_vals;
2056 #if defined(PETSC_USE_COMPLEX)
2057     PetscReal    *rwork;
2058 #endif
2059 #if defined(PETSC_MISSING_LAPACK_GESVD)
2060     PetscScalar  *temp_basis,*correlation_mat;
2061 #else
2062     PetscBLASInt dummy_int=1;
2063     PetscScalar  dummy_scalar=1.;
2064 #endif
2065 
2066     /* Get index sets for faces, edges and vertices from graph */
2067     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2068     /* free unneeded index sets */
2069     if (!pcbddc->use_vertices) {
2070       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2071     }
2072     if (!pcbddc->use_edges) {
2073       for (i=0;i<n_ISForEdges;i++) {
2074         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2075       }
2076       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2077       n_ISForEdges = 0;
2078     }
2079     if (!pcbddc->use_faces) {
2080       for (i=0;i<n_ISForFaces;i++) {
2081         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2082       }
2083       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2084       n_ISForFaces = 0;
2085     }
2086 
2087 #if defined(PETSC_USE_DEBUG)
2088     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2089        Also use_change_of_basis should be consistent among processors */
2090     if (pcbddc->NullSpace) {
2091       PetscBool tbool[2],gbool[2];
2092 
2093       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2094         pcbddc->use_change_of_basis = PETSC_TRUE;
2095         if (!ISForEdges) {
2096           pcbddc->use_change_on_faces = PETSC_TRUE;
2097         }
2098       }
2099       tbool[0] = pcbddc->use_change_of_basis;
2100       tbool[1] = pcbddc->use_change_on_faces;
2101       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2102       pcbddc->use_change_of_basis = gbool[0];
2103       pcbddc->use_change_on_faces = gbool[1];
2104     }
2105 #endif
2106 
2107     /* check if near null space is attached to global mat */
2108     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2109     if (nearnullsp) {
2110       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2111       /* remove any stored info */
2112       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2113       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2114       /* store information for BDDC solver reuse */
2115       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2116       pcbddc->onearnullspace = nearnullsp;
2117       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2118       for (i=0;i<nnsp_size;i++) {
2119         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2120       }
2121     } else { /* if near null space is not provided BDDC uses constants by default */
2122       nnsp_size = 0;
2123       nnsp_has_cnst = PETSC_TRUE;
2124     }
2125     /* get max number of constraints on a single cc */
2126     max_constraints = nnsp_size;
2127     if (nnsp_has_cnst) max_constraints++;
2128 
2129     /*
2130          Evaluate maximum storage size needed by the procedure
2131          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2132          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2133          There can be multiple constraints per connected component
2134                                                                                                                                                            */
2135     n_vertices = 0;
2136     if (ISForVertices) {
2137       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2138     }
2139     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2140     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2141 
2142     total_counts = n_ISForFaces+n_ISForEdges;
2143     total_counts *= max_constraints;
2144     total_counts += n_vertices;
2145     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2146 
2147     total_counts = 0;
2148     max_size_of_constraint = 0;
2149     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2150       IS used_is;
2151       if (i<n_ISForEdges) {
2152         used_is = ISForEdges[i];
2153       } else {
2154         used_is = ISForFaces[i-n_ISForEdges];
2155       }
2156       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2157       total_counts += j;
2158       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2159     }
2160     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2161 
2162     /* get local part of global near null space vectors */
2163     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2164     for (k=0;k<nnsp_size;k++) {
2165       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2166       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2167       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2168     }
2169 
2170     /* whether or not to skip lapack calls */
2171     skip_lapack = PETSC_TRUE;
2172     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2173 
2174     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2175     if (!skip_lapack) {
2176       PetscScalar temp_work;
2177 
2178 #if defined(PETSC_MISSING_LAPACK_GESVD)
2179       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2180       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2181       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2182       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2183 #if defined(PETSC_USE_COMPLEX)
2184       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2185 #endif
2186       /* now we evaluate the optimal workspace using query with lwork=-1 */
2187       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2188       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2189       lwork = -1;
2190       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2191 #if !defined(PETSC_USE_COMPLEX)
2192       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2193 #else
2194       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2195 #endif
2196       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2197       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2198 #else /* on missing GESVD */
2199       /* SVD */
2200       PetscInt max_n,min_n;
2201       max_n = max_size_of_constraint;
2202       min_n = max_constraints;
2203       if (max_size_of_constraint < max_constraints) {
2204         min_n = max_size_of_constraint;
2205         max_n = max_constraints;
2206       }
2207       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2208 #if defined(PETSC_USE_COMPLEX)
2209       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2210 #endif
2211       /* now we evaluate the optimal workspace using query with lwork=-1 */
2212       lwork = -1;
2213       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2214       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2215       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2216       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2217 #if !defined(PETSC_USE_COMPLEX)
2218       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2219 #else
2220       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2221 #endif
2222       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2223       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2224 #endif /* on missing GESVD */
2225       /* Allocate optimal workspace */
2226       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2227       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2228     }
2229     /* Now we can loop on constraining sets */
2230     total_counts = 0;
2231     constraints_idxs_ptr[0] = 0;
2232     constraints_data_ptr[0] = 0;
2233     /* vertices */
2234     if (n_vertices) {
2235       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2236       if (nnsp_has_cnst) { /* it considers all possible vertices */
2237         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2238         for (i=0;i<n_vertices;i++) {
2239           constraints_n[total_counts] = 1;
2240           constraints_data[total_counts] = 1.0;
2241           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2242           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2243           total_counts++;
2244         }
2245       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2246         PetscBool used_vertex;
2247         for (i=0;i<n_vertices;i++) {
2248           used_vertex = PETSC_FALSE;
2249           k = 0;
2250           while (!used_vertex && k<nnsp_size) {
2251             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2252             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2253               constraints_n[total_counts] = 1;
2254               constraints_idxs[total_counts] = is_indices[i];
2255               constraints_data[total_counts] = 1.0;
2256               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2257               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2258               total_counts++;
2259               used_vertex = PETSC_TRUE;
2260             }
2261             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2262             k++;
2263           }
2264         }
2265       }
2266       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2267       n_vertices = total_counts;
2268     }
2269 
2270     /* edges and faces */
2271     total_counts_cc = total_counts;
2272     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2273       IS        used_is;
2274       PetscBool idxs_copied = PETSC_FALSE;
2275 
2276       if (ncc<n_ISForEdges) {
2277         used_is = ISForEdges[ncc];
2278         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2279       } else {
2280         used_is = ISForFaces[ncc-n_ISForEdges];
2281         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2282       }
2283       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2284 
2285       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2286       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2287       /* change of basis should not be performed on local periodic nodes */
2288       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2289       if (nnsp_has_cnst) {
2290         PetscScalar quad_value;
2291 
2292         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2293         idxs_copied = PETSC_TRUE;
2294 
2295         if (!pcbddc->use_nnsp_true) {
2296           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2297         } else {
2298           quad_value = 1.0;
2299         }
2300         for (j=0;j<size_of_constraint;j++) {
2301           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2302         }
2303         temp_constraints++;
2304         total_counts++;
2305       }
2306       for (k=0;k<nnsp_size;k++) {
2307         PetscReal real_value;
2308         PetscScalar *ptr_to_data;
2309 
2310         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2311         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2312         for (j=0;j<size_of_constraint;j++) {
2313           ptr_to_data[j] = array[is_indices[j]];
2314         }
2315         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2316         /* check if array is null on the connected component */
2317         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2318         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2319         if (real_value > 0.0) { /* keep indices and values */
2320           temp_constraints++;
2321           total_counts++;
2322           if (!idxs_copied) {
2323             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2324             idxs_copied = PETSC_TRUE;
2325           }
2326         }
2327       }
2328       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2329       valid_constraints = temp_constraints;
2330       if (!pcbddc->use_nnsp_true && temp_constraints) {
2331         if (temp_constraints == 1) { /* just normalize the constraint */
2332           PetscScalar norm,*ptr_to_data;
2333 
2334           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2335           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2336           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2337           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2338           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2339         } else { /* perform SVD */
2340           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2341           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2342 
2343 #if defined(PETSC_MISSING_LAPACK_GESVD)
2344           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2345              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2346              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2347                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2348                 from that computed using LAPACKgesvd
2349              -> This is due to a different computation of eigenvectors in LAPACKheev
2350              -> The quality of the POD-computed basis will be the same */
2351           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2352           /* Store upper triangular part of correlation matrix */
2353           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2354           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2355           for (j=0;j<temp_constraints;j++) {
2356             for (k=0;k<j+1;k++) {
2357               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2358             }
2359           }
2360           /* compute eigenvalues and eigenvectors of correlation matrix */
2361           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2362           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2363 #if !defined(PETSC_USE_COMPLEX)
2364           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2365 #else
2366           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2367 #endif
2368           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2369           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2370           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2371           j = 0;
2372           while (j < temp_constraints && singular_vals[j] < tol) j++;
2373           total_counts = total_counts-j;
2374           valid_constraints = temp_constraints-j;
2375           /* scale and copy POD basis into used quadrature memory */
2376           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2377           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2378           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2379           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2380           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2381           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2382           if (j<temp_constraints) {
2383             PetscInt ii;
2384             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2385             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2386             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2387             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2388             for (k=0;k<temp_constraints-j;k++) {
2389               for (ii=0;ii<size_of_constraint;ii++) {
2390                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2391               }
2392             }
2393           }
2394 #else  /* on missing GESVD */
2395           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2396           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2397           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2398           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2399 #if !defined(PETSC_USE_COMPLEX)
2400           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2401 #else
2402           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2403 #endif
2404           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2405           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2406           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2407           k = temp_constraints;
2408           if (k > size_of_constraint) k = size_of_constraint;
2409           j = 0;
2410           while (j < k && singular_vals[k-j-1] < tol) j++;
2411           valid_constraints = k-j;
2412           total_counts = total_counts-temp_constraints+valid_constraints;
2413 #endif /* on missing GESVD */
2414         }
2415       }
2416       /* update pointers information */
2417       if (valid_constraints) {
2418         constraints_n[total_counts_cc] = valid_constraints;
2419         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2420         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2421         /* set change_of_basis flag */
2422         if (boolforchange) {
2423           PetscBTSet(change_basis,total_counts_cc);
2424         }
2425         total_counts_cc++;
2426       }
2427     }
2428     /* free workspace */
2429     if (!skip_lapack) {
2430       ierr = PetscFree(work);CHKERRQ(ierr);
2431 #if defined(PETSC_USE_COMPLEX)
2432       ierr = PetscFree(rwork);CHKERRQ(ierr);
2433 #endif
2434       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2435 #if defined(PETSC_MISSING_LAPACK_GESVD)
2436       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2437       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2438 #endif
2439     }
2440     for (k=0;k<nnsp_size;k++) {
2441       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2442     }
2443     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2444     /* free index sets of faces, edges and vertices */
2445     for (i=0;i<n_ISForFaces;i++) {
2446       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2447     }
2448     if (n_ISForFaces) {
2449       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2450     }
2451     for (i=0;i<n_ISForEdges;i++) {
2452       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2453     }
2454     if (n_ISForEdges) {
2455       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2456     }
2457     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2458   } else {
2459     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2460 
2461     total_counts = 0;
2462     n_vertices = 0;
2463     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2464       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2465     }
2466     max_constraints = 0;
2467     total_counts_cc = 0;
2468     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2469       total_counts += pcbddc->adaptive_constraints_n[i];
2470       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2471       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2472     }
2473     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2474     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2475     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2476     constraints_data = pcbddc->adaptive_constraints_data;
2477     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2478     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2479     total_counts_cc = 0;
2480     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2481       if (pcbddc->adaptive_constraints_n[i]) {
2482         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2483       }
2484     }
2485 #if 0
2486     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2487     for (i=0;i<total_counts_cc;i++) {
2488       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2489       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2490       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2491         printf(" %d",constraints_idxs[j]);
2492       }
2493       printf("\n");
2494       printf("number of cc: %d\n",constraints_n[i]);
2495     }
2496     for (i=0;i<n_vertices;i++) {
2497       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2498     }
2499     for (i=0;i<sub_schurs->n_subs;i++) {
2500       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2501     }
2502 #endif
2503 
2504     max_size_of_constraint = 0;
2505     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2506     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2507     /* Change of basis */
2508     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2509     if (pcbddc->use_change_of_basis) {
2510       for (i=0;i<sub_schurs->n_subs;i++) {
2511         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2512           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2513         }
2514       }
2515     }
2516   }
2517   pcbddc->local_primal_size = total_counts;
2518   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2519 
2520   /* map constraints_idxs in boundary numbering */
2521   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2522   if (i != constraints_idxs_ptr[total_counts_cc]) {
2523     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2524   }
2525 
2526   /* Create constraint matrix */
2527   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2528   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2529   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2530 
2531   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2532   /* determine if a QR strategy is needed for change of basis */
2533   qr_needed = PETSC_FALSE;
2534   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2535   total_primal_vertices=0;
2536   pcbddc->local_primal_size_cc = 0;
2537   for (i=0;i<total_counts_cc;i++) {
2538     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2539     if (size_of_constraint == 1) {
2540       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2541       pcbddc->local_primal_size_cc += 1;
2542     } else if (PetscBTLookup(change_basis,i)) {
2543       for (k=0;k<constraints_n[i];k++) {
2544         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2545       }
2546       pcbddc->local_primal_size_cc += constraints_n[i];
2547       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2548         PetscBTSet(qr_needed_idx,i);
2549         qr_needed = PETSC_TRUE;
2550       }
2551     } else {
2552       pcbddc->local_primal_size_cc += 1;
2553     }
2554   }
2555   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2556   pcbddc->n_vertices = total_primal_vertices;
2557   /* permute indices in order to have a sorted set of vertices */
2558   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2559 
2560   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2561   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2562   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2563 
2564   /* nonzero structure of constraint matrix */
2565   /* and get reference dof for local constraints */
2566   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2567   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2568 
2569   j = total_primal_vertices;
2570   total_counts = total_primal_vertices;
2571   cum = total_primal_vertices;
2572   for (i=n_vertices;i<total_counts_cc;i++) {
2573     if (!PetscBTLookup(change_basis,i)) {
2574       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2575       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2576       cum++;
2577       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2578       for (k=0;k<constraints_n[i];k++) {
2579         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2580         nnz[j+k] = size_of_constraint;
2581       }
2582       j += constraints_n[i];
2583     }
2584   }
2585   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2586   ierr = PetscFree(nnz);CHKERRQ(ierr);
2587 
2588   /* set values in constraint matrix */
2589   for (i=0;i<total_primal_vertices;i++) {
2590     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2591   }
2592   total_counts = total_primal_vertices;
2593   for (i=n_vertices;i<total_counts_cc;i++) {
2594     if (!PetscBTLookup(change_basis,i)) {
2595       PetscInt *cols;
2596 
2597       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2598       cols = constraints_idxs+constraints_idxs_ptr[i];
2599       for (k=0;k<constraints_n[i];k++) {
2600         PetscInt    row = total_counts+k;
2601         PetscScalar *vals;
2602 
2603         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2604         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2605       }
2606       total_counts += constraints_n[i];
2607     }
2608   }
2609   /* assembling */
2610   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2611   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2612 
2613   /*
2614   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2615   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2616   */
2617   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2618   if (pcbddc->use_change_of_basis) {
2619     /* dual and primal dofs on a single cc */
2620     PetscInt     dual_dofs,primal_dofs;
2621     /* working stuff for GEQRF */
2622     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2623     PetscBLASInt lqr_work;
2624     /* working stuff for UNGQR */
2625     PetscScalar  *gqr_work,lgqr_work_t;
2626     PetscBLASInt lgqr_work;
2627     /* working stuff for TRTRS */
2628     PetscScalar  *trs_rhs;
2629     PetscBLASInt Blas_NRHS;
2630     /* pointers for values insertion into change of basis matrix */
2631     PetscInt     *start_rows,*start_cols;
2632     PetscScalar  *start_vals;
2633     /* working stuff for values insertion */
2634     PetscBT      is_primal;
2635     PetscInt     *aux_primal_numbering_B;
2636     /* matrix sizes */
2637     PetscInt     global_size,local_size;
2638     /* temporary change of basis */
2639     Mat          localChangeOfBasisMatrix;
2640     /* extra space for debugging */
2641     PetscScalar  *dbg_work;
2642 
2643     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2644     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2645     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2646     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2647     /* nonzeros for local mat */
2648     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2649     for (i=0;i<pcis->n;i++) nnz[i]=1;
2650     for (i=n_vertices;i<total_counts_cc;i++) {
2651       if (PetscBTLookup(change_basis,i)) {
2652         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2653         if (PetscBTLookup(qr_needed_idx,i)) {
2654           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2655         } else {
2656           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2657           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2658         }
2659       }
2660     }
2661     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2662     ierr = PetscFree(nnz);CHKERRQ(ierr);
2663     /* Set initial identity in the matrix */
2664     for (i=0;i<pcis->n;i++) {
2665       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2666     }
2667 
2668     if (pcbddc->dbg_flag) {
2669       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2670       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2671     }
2672 
2673 
2674     /* Now we loop on the constraints which need a change of basis */
2675     /*
2676        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2677        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2678 
2679        Basic blocks of change of basis matrix T computed by
2680 
2681           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2682 
2683             | 1        0   ...        0         s_1/S |
2684             | 0        1   ...        0         s_2/S |
2685             |              ...                        |
2686             | 0        ...            1     s_{n-1}/S |
2687             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2688 
2689             with S = \sum_{i=1}^n s_i^2
2690             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2691                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2692 
2693           - QR decomposition of constraints otherwise
2694     */
2695     if (qr_needed) {
2696       /* space to store Q */
2697       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2698       /* first we issue queries for optimal work */
2699       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2700       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2701       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2702       lqr_work = -1;
2703       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2704       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2705       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2706       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2707       lgqr_work = -1;
2708       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2709       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2710       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2711       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2712       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2713       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2714       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2715       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2716       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2717       /* array to store scaling factors for reflectors */
2718       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2719       /* array to store rhs and solution of triangular solver */
2720       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2721       /* allocating workspace for check */
2722       if (pcbddc->dbg_flag) {
2723         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2724       }
2725     }
2726     /* array to store whether a node is primal or not */
2727     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2728     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2729     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2730     if (i != total_primal_vertices) {
2731       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2732     }
2733     for (i=0;i<total_primal_vertices;i++) {
2734       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2735     }
2736     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2737 
2738     /* loop on constraints and see whether or not they need a change of basis and compute it */
2739     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2740       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2741       if (PetscBTLookup(change_basis,total_counts)) {
2742         /* get constraint info */
2743         primal_dofs = constraints_n[total_counts];
2744         dual_dofs = size_of_constraint-primal_dofs;
2745 
2746         if (pcbddc->dbg_flag) {
2747           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2748         }
2749 
2750         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2751 
2752           /* copy quadrature constraints for change of basis check */
2753           if (pcbddc->dbg_flag) {
2754             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2755           }
2756           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2757           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2758 
2759           /* compute QR decomposition of constraints */
2760           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2761           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2762           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2763           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2764           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2765           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2766           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2767 
2768           /* explictly compute R^-T */
2769           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2770           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2771           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2772           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2773           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2774           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2775           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2776           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2777           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2778           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2779 
2780           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2781           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2782           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2783           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2784           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2785           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2786           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2787           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2788           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2789 
2790           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2791              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2792              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2793           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2794           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2795           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2796           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2797           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2798           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2799           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2800           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2801           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2802           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2803 
2804           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2805           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2806           /* insert cols for primal dofs */
2807           for (j=0;j<primal_dofs;j++) {
2808             start_vals = &qr_basis[j*size_of_constraint];
2809             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2810             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2811           }
2812           /* insert cols for dual dofs */
2813           for (j=0,k=0;j<dual_dofs;k++) {
2814             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2815               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2816               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2817               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2818               j++;
2819             }
2820           }
2821 
2822           /* check change of basis */
2823           if (pcbddc->dbg_flag) {
2824             PetscInt   ii,jj;
2825             PetscBool valid_qr=PETSC_TRUE;
2826             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2827             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2828             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2829             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2830             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2831             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2832             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2833             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2834             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2835             for (jj=0;jj<size_of_constraint;jj++) {
2836               for (ii=0;ii<primal_dofs;ii++) {
2837                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2838                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2839               }
2840             }
2841             if (!valid_qr) {
2842               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2843               for (jj=0;jj<size_of_constraint;jj++) {
2844                 for (ii=0;ii<primal_dofs;ii++) {
2845                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2846                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2847                   }
2848                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2849                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2850                   }
2851                 }
2852               }
2853             } else {
2854               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2855             }
2856           }
2857         } else { /* simple transformation block */
2858           PetscInt    row,col;
2859           PetscScalar val,norm;
2860 
2861           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2862           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2863           for (j=0;j<size_of_constraint;j++) {
2864             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2865             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2866             if (!PetscBTLookup(is_primal,row_B)) {
2867               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2868               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2869               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2870             } else {
2871               for (k=0;k<size_of_constraint;k++) {
2872                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2873                 if (row != col) {
2874                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2875                 } else {
2876                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2877                 }
2878                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2879               }
2880             }
2881           }
2882           if (pcbddc->dbg_flag) {
2883             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2884           }
2885         }
2886       } else {
2887         if (pcbddc->dbg_flag) {
2888           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2889         }
2890       }
2891     }
2892 
2893     /* free workspace */
2894     if (qr_needed) {
2895       if (pcbddc->dbg_flag) {
2896         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2897       }
2898       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2899       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2900       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2901       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2902       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2903     }
2904     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2905     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2906     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2907 
2908     /* assembling of global change of variable */
2909     {
2910       Mat      tmat;
2911       PetscInt bs;
2912 
2913       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2914       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2915       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2916       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2917       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2918       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2919       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2920       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2921       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2922       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2923       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2924       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2925       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2926       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2927       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2928       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2929       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2930       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2931     }
2932     /* check */
2933     if (pcbddc->dbg_flag) {
2934       PetscReal error;
2935       Vec       x,x_change;
2936 
2937       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2938       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2939       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2940       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2941       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2942       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2943       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2944       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2945       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2946       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2947       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2948       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2949       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2950       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2951       ierr = VecDestroy(&x);CHKERRQ(ierr);
2952       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2953     }
2954 
2955     /* adapt sub_schurs computed (if any) */
2956     if (pcbddc->use_deluxe_scaling) {
2957       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2958       if (sub_schurs->S_Ej_all) {
2959         Mat S_new,tmat;
2960         IS is_all_N;
2961 
2962         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2963         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2964         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2965         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2966         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2967         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2968         sub_schurs->S_Ej_all = S_new;
2969         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2970         if (sub_schurs->sum_S_Ej_all) {
2971           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2972           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2973           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2974           sub_schurs->sum_S_Ej_all = S_new;
2975           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2976         }
2977         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2978       }
2979     }
2980     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2981   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2982     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2983     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2984   }
2985 
2986   /* set up change of basis context */
2987   if (pcbddc->ChangeOfBasisMatrix) {
2988     PCBDDCChange_ctx change_ctx;
2989 
2990     if (!pcbddc->new_global_mat) {
2991       PetscInt global_size,local_size;
2992 
2993       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2994       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2995       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2996       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2997       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2998       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2999       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3000       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3001       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3002     } else {
3003       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3004       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3005       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3006     }
3007     if (!pcbddc->user_ChangeOfBasisMatrix) {
3008       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3009       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3010     } else {
3011       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3012       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3013     }
3014     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3015     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3016     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3017     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3018   }
3019 
3020   /* check if a new primal space has been introduced */
3021   pcbddc->new_primal_space_local = PETSC_TRUE;
3022   if (olocal_primal_size == pcbddc->local_primal_size) {
3023     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3024     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3025     if (!pcbddc->new_primal_space_local) {
3026       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3027       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3028     }
3029   }
3030   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3031   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3032   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3033 
3034   /* flush dbg viewer */
3035   if (pcbddc->dbg_flag) {
3036     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3037   }
3038 
3039   /* free workspace */
3040   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3041   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3042   if (!pcbddc->adaptive_selection) {
3043     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3044     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3045   } else {
3046     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3047                       pcbddc->adaptive_constraints_idxs_ptr,
3048                       pcbddc->adaptive_constraints_data_ptr,
3049                       pcbddc->adaptive_constraints_idxs,
3050                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3051     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3052     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3053   }
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 #undef __FUNCT__
3058 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3059 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3060 {
3061   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3062   PC_IS       *pcis = (PC_IS*)pc->data;
3063   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3064   PetscInt    ierr,i,vertex_size,N;
3065   PetscViewer viewer=pcbddc->dbg_viewer;
3066 
3067   PetscFunctionBegin;
3068   /* Reset previously computed graph */
3069   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3070   /* Init local Graph struct */
3071   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3072   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3073 
3074   /* Check validity of the csr graph passed in by the user */
3075   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3076     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3077   }
3078 
3079   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3080   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3081     PetscInt  *xadj,*adjncy;
3082     PetscInt  nvtxs;
3083     PetscBool flg_row=PETSC_FALSE;
3084 
3085     if (pcbddc->use_local_adj) {
3086 
3087       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3088       if (flg_row) {
3089         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3090         pcbddc->computed_rowadj = PETSC_TRUE;
3091       }
3092       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3093     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3094       IS                     is_dummy;
3095       ISLocalToGlobalMapping l2gmap_dummy;
3096       PetscInt               j,sum;
3097       PetscInt               *cxadj,*cadjncy;
3098       const PetscInt         *idxs;
3099       PCBDDCGraph            graph;
3100       PetscBT                is_on_boundary;
3101 
3102       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3103       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3104       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3105       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3106       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3107       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3108       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3109       if (flg_row) {
3110         graph->xadj = xadj;
3111         graph->adjncy = adjncy;
3112       }
3113       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3114       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3115       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3116 
3117       if (pcbddc->dbg_flag) {
3118         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3119         for (i=0;i<graph->ncc;i++) {
3120           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3121         }
3122       }
3123 
3124       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3125       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3126       for (i=0;i<pcis->n_B;i++) {
3127         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3128       }
3129       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3130 
3131       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3132       sum = 0;
3133       for (i=0;i<graph->ncc;i++) {
3134         PetscInt sizecc = 0;
3135         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3136           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3137             sizecc++;
3138           }
3139         }
3140         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3141           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3142             cxadj[graph->queue[j]] = sizecc;
3143           }
3144         }
3145         sum += sizecc*sizecc;
3146       }
3147       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3148       sum = 0;
3149       for (i=0;i<pcis->n;i++) {
3150         PetscInt temp = cxadj[i];
3151         cxadj[i] = sum;
3152         sum += temp;
3153       }
3154       cxadj[pcis->n] = sum;
3155       for (i=0;i<graph->ncc;i++) {
3156         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3157           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3158             PetscInt k,sizecc = 0;
3159             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3160               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3161                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3162                 sizecc++;
3163               }
3164             }
3165           }
3166         }
3167       }
3168       if (sum) {
3169         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3170       } else {
3171         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3172         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3173       }
3174       graph->xadj = 0;
3175       graph->adjncy = 0;
3176       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3177       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3178     }
3179   }
3180   if (pcbddc->dbg_flag) {
3181     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3182   }
3183 
3184   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3185   vertex_size = 1;
3186   if (pcbddc->user_provided_isfordofs) {
3187     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3188       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3189       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3190         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3191         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3192       }
3193       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3194       pcbddc->n_ISForDofs = 0;
3195       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3196     }
3197     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3198     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3199   } else {
3200     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3201       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3202       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3203       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3204         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3205       }
3206     }
3207   }
3208 
3209   /* Setup of Graph */
3210   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3211     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3212   }
3213   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3214     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3215   }
3216   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3217 
3218   /* Graph's connected components analysis */
3219   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3220 
3221   /* print some info to stdout */
3222   if (pcbddc->dbg_flag) {
3223     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3224   }
3225 
3226   /* mark topography has done */
3227   pcbddc->recompute_topography = PETSC_FALSE;
3228   PetscFunctionReturn(0);
3229 }
3230 
3231 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3232 #undef __FUNCT__
3233 #define __FUNCT__ "PCBDDCSubsetNumbering"
3234 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3235 {
3236   PetscSF        sf;
3237   PetscLayout    map;
3238   const PetscInt *idxs;
3239   PetscInt       *leaf_data,*root_data,*gidxs;
3240   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3241   PetscInt       n_n,nlocals,start,first_index;
3242   PetscMPIInt    commsize;
3243   PetscBool      first_found;
3244   PetscErrorCode ierr;
3245 
3246   PetscFunctionBegin;
3247   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3248   if (subset_mult) {
3249     PetscCheckSameComm(subset,1,subset_mult,2);
3250     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3251     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3252   }
3253   /* create workspace layout for computing global indices of subset */
3254   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3255   lbounds[0] = lbounds[1] = 0;
3256   for (i=0;i<n;i++) {
3257     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3258     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3259   }
3260   lbounds[0] = -lbounds[0];
3261   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3262   gbounds[0] = -gbounds[0];
3263   N = gbounds[1] - gbounds[0] + 1;
3264   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3265   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3266   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3267   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3268   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3269 
3270   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3271   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3272   if (subset_mult) {
3273     const PetscInt* idxs_mult;
3274 
3275     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3276     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3277     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3278   } else {
3279     for (i=0;i<n;i++) leaf_data[i] = 1;
3280   }
3281   /* local size of new subset */
3282   n_n = 0;
3283   for (i=0;i<n;i++) n_n += leaf_data[i];
3284 
3285   /* global indexes in layout */
3286   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3287   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3288   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3289   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3290   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3291   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3292 
3293   /* reduce from leaves to roots */
3294   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3295   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3296   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3297 
3298   /* count indexes in local part of layout */
3299   nlocals = 0;
3300   first_index = -1;
3301   first_found = PETSC_FALSE;
3302   for (i=0;i<Nl;i++) {
3303     if (!first_found && root_data[i]) {
3304       first_found = PETSC_TRUE;
3305       first_index = i;
3306     }
3307     nlocals += root_data[i];
3308   }
3309 
3310   /* cumulative of number of indexes and size of subset without holes */
3311 #if defined(PETSC_HAVE_MPI_EXSCAN)
3312   start = 0;
3313   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3314 #else
3315   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3316   start = start-nlocals;
3317 #endif
3318 
3319   if (N_n) { /* compute total size of new subset if requested */
3320     *N_n = start + nlocals;
3321     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3322     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3323   }
3324 
3325   /* adapt root data with cumulative */
3326   if (first_found) {
3327     PetscInt old_index;
3328 
3329     root_data[first_index] += start;
3330     old_index = first_index;
3331     for (i=first_index+1;i<Nl;i++) {
3332       if (root_data[i]) {
3333         root_data[i] += root_data[old_index];
3334         old_index = i;
3335       }
3336     }
3337   }
3338 
3339   /* from roots to leaves */
3340   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3341   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3342   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3343 
3344   /* create new IS with global indexes without holes */
3345   if (subset_mult) {
3346     const PetscInt* idxs_mult;
3347     PetscInt        cum;
3348 
3349     cum = 0;
3350     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3351     for (i=0;i<n;i++) {
3352       PetscInt j;
3353       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3354     }
3355     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3356   } else {
3357     for (i=0;i<n;i++) {
3358       gidxs[i] = leaf_data[i]-1;
3359     }
3360   }
3361   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3362   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3363   PetscFunctionReturn(0);
3364 }
3365 
3366 #undef __FUNCT__
3367 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3368 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3369 {
3370   PetscInt       i,j;
3371   PetscScalar    *alphas;
3372   PetscErrorCode ierr;
3373 
3374   PetscFunctionBegin;
3375   /* this implements stabilized Gram-Schmidt */
3376   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3377   for (i=0;i<n;i++) {
3378     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3379     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3380     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3381   }
3382   ierr = PetscFree(alphas);CHKERRQ(ierr);
3383   PetscFunctionReturn(0);
3384 }
3385 
3386 #undef __FUNCT__
3387 #define __FUNCT__ "MatISGetSubassemblingPattern"
3388 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3389 {
3390   IS             ranks_send_to;
3391   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3392   PetscMPIInt    size,rank,color;
3393   PetscInt       *xadj,*adjncy;
3394   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3395   PetscInt       i,local_size,threshold=0;
3396   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3397   PetscSubcomm   subcomm;
3398   PetscErrorCode ierr;
3399 
3400   PetscFunctionBegin;
3401   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3402   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3403   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3404 
3405   /* Get info on mapping */
3406   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
3407   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3408 
3409   /* build local CSR graph of subdomains' connectivity */
3410   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3411   xadj[0] = 0;
3412   xadj[1] = PetscMax(n_neighs-1,0);
3413   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3414   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3415 
3416   if (threshold) {
3417     PetscInt xadj_count = 0;
3418     for (i=1;i<n_neighs;i++) {
3419       if (n_shared[i] > threshold) {
3420         adjncy[xadj_count] = neighs[i];
3421         adjncy_wgt[xadj_count] = n_shared[i];
3422         xadj_count++;
3423       }
3424     }
3425     xadj[1] = xadj_count;
3426   } else {
3427     if (xadj[1]) {
3428       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3429       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3430     }
3431   }
3432   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3433   if (use_square) {
3434     for (i=0;i<xadj[1];i++) {
3435       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3436     }
3437   }
3438   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3439 
3440   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3441 
3442   /*
3443     Restrict work on active processes only.
3444   */
3445   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3446   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3447   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3448   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3449   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3450   if (color) {
3451     ierr = PetscFree(xadj);CHKERRQ(ierr);
3452     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3453     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3454   } else {
3455     Mat             subdomain_adj;
3456     IS              new_ranks,new_ranks_contig;
3457     MatPartitioning partitioner;
3458     PetscInt        prank,rstart=0,rend=0;
3459     PetscInt        *is_indices,*oldranks;
3460     PetscBool       aggregate;
3461 
3462     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3463     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3464     prank = rank;
3465     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3466     /*
3467     for (i=0;i<size;i++) {
3468       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3469     }
3470     */
3471     for (i=0;i<xadj[1];i++) {
3472       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3473     }
3474     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3475     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3476     if (aggregate) {
3477       PetscInt    lrows,row,ncols,*cols;
3478       PetscMPIInt nrank;
3479       PetscScalar *vals;
3480 
3481       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3482       lrows = 0;
3483       if (nrank<redprocs) {
3484         lrows = size/redprocs;
3485         if (nrank<size%redprocs) lrows++;
3486       }
3487       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3488       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3489       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3490       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3491       row = nrank;
3492       ncols = xadj[1]-xadj[0];
3493       cols = adjncy;
3494       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3495       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3496       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3497       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3498       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3499       ierr = PetscFree(xadj);CHKERRQ(ierr);
3500       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3501       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3502       ierr = PetscFree(vals);CHKERRQ(ierr);
3503     } else {
3504       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3505     }
3506     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3507 
3508     /* Partition */
3509     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3510     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3511     if (use_vwgt) {
3512       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3513       v_wgt[0] = local_size;
3514       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3515     }
3516     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3517     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3518     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3519     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3520     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3521 
3522     /* renumber new_ranks to avoid "holes" in new set of processors */
3523     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3524     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3525     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3526     if (!redprocs) {
3527       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3528     } else {
3529       PetscInt    idxs[1];
3530       PetscMPIInt tag;
3531       MPI_Request *reqs;
3532 
3533       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3534       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3535       for (i=rstart;i<rend;i++) {
3536         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3537       }
3538       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3539       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3540       ierr = PetscFree(reqs);CHKERRQ(ierr);
3541       ranks_send_to_idx[0] = oldranks[idxs[0]];
3542     }
3543     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3544     /* clean up */
3545     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3546     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3547     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3548     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3549   }
3550   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3551 
3552   /* assemble parallel IS for sends */
3553   i = 1;
3554   if (color) i=0;
3555   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3556   /* get back IS */
3557   *is_sends = ranks_send_to;
3558   PetscFunctionReturn(0);
3559 }
3560 
3561 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3562 
3563 #undef __FUNCT__
3564 #define __FUNCT__ "MatISSubassemble"
3565 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3566 {
3567   Mat                    local_mat;
3568   IS                     is_sends_internal;
3569   PetscInt               rows,cols,new_local_rows;
3570   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3571   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3572   ISLocalToGlobalMapping l2gmap;
3573   PetscInt*              l2gmap_indices;
3574   const PetscInt*        is_indices;
3575   MatType                new_local_type;
3576   /* buffers */
3577   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3578   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3579   PetscInt               *recv_buffer_idxs_local;
3580   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3581   /* MPI */
3582   MPI_Comm               comm,comm_n;
3583   PetscSubcomm           subcomm;
3584   PetscMPIInt            n_sends,n_recvs,commsize;
3585   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3586   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3587   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3588   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3589   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3590   PetscErrorCode         ierr;
3591 
3592   PetscFunctionBegin;
3593   /* TODO: add missing checks */
3594   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3595   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3596   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3597   PetscValidLogicalCollectiveInt(mat,nis,7);
3598   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3599   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3600   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3601   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3602   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3603   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3604   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3605   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3606     PetscInt mrows,mcols,mnrows,mncols;
3607     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3608     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3609     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3610     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3611     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3612     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3613   }
3614   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3615   PetscValidLogicalCollectiveInt(mat,bs,0);
3616   /* prepare IS for sending if not provided */
3617   if (!is_sends) {
3618     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3619     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
3620   } else {
3621     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3622     is_sends_internal = is_sends;
3623   }
3624 
3625   /* get comm */
3626   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3627 
3628   /* compute number of sends */
3629   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3630   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3631 
3632   /* compute number of receives */
3633   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3634   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3635   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3636   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3637   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3638   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3639   ierr = PetscFree(iflags);CHKERRQ(ierr);
3640 
3641   /* restrict comm if requested */
3642   subcomm = 0;
3643   destroy_mat = PETSC_FALSE;
3644   if (restrict_comm) {
3645     PetscMPIInt color,subcommsize;
3646 
3647     color = 0;
3648     if (restrict_full) {
3649       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3650     } else {
3651       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3652     }
3653     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3654     subcommsize = commsize - subcommsize;
3655     /* check if reuse has been requested */
3656     if (reuse == MAT_REUSE_MATRIX) {
3657       if (*mat_n) {
3658         PetscMPIInt subcommsize2;
3659         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3660         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3661         comm_n = PetscObjectComm((PetscObject)*mat_n);
3662       } else {
3663         comm_n = PETSC_COMM_SELF;
3664       }
3665     } else { /* MAT_INITIAL_MATRIX */
3666       PetscMPIInt rank;
3667 
3668       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3669       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3670       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3671       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3672       comm_n = PetscSubcommChild(subcomm);
3673     }
3674     /* flag to destroy *mat_n if not significative */
3675     if (color) destroy_mat = PETSC_TRUE;
3676   } else {
3677     comm_n = comm;
3678   }
3679 
3680   /* prepare send/receive buffers */
3681   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3682   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3683   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3684   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3685   if (nis) {
3686     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3687   }
3688 
3689   /* Get data from local matrices */
3690   if (!isdense) {
3691     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3692     /* TODO: See below some guidelines on how to prepare the local buffers */
3693     /*
3694        send_buffer_vals should contain the raw values of the local matrix
3695        send_buffer_idxs should contain:
3696        - MatType_PRIVATE type
3697        - PetscInt        size_of_l2gmap
3698        - PetscInt        global_row_indices[size_of_l2gmap]
3699        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3700     */
3701   } else {
3702     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3703     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
3704     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3705     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3706     send_buffer_idxs[1] = i;
3707     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3708     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3709     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3710     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3711     for (i=0;i<n_sends;i++) {
3712       ilengths_vals[is_indices[i]] = len*len;
3713       ilengths_idxs[is_indices[i]] = len+2;
3714     }
3715   }
3716   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3717   /* additional is (if any) */
3718   if (nis) {
3719     PetscMPIInt psum;
3720     PetscInt j;
3721     for (j=0,psum=0;j<nis;j++) {
3722       PetscInt plen;
3723       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3724       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3725       psum += len+1; /* indices + lenght */
3726     }
3727     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3728     for (j=0,psum=0;j<nis;j++) {
3729       PetscInt plen;
3730       const PetscInt *is_array_idxs;
3731       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3732       send_buffer_idxs_is[psum] = plen;
3733       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3734       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3735       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3736       psum += plen+1; /* indices + lenght */
3737     }
3738     for (i=0;i<n_sends;i++) {
3739       ilengths_idxs_is[is_indices[i]] = psum;
3740     }
3741     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3742   }
3743 
3744   buf_size_idxs = 0;
3745   buf_size_vals = 0;
3746   buf_size_idxs_is = 0;
3747   for (i=0;i<n_recvs;i++) {
3748     buf_size_idxs += (PetscInt)olengths_idxs[i];
3749     buf_size_vals += (PetscInt)olengths_vals[i];
3750     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3751   }
3752   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3753   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3754   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3755 
3756   /* get new tags for clean communications */
3757   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3758   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3759   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3760 
3761   /* allocate for requests */
3762   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3763   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3764   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3765   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3766   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3767   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3768 
3769   /* communications */
3770   ptr_idxs = recv_buffer_idxs;
3771   ptr_vals = recv_buffer_vals;
3772   ptr_idxs_is = recv_buffer_idxs_is;
3773   for (i=0;i<n_recvs;i++) {
3774     source_dest = onodes[i];
3775     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3776     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3777     ptr_idxs += olengths_idxs[i];
3778     ptr_vals += olengths_vals[i];
3779     if (nis) {
3780       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3781       ptr_idxs_is += olengths_idxs_is[i];
3782     }
3783   }
3784   for (i=0;i<n_sends;i++) {
3785     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3786     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3787     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3788     if (nis) {
3789       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3790     }
3791   }
3792   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3793   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3794 
3795   /* assemble new l2g map */
3796   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3797   ptr_idxs = recv_buffer_idxs;
3798   new_local_rows = 0;
3799   for (i=0;i<n_recvs;i++) {
3800     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3801     ptr_idxs += olengths_idxs[i];
3802   }
3803   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3804   ptr_idxs = recv_buffer_idxs;
3805   new_local_rows = 0;
3806   for (i=0;i<n_recvs;i++) {
3807     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3808     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3809     ptr_idxs += olengths_idxs[i];
3810   }
3811   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3812   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3813   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3814 
3815   /* infer new local matrix type from received local matrices type */
3816   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3817   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3818   if (n_recvs) {
3819     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3820     ptr_idxs = recv_buffer_idxs;
3821     for (i=0;i<n_recvs;i++) {
3822       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3823         new_local_type_private = MATAIJ_PRIVATE;
3824         break;
3825       }
3826       ptr_idxs += olengths_idxs[i];
3827     }
3828     switch (new_local_type_private) {
3829       case MATDENSE_PRIVATE:
3830         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3831           new_local_type = MATSEQAIJ;
3832           bs = 1;
3833         } else { /* if I receive only 1 dense matrix */
3834           new_local_type = MATSEQDENSE;
3835           bs = 1;
3836         }
3837         break;
3838       case MATAIJ_PRIVATE:
3839         new_local_type = MATSEQAIJ;
3840         bs = 1;
3841         break;
3842       case MATBAIJ_PRIVATE:
3843         new_local_type = MATSEQBAIJ;
3844         break;
3845       case MATSBAIJ_PRIVATE:
3846         new_local_type = MATSEQSBAIJ;
3847         break;
3848       default:
3849         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3850         break;
3851     }
3852   } else { /* by default, new_local_type is seqdense */
3853     new_local_type = MATSEQDENSE;
3854     bs = 1;
3855   }
3856 
3857   /* create MATIS object if needed */
3858   if (reuse == MAT_INITIAL_MATRIX) {
3859     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3860     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
3861   } else {
3862     /* it also destroys the local matrices */
3863     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3864   }
3865   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3866   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3867 
3868   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3869 
3870   /* Global to local map of received indices */
3871   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3872   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3873   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3874 
3875   /* restore attributes -> type of incoming data and its size */
3876   buf_size_idxs = 0;
3877   for (i=0;i<n_recvs;i++) {
3878     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3879     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3880     buf_size_idxs += (PetscInt)olengths_idxs[i];
3881   }
3882   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3883 
3884   /* set preallocation */
3885   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3886   if (!newisdense) {
3887     PetscInt *new_local_nnz=0;
3888 
3889     ptr_vals = recv_buffer_vals;
3890     ptr_idxs = recv_buffer_idxs_local;
3891     if (n_recvs) {
3892       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3893     }
3894     for (i=0;i<n_recvs;i++) {
3895       PetscInt j;
3896       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3897         for (j=0;j<*(ptr_idxs+1);j++) {
3898           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3899         }
3900       } else {
3901         /* TODO */
3902       }
3903       ptr_idxs += olengths_idxs[i];
3904     }
3905     if (new_local_nnz) {
3906       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3907       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3908       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3909       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3910       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3911       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3912     } else {
3913       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3914     }
3915     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3916   } else {
3917     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3918   }
3919 
3920   /* set values */
3921   ptr_vals = recv_buffer_vals;
3922   ptr_idxs = recv_buffer_idxs_local;
3923   for (i=0;i<n_recvs;i++) {
3924     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3925       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3926       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3927       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3928       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3929       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3930     } else {
3931       /* TODO */
3932     }
3933     ptr_idxs += olengths_idxs[i];
3934     ptr_vals += olengths_vals[i];
3935   }
3936   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3937   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3938   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3939   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3940   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3941   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3942 
3943 #if 0
3944   if (!restrict_comm) { /* check */
3945     Vec       lvec,rvec;
3946     PetscReal infty_error;
3947 
3948     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3949     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3950     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3951     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3952     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3953     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3954     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3955     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3956     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3957   }
3958 #endif
3959 
3960   /* assemble new additional is (if any) */
3961   if (nis) {
3962     PetscInt **temp_idxs,*count_is,j,psum;
3963 
3964     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3965     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3966     ptr_idxs = recv_buffer_idxs_is;
3967     psum = 0;
3968     for (i=0;i<n_recvs;i++) {
3969       for (j=0;j<nis;j++) {
3970         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3971         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3972         psum += plen;
3973         ptr_idxs += plen+1; /* shift pointer to received data */
3974       }
3975     }
3976     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3977     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3978     for (i=1;i<nis;i++) {
3979       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3980     }
3981     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3982     ptr_idxs = recv_buffer_idxs_is;
3983     for (i=0;i<n_recvs;i++) {
3984       for (j=0;j<nis;j++) {
3985         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3986         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3987         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3988         ptr_idxs += plen+1; /* shift pointer to received data */
3989       }
3990     }
3991     for (i=0;i<nis;i++) {
3992       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3993       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3994       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3995     }
3996     ierr = PetscFree(count_is);CHKERRQ(ierr);
3997     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3998     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3999   }
4000   /* free workspace */
4001   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4002   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4003   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4004   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4005   if (isdense) {
4006     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4007     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4008   } else {
4009     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4010   }
4011   if (nis) {
4012     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4013     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4014   }
4015   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4016   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4017   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4018   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4019   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4020   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4021   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4022   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4023   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4024   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4025   ierr = PetscFree(onodes);CHKERRQ(ierr);
4026   if (nis) {
4027     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4028     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4029     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4030   }
4031   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4032   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4033     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4034     for (i=0;i<nis;i++) {
4035       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4036     }
4037     *mat_n = NULL;
4038   }
4039   PetscFunctionReturn(0);
4040 }
4041 
4042 /* temporary hack into ksp private data structure */
4043 #include <petsc/private/kspimpl.h>
4044 
4045 #undef __FUNCT__
4046 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4047 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4048 {
4049   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4050   PC_IS                  *pcis = (PC_IS*)pc->data;
4051   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4052   MatNullSpace           CoarseNullSpace=NULL;
4053   ISLocalToGlobalMapping coarse_islg;
4054   IS                     coarse_is,*isarray;
4055   PetscInt               i,im_active=-1,active_procs=-1;
4056   PetscInt               nis,nisdofs,nisneu;
4057   PC                     pc_temp;
4058   PCType                 coarse_pc_type;
4059   KSPType                coarse_ksp_type;
4060   PetscBool              multilevel_requested,multilevel_allowed;
4061   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4062   Mat                    t_coarse_mat_is;
4063   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4064   PetscMPIInt            all_procs;
4065   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4066   PetscBool              compute_vecs = PETSC_FALSE;
4067   PetscScalar            *array;
4068   PetscErrorCode         ierr;
4069 
4070   PetscFunctionBegin;
4071   /* Assign global numbering to coarse dofs */
4072   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4073     PetscInt ocoarse_size;
4074     compute_vecs = PETSC_TRUE;
4075     ocoarse_size = pcbddc->coarse_size;
4076     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4077     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4078     /* see if we can avoid some work */
4079     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4080       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4081       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4082         PC        pc;
4083         PetscBool isbddc;
4084 
4085         /* temporary workaround since PCBDDC does not have a reset method so far */
4086         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4087         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4088         if (isbddc) {
4089           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4090         }
4091         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4092         coarse_reuse = PETSC_FALSE;
4093       } else { /* we can safely reuse already computed coarse matrix */
4094         coarse_reuse = PETSC_TRUE;
4095       }
4096     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4097       coarse_reuse = PETSC_FALSE;
4098     }
4099     /* reset any subassembling information */
4100     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4101     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4102   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4103     coarse_reuse = PETSC_TRUE;
4104   }
4105 
4106   /* count "active" (i.e. with positive local size) and "void" processes */
4107   im_active = !!(pcis->n);
4108   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4109   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4110   void_procs = all_procs-active_procs;
4111   csin_type_simple = PETSC_TRUE;
4112   redist = PETSC_FALSE;
4113   if (pcbddc->current_level && void_procs) {
4114     csin_ml = PETSC_TRUE;
4115     ncoarse_ml = void_procs;
4116     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4117     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4118       csin_ds = PETSC_TRUE;
4119       ncoarse_ds = pcbddc->redistribute_coarse;
4120       redist = PETSC_TRUE;
4121     } else {
4122       csin_ds = PETSC_TRUE;
4123       ncoarse_ds = active_procs;
4124       redist = PETSC_TRUE;
4125     }
4126   } else {
4127     csin_ml = PETSC_FALSE;
4128     ncoarse_ml = all_procs;
4129     if (void_procs) {
4130       csin_ds = PETSC_TRUE;
4131       ncoarse_ds = void_procs;
4132       csin_type_simple = PETSC_FALSE;
4133     } else {
4134       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4135         csin_ds = PETSC_TRUE;
4136         ncoarse_ds = pcbddc->redistribute_coarse;
4137         redist = PETSC_TRUE;
4138       } else {
4139         csin_ds = PETSC_FALSE;
4140         ncoarse_ds = all_procs;
4141       }
4142     }
4143   }
4144 
4145   /*
4146     test if we can go multilevel: three conditions must be satisfied:
4147     - we have not exceeded the number of levels requested
4148     - we can actually subassemble the active processes
4149     - we can find a suitable number of MPI processes where we can place the subassembled problem
4150   */
4151   multilevel_allowed = PETSC_FALSE;
4152   multilevel_requested = PETSC_FALSE;
4153   if (pcbddc->current_level < pcbddc->max_levels) {
4154     multilevel_requested = PETSC_TRUE;
4155     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4156       multilevel_allowed = PETSC_FALSE;
4157     } else {
4158       multilevel_allowed = PETSC_TRUE;
4159     }
4160   }
4161   /* determine number of process partecipating to coarse solver */
4162   if (multilevel_allowed) {
4163     ncoarse = ncoarse_ml;
4164     csin = csin_ml;
4165     redist = PETSC_FALSE;
4166   } else {
4167     ncoarse = ncoarse_ds;
4168     csin = csin_ds;
4169   }
4170 
4171   /* creates temporary l2gmap and IS for coarse indexes */
4172   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4173   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4174 
4175   /* creates temporary MATIS object for coarse matrix */
4176   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4177   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4178   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4179   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4180 #if 0
4181   {
4182     PetscViewer viewer;
4183     char filename[256];
4184     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4185     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4186     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4187     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4188     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4189   }
4190 #endif
4191   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4192   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4193   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4194   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4195   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4196 
4197   /* compute dofs splitting and neumann boundaries for coarse dofs */
4198   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4199     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4200     const PetscInt         *idxs;
4201     ISLocalToGlobalMapping tmap;
4202 
4203     /* create map between primal indices (in local representative ordering) and local primal numbering */
4204     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4205     /* allocate space for temporary storage */
4206     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4207     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4208     /* allocate for IS array */
4209     nisdofs = pcbddc->n_ISForDofsLocal;
4210     nisneu = !!pcbddc->NeumannBoundariesLocal;
4211     nis = nisdofs + nisneu;
4212     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4213     /* dofs splitting */
4214     for (i=0;i<nisdofs;i++) {
4215       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4216       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4217       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4218       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4219       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4220       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4221       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4222       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4223     }
4224     /* neumann boundaries */
4225     if (pcbddc->NeumannBoundariesLocal) {
4226       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4227       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4228       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4229       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4230       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4231       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4232       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4233       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4234     }
4235     /* free memory */
4236     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4237     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4238     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4239   } else {
4240     nis = 0;
4241     nisdofs = 0;
4242     nisneu = 0;
4243     isarray = NULL;
4244   }
4245   /* destroy no longer needed map */
4246   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4247 
4248   /* restrict on coarse candidates (if needed) */
4249   coarse_mat_is = NULL;
4250   if (csin) {
4251     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4252       if (redist) {
4253         PetscMPIInt rank;
4254         PetscInt    spc,n_spc_p1,dest[1],destsize;
4255 
4256         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4257         spc = active_procs/ncoarse;
4258         n_spc_p1 = active_procs%ncoarse;
4259         if (im_active) {
4260           destsize = 1;
4261           if (rank > n_spc_p1*(spc+1)-1) {
4262             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4263           } else {
4264             dest[0] = rank/(spc+1);
4265           }
4266         } else {
4267           destsize = 0;
4268         }
4269         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4270       } else if (csin_type_simple) {
4271         PetscMPIInt rank;
4272         PetscInt    issize,isidx;
4273 
4274         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4275         if (im_active) {
4276           issize = 1;
4277           isidx = (PetscInt)rank;
4278         } else {
4279           issize = 0;
4280           isidx = -1;
4281         }
4282         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4283       } else { /* get a suitable subassembling pattern from MATIS code */
4284         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4285       }
4286 
4287       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4288       if (!redist || ncoarse <= void_procs) {
4289         PetscInt ncoarse_cand,tissize,*nisindices;
4290         PetscInt *coarse_candidates;
4291         const PetscInt* tisindices;
4292 
4293         /* get coarse candidates' ranks in pc communicator */
4294         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4295         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4296         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4297           if (!coarse_candidates[i]) {
4298             coarse_candidates[ncoarse_cand++]=i;
4299           }
4300         }
4301         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4302 
4303 
4304         if (pcbddc->dbg_flag) {
4305           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4306           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4307           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4308           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4309           for (i=0;i<ncoarse_cand;i++) {
4310             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4311           }
4312           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4313           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4314         }
4315         /* shift the pattern on coarse candidates */
4316         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4317         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4318         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4319         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4320         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4321         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4322         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4323       }
4324       if (pcbddc->dbg_flag) {
4325         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4326         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4327         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4328         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4329       }
4330     }
4331     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4332     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4333       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4334     } else { /* this is the last level, so use just receiving processes in subcomm */
4335       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4336     }
4337   } else {
4338     if (pcbddc->dbg_flag) {
4339       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4340       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4341       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4342     }
4343     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4344     coarse_mat_is = t_coarse_mat_is;
4345   }
4346 
4347   /* create local to global scatters for coarse problem */
4348   if (compute_vecs) {
4349     PetscInt lrows;
4350     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4351     if (coarse_mat_is) {
4352       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4353     } else {
4354       lrows = 0;
4355     }
4356     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4357     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4358     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4359     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4360     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4361   }
4362   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4363   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4364 
4365   /* set defaults for coarse KSP and PC */
4366   if (multilevel_allowed) {
4367     coarse_ksp_type = KSPRICHARDSON;
4368     coarse_pc_type = PCBDDC;
4369   } else {
4370     coarse_ksp_type = KSPPREONLY;
4371     coarse_pc_type = PCREDUNDANT;
4372   }
4373 
4374   /* print some info if requested */
4375   if (pcbddc->dbg_flag) {
4376     if (!multilevel_allowed) {
4377       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4378       if (multilevel_requested) {
4379         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4380       } else if (pcbddc->max_levels) {
4381         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4382       }
4383       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4384     }
4385   }
4386 
4387   /* create the coarse KSP object only once with defaults */
4388   if (coarse_mat_is) {
4389     MatReuse coarse_mat_reuse;
4390     PetscViewer dbg_viewer = NULL;
4391     if (pcbddc->dbg_flag) {
4392       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4393       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4394     }
4395     if (!pcbddc->coarse_ksp) {
4396       char prefix[256],str_level[16];
4397       size_t len;
4398       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4399       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4400       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4401       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4402       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4403       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4404       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4405       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4406       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4407       /* prefix */
4408       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4409       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4410       if (!pcbddc->current_level) {
4411         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4412         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4413       } else {
4414         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4415         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4416         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4417         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4418         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4419         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4420       }
4421       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4422       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4423       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4424       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4425       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4426       /* allow user customization */
4427       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4428     }
4429     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4430     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4431     if (nisdofs) {
4432       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4433       for (i=0;i<nisdofs;i++) {
4434         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4435       }
4436     }
4437     if (nisneu) {
4438       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4439       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4440     }
4441 
4442     /* get some info after set from options */
4443     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4444     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4445     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4446     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4447       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4448       isbddc = PETSC_FALSE;
4449     }
4450     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4451     if (isredundant) {
4452       KSP inner_ksp;
4453       PC  inner_pc;
4454       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4455       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4456       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4457     }
4458 
4459     /* assemble coarse matrix */
4460     if (coarse_reuse) {
4461       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4462       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4463       coarse_mat_reuse = MAT_REUSE_MATRIX;
4464     } else {
4465       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4466     }
4467     if (isbddc || isnn) {
4468       if (pcbddc->coarsening_ratio > 1) {
4469         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4470           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4471           if (pcbddc->dbg_flag) {
4472             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4473             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4474             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4475             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4476           }
4477         }
4478         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4479       } else {
4480         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4481         coarse_mat = coarse_mat_is;
4482       }
4483     } else {
4484       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4485     }
4486     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4487 
4488     /* propagate symmetry info of coarse matrix */
4489     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4490     if (pc->pmat->symmetric_set) {
4491       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4492     }
4493     if (pc->pmat->hermitian_set) {
4494       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4495     }
4496     if (pc->pmat->spd_set) {
4497       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4498     }
4499     /* set operators */
4500     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4501     if (pcbddc->dbg_flag) {
4502       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4503     }
4504   } else { /* processes non partecipating to coarse solver (if any) */
4505     coarse_mat = 0;
4506   }
4507   ierr = PetscFree(isarray);CHKERRQ(ierr);
4508 #if 0
4509   {
4510     PetscViewer viewer;
4511     char filename[256];
4512     sprintf(filename,"coarse_mat.m");
4513     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4514     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4515     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4516     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4517   }
4518 #endif
4519 
4520   /* Compute coarse null space (special handling by BDDC only) */
4521 #if 0
4522   if (pcbddc->NullSpace) {
4523     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4524   }
4525 #endif
4526 
4527   if (pcbddc->coarse_ksp) {
4528     Vec crhs,csol;
4529     PetscBool ispreonly;
4530 
4531     if (CoarseNullSpace) {
4532       if (isbddc) {
4533         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4534       } else {
4535         ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
4536       }
4537     }
4538     /* setup coarse ksp */
4539     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4540     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4541     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4542     /* hack */
4543     if (!csol) {
4544       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4545     }
4546     if (!crhs) {
4547       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4548     }
4549     /* Check coarse problem if in debug mode or if solving with an iterative method */
4550     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4551     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4552       KSP       check_ksp;
4553       KSPType   check_ksp_type;
4554       PC        check_pc;
4555       Vec       check_vec,coarse_vec;
4556       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4557       PetscInt  its;
4558       PetscBool compute_eigs;
4559       PetscReal *eigs_r,*eigs_c;
4560       PetscInt  neigs;
4561       const char *prefix;
4562 
4563       /* Create ksp object suitable for estimation of extreme eigenvalues */
4564       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4565       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4566       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4567       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4568       if (ispreonly) {
4569         check_ksp_type = KSPPREONLY;
4570         compute_eigs = PETSC_FALSE;
4571       } else {
4572         check_ksp_type = KSPGMRES;
4573         compute_eigs = PETSC_TRUE;
4574       }
4575       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4576       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4577       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4578       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4579       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4580       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4581       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4582       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4583       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4584       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4585       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4586       /* create random vec */
4587       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4588       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4589       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4590       if (CoarseNullSpace) {
4591         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4592       }
4593       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4594       /* solve coarse problem */
4595       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4596       if (CoarseNullSpace) {
4597         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4598       }
4599       /* set eigenvalue estimation if preonly has not been requested */
4600       if (compute_eigs) {
4601         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4602         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4603         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4604         lambda_max = eigs_r[neigs-1];
4605         lambda_min = eigs_r[0];
4606         if (pcbddc->use_coarse_estimates) {
4607           if (lambda_max>lambda_min) {
4608             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4609             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4610           }
4611         }
4612       }
4613 
4614       /* check coarse problem residual error */
4615       if (pcbddc->dbg_flag) {
4616         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4617         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4618         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4619         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4620         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4621         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4622         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4623         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4624         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4625         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4626         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4627         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4628         if (compute_eigs) {
4629           PetscReal lambda_max_s,lambda_min_s;
4630           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4631           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4632           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4633           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4634           for (i=0;i<neigs;i++) {
4635             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4636           }
4637         }
4638         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4639         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4640       }
4641       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4642       if (compute_eigs) {
4643         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4644         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4645       }
4646     }
4647   }
4648   /* print additional info */
4649   if (pcbddc->dbg_flag) {
4650     /* waits until all processes reaches this point */
4651     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4652     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4653     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4654   }
4655 
4656   /* free memory */
4657   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4658   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4659   PetscFunctionReturn(0);
4660 }
4661 
4662 #undef __FUNCT__
4663 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4664 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4665 {
4666   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4667   PC_IS*         pcis = (PC_IS*)pc->data;
4668   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4669   IS             subset,subset_mult,subset_n;
4670   PetscInt       local_size,coarse_size=0;
4671   PetscInt       *local_primal_indices=NULL;
4672   const PetscInt *t_local_primal_indices;
4673   PetscErrorCode ierr;
4674 
4675   PetscFunctionBegin;
4676   /* Compute global number of coarse dofs */
4677   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4678     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4679   }
4680   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
4681   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
4682   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4683   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
4684   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
4685   ierr = ISDestroy(&subset);CHKERRQ(ierr);
4686   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
4687   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
4688   if (local_size != pcbddc->local_primal_size) {
4689     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4690   }
4691   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
4692   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4693   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
4694   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4695   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4696 
4697   /* check numbering */
4698   if (pcbddc->dbg_flag) {
4699     PetscScalar coarsesum,*array;
4700     PetscInt    i;
4701     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4702 
4703     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4704     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4705     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4706     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4707     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4708     for (i=0;i<pcbddc->local_primal_size;i++) {
4709       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4710     }
4711     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4712     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4713     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4714     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4715     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4716     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4717     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4718     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4719     for (i=0;i<pcis->n;i++) {
4720       if (array[i] == 1.0) {
4721         set_error = PETSC_TRUE;
4722         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4723       }
4724     }
4725     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4726     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4727     for (i=0;i<pcis->n;i++) {
4728       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4729     }
4730     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4731     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4732     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4733     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4734     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4735     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4736     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4737       PetscInt *gidxs;
4738 
4739       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4740       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4741       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4742       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4743       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4744       for (i=0;i<pcbddc->local_primal_size;i++) {
4745         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4746       }
4747       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4748       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4749     }
4750     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4751     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4752   }
4753   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4754   /* get back data */
4755   *coarse_size_n = coarse_size;
4756   *local_primal_indices_n = local_primal_indices;
4757   PetscFunctionReturn(0);
4758 }
4759 
4760 #undef __FUNCT__
4761 #define __FUNCT__ "PCBDDCGlobalToLocal"
4762 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4763 {
4764   IS             localis_t;
4765   PetscInt       i,lsize,*idxs,n;
4766   PetscScalar    *vals;
4767   PetscErrorCode ierr;
4768 
4769   PetscFunctionBegin;
4770   /* get indices in local ordering exploiting local to global map */
4771   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4772   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4773   for (i=0;i<lsize;i++) vals[i] = 1.0;
4774   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4775   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4776   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4777   if (idxs) { /* multilevel guard */
4778     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4779   }
4780   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4781   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4782   ierr = PetscFree(vals);CHKERRQ(ierr);
4783   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4784   /* now compute set in local ordering */
4785   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4786   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4787   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4788   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4789   for (i=0,lsize=0;i<n;i++) {
4790     if (PetscRealPart(vals[i]) > 0.5) {
4791       lsize++;
4792     }
4793   }
4794   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4795   for (i=0,lsize=0;i<n;i++) {
4796     if (PetscRealPart(vals[i]) > 0.5) {
4797       idxs[lsize++] = i;
4798     }
4799   }
4800   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4801   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4802   *localis = localis_t;
4803   PetscFunctionReturn(0);
4804 }
4805 
4806 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4807 #undef __FUNCT__
4808 #define __FUNCT__ "PCBDDCMatMult_Private"
4809 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4810 {
4811   PCBDDCChange_ctx change_ctx;
4812   PetscErrorCode   ierr;
4813 
4814   PetscFunctionBegin;
4815   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4816   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4817   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4818   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4819   PetscFunctionReturn(0);
4820 }
4821 
4822 #undef __FUNCT__
4823 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4824 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4825 {
4826   PCBDDCChange_ctx change_ctx;
4827   PetscErrorCode   ierr;
4828 
4829   PetscFunctionBegin;
4830   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4831   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4832   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4833   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4834   PetscFunctionReturn(0);
4835 }
4836 
4837 #undef __FUNCT__
4838 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4839 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4840 {
4841   PC_IS               *pcis=(PC_IS*)pc->data;
4842   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4843   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4844   Mat                 S_j;
4845   PetscInt            *used_xadj,*used_adjncy;
4846   PetscBool           free_used_adj;
4847   PetscErrorCode      ierr;
4848 
4849   PetscFunctionBegin;
4850   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4851   free_used_adj = PETSC_FALSE;
4852   if (pcbddc->sub_schurs_layers == -1) {
4853     used_xadj = NULL;
4854     used_adjncy = NULL;
4855   } else {
4856     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4857       used_xadj = pcbddc->mat_graph->xadj;
4858       used_adjncy = pcbddc->mat_graph->adjncy;
4859     } else if (pcbddc->computed_rowadj) {
4860       used_xadj = pcbddc->mat_graph->xadj;
4861       used_adjncy = pcbddc->mat_graph->adjncy;
4862     } else {
4863       PetscBool      flg_row=PETSC_FALSE;
4864       const PetscInt *xadj,*adjncy;
4865       PetscInt       nvtxs;
4866 
4867       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4868       if (flg_row) {
4869         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4870         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4871         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4872         free_used_adj = PETSC_TRUE;
4873       } else {
4874         pcbddc->sub_schurs_layers = -1;
4875         used_xadj = NULL;
4876         used_adjncy = NULL;
4877       }
4878       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4879     }
4880   }
4881 
4882   /* setup sub_schurs data */
4883   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4884   if (!sub_schurs->use_mumps) {
4885     /* pcbddc->ksp_D up to date only if not using MUMPS */
4886     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4887     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
4888   } else {
4889     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4890     PetscBool isseqaij;
4891     if (!pcbddc->use_vertices && reuse_solvers) {
4892       PetscInt n_vertices;
4893 
4894       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
4895       reuse_solvers = (PetscBool)!n_vertices;
4896     }
4897     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
4898     if (!isseqaij) {
4899       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4900       if (matis->A == pcbddc->local_mat) {
4901         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
4902         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
4903       } else {
4904         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
4905       }
4906     }
4907     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
4908   }
4909   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4910 
4911   /* free adjacency */
4912   if (free_used_adj) {
4913     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4914   }
4915   PetscFunctionReturn(0);
4916 }
4917 
4918 #undef __FUNCT__
4919 #define __FUNCT__ "PCBDDCInitSubSchurs"
4920 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4921 {
4922   PC_IS               *pcis=(PC_IS*)pc->data;
4923   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4924   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4925   PCBDDCGraph         graph;
4926   PetscErrorCode      ierr;
4927 
4928   PetscFunctionBegin;
4929   /* attach interface graph for determining subsets */
4930   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4931     IS       verticesIS,verticescomm;
4932     PetscInt vsize,*idxs;
4933 
4934     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4935     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
4936     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4937     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
4938     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4939     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4940     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4941     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4942     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
4943     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
4944     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4945 /*
4946     if (pcbddc->dbg_flag) {
4947       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4948     }
4949 */
4950   } else {
4951     graph = pcbddc->mat_graph;
4952   }
4953 
4954   /* sub_schurs init */
4955   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4956 
4957   /* free graph struct */
4958   if (pcbddc->sub_schurs_rebuild) {
4959     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4960   }
4961   PetscFunctionReturn(0);
4962 }
4963