xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 019a44ce1e325f7dfca7f1433bebb103fa9311f8)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
10 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   const PetscInt *idxs;
14   PetscInt       nz = 0;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   if (!pcbddc->zerodiag) {
19     PetscFunctionReturn(0);
20   } else {
21     ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr);
22     if (!nz) PetscFunctionReturn(0);
23   }
24   ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr);
25   /* TODO: add error checking
26     - avoid nested pop (or push) calls.
27     - cannot push before pop.
28   */
29   if (pop) {
30     const PetscInt    *cB0_cols;
31     PetscInt          cB0_ncol;
32     const PetscScalar *cB0_vals;
33 
34     /* extract B_0 */
35     ierr = MatGetRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
36     pcbddc->B0_ncol = cB0_ncol;
37     ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
38     ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr);
39     ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr);
40     ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr);
41     ierr = MatRestoreRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
42     /* remove rows and cols from local problem */
43     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
44     ierr = MatZeroRowsColumns(pcbddc->local_mat,1,idxs+nz-1,1.,NULL,NULL);CHKERRQ(ierr);
45   } else { /* push */
46     ierr = MatSetValues(pcbddc->local_mat,1,&idxs[nz-1],pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
47     ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&idxs[nz-1],pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
48     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
49     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
50   }
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "PCBDDCAdaptiveSelection"
56 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
57 {
58   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
59   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
60   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
61   PetscBLASInt    *B_iwork,*B_ifail;
62   PetscScalar     *work,lwork;
63   PetscScalar     *St,*S,*eigv;
64   PetscScalar     *Sarray,*Starray;
65   PetscReal       *eigs,thresh;
66   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
67   PetscBool       allocated_S_St;
68 #if defined(PETSC_USE_COMPLEX)
69   PetscReal       *rwork;
70 #endif
71   PetscErrorCode  ierr;
72 
73   PetscFunctionBegin;
74   if (!sub_schurs->use_mumps) {
75     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
76   }
77 
78   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
79     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
80   }
81 
82   if (pcbddc->dbg_flag) {
83     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
84     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
85     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
86     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
87   }
88 
89   if (pcbddc->dbg_flag) {
90     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
91   }
92 
93   /* max size of subsets */
94   mss = 0;
95   for (i=0;i<sub_schurs->n_subs;i++) {
96     PetscInt subset_size;
97 
98     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
99     mss = PetscMax(mss,subset_size);
100   }
101 
102   /* min/max and threshold */
103   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
104   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
105   nmax = PetscMax(nmin,nmax);
106   allocated_S_St = PETSC_FALSE;
107   if (nmin) {
108     allocated_S_St = PETSC_TRUE;
109   }
110 
111   /* allocate lapack workspace */
112   cum = cum2 = 0;
113   maxneigs = 0;
114   for (i=0;i<sub_schurs->n_subs;i++) {
115     PetscInt n,subset_size;
116 
117     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
118     n = PetscMin(subset_size,nmax);
119     cum += subset_size;
120     cum2 += subset_size*n;
121     maxneigs = PetscMax(maxneigs,n);
122   }
123   if (mss) {
124     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
125       PetscBLASInt B_itype = 1;
126       PetscBLASInt B_N = mss;
127       PetscReal    zero = 0.0;
128       PetscReal    eps = 0.0; /* dlamch? */
129 
130       B_lwork = -1;
131       S = NULL;
132       St = NULL;
133       eigs = NULL;
134       eigv = NULL;
135       B_iwork = NULL;
136       B_ifail = NULL;
137 #if defined(PETSC_USE_COMPLEX)
138       rwork = NULL;
139 #endif
140       thresh = 1.0;
141       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
142 #if defined(PETSC_USE_COMPLEX)
143       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
144 #else
145       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
146 #endif
147       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
148       ierr = PetscFPTrapPop();CHKERRQ(ierr);
149     } else {
150         /* TODO */
151     }
152   } else {
153     lwork = 0;
154   }
155 
156   nv = 0;
157   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
158     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
159   }
160   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
161   if (allocated_S_St) {
162     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
163   }
164   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
165 #if defined(PETSC_USE_COMPLEX)
166   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
167 #endif
168   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
169                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
170                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
171                       nv+cum,&pcbddc->adaptive_constraints_idxs,
172                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
173   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
174 
175   maxneigs = 0;
176   cum = cum2 = cumarray = 0;
177   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
178   pcbddc->adaptive_constraints_data_ptr[0] = 0;
179   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
180     const PetscInt *idxs;
181 
182     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
183     for (cum=0;cum<nv;cum++) {
184       pcbddc->adaptive_constraints_n[cum] = 1;
185       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
186       pcbddc->adaptive_constraints_data[cum] = 1.0;
187       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
188       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
189     }
190     cum2 = cum;
191     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
192   }
193 
194   if (mss) { /* multilevel */
195     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
196     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
197   }
198 
199   for (i=0;i<sub_schurs->n_subs;i++) {
200 
201     const PetscInt *idxs;
202     PetscReal      infty = PETSC_MAX_REAL;
203     PetscInt       j,subset_size,eigs_start = 0;
204     PetscBLASInt   B_N;
205     PetscBool      same_data = PETSC_FALSE;
206 
207     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
208     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
209     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
210       if (sub_schurs->is_hermitian) {
211         PetscInt j,k;
212         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
213           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
214           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
215         }
216         for (j=0;j<subset_size;j++) {
217           for (k=j;k<subset_size;k++) {
218             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
219             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
220           }
221         }
222       } else {
223         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
224         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
225       }
226     } else {
227       S = Sarray + cumarray;
228       St = Starray + cumarray;
229     }
230 
231     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
232     /* see if we can save some work */
233     if (sub_schurs->n_subs == 1) {
234       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
235     }
236 
237     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
238       B_neigs = 0;
239     } else {
240       /* Threshold: this is an heuristic for edges */
241       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
242 
243       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
244         PetscBLASInt B_itype = 1;
245         PetscBLASInt B_IL, B_IU;
246         PetscReal    eps = -1.0; /* dlamch? */
247         PetscInt     nmin_s;
248 
249         /* ask for eigenvalues larger than thresh */
250         if (pcbddc->dbg_flag) {
251           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
252         }
253         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
254 #if defined(PETSC_USE_COMPLEX)
255         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
256 #else
257         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
258 #endif
259         ierr = PetscFPTrapPop();CHKERRQ(ierr);
260         if (B_ierr) {
261           if (B_ierr < 0 ) {
262             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
263           } else if (B_ierr <= B_N) {
264             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
265           } else {
266             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
267           }
268         }
269 
270         if (B_neigs > nmax) {
271           if (pcbddc->dbg_flag) {
272             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
273           }
274           eigs_start = B_neigs -nmax;
275           B_neigs = nmax;
276         }
277 
278         nmin_s = PetscMin(nmin,B_N);
279         if (B_neigs < nmin_s) {
280           PetscBLASInt B_neigs2;
281 
282           B_IU = B_N - B_neigs;
283           B_IL = B_N - nmin_s + 1;
284           if (pcbddc->dbg_flag) {
285             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
286           }
287           if (sub_schurs->is_hermitian) {
288             PetscInt j;
289             for (j=0;j<subset_size;j++) {
290               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
291             }
292             for (j=0;j<subset_size;j++) {
293               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
294             }
295           } else {
296             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
297             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
298           }
299           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
300 #if defined(PETSC_USE_COMPLEX)
301           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
302 #else
303           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
304 #endif
305           ierr = PetscFPTrapPop();CHKERRQ(ierr);
306           B_neigs += B_neigs2;
307         }
308         if (B_ierr) {
309           if (B_ierr < 0 ) {
310             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
311           } else if (B_ierr <= B_N) {
312             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
313           } else {
314             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
315           }
316         }
317         if (pcbddc->dbg_flag) {
318           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
319           for (j=0;j<B_neigs;j++) {
320             if (eigs[j] == 0.0) {
321               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
322             } else {
323               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
324             }
325           }
326         }
327       } else {
328           /* TODO */
329       }
330     }
331     maxneigs = PetscMax(B_neigs,maxneigs);
332     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
333     if (B_neigs) {
334       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
335 
336       if (pcbddc->dbg_flag > 1) {
337         PetscInt ii;
338         for (ii=0;ii<B_neigs;ii++) {
339           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
340           for (j=0;j<B_N;j++) {
341 #if defined(PETSC_USE_COMPLEX)
342             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
343             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
344             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
345 #else
346             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
347 #endif
348           }
349         }
350       }
351 #if 0
352       for (j=0;j<B_neigs;j++) {
353         PetscBLASInt Blas_N,Blas_one = 1.0;
354         PetscScalar norm;
355         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
356         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
357                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
358         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
359           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
360         } else {
361           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
362         }
363         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
364       }
365 #endif
366       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
367       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
368       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
369       cum++;
370     }
371     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
372     /* shift for next computation */
373     cumarray += subset_size*subset_size;
374   }
375   if (pcbddc->dbg_flag) {
376     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
377   }
378 
379   if (mss) {
380     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
381     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
382     /* destroy matrices (junk) */
383     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
384     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
385   }
386   if (allocated_S_St) {
387     ierr = PetscFree2(S,St);CHKERRQ(ierr);
388   }
389   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
390 #if defined(PETSC_USE_COMPLEX)
391   ierr = PetscFree(rwork);CHKERRQ(ierr);
392 #endif
393   if (pcbddc->dbg_flag) {
394     PetscInt maxneigs_r;
395     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
396     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
397   }
398   PetscFunctionReturn(0);
399 }
400 
401 #undef __FUNCT__
402 #define __FUNCT__ "PCBDDCSetUpSolvers"
403 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
404 {
405   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
406   PetscScalar    *coarse_submat_vals;
407   PetscErrorCode ierr;
408 
409   PetscFunctionBegin;
410   /* Setup local scatters R_to_B and (optionally) R_to_D */
411   /* PCBDDCSetUpLocalWorkVectors should be called first! */
412   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
413 
414   /* Setup local neumann solver ksp_R */
415   /* PCBDDCSetUpLocalScatters should be called first! */
416   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
417 
418   /* Change global null space passed in by the user if change of basis has been requested */
419   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
420     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
421   }
422 
423   /*
424      Setup local correction and local part of coarse basis.
425      Gives back the dense local part of the coarse matrix in column major ordering
426   */
427   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
428 
429   /* Compute total number of coarse nodes and setup coarse solver */
430   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
431 
432   /* free */
433   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
434   PetscFunctionReturn(0);
435 }
436 
437 #undef __FUNCT__
438 #define __FUNCT__ "PCBDDCResetCustomization"
439 PetscErrorCode PCBDDCResetCustomization(PC pc)
440 {
441   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
442   PetscErrorCode ierr;
443 
444   PetscFunctionBegin;
445   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
446   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
447   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
449   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
450   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
451   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
452   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
453   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
454   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
455   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "PCBDDCResetTopography"
461 PetscErrorCode PCBDDCResetTopography(PC pc)
462 {
463   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
464   PetscErrorCode ierr;
465 
466   PetscFunctionBegin;
467   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
468   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
469   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
470   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
471   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "PCBDDCResetSolvers"
477 PetscErrorCode PCBDDCResetSolvers(PC pc)
478 {
479   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
480   PetscScalar    *array;
481   PetscErrorCode ierr;
482 
483   PetscFunctionBegin;
484   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
485   if (pcbddc->coarse_phi_B) {
486     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
487     ierr = PetscFree(array);CHKERRQ(ierr);
488   }
489   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
490   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
491   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
492   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
493   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
494   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
495   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
496   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
497   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
498   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
499   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
500   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
501   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
502   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
503   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
504   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
505   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
506   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
507   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
508   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
509   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
510   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
511   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
512   ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr);
513   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
514   ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 #undef __FUNCT__
519 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
520 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
521 {
522   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
523   PC_IS          *pcis = (PC_IS*)pc->data;
524   VecType        impVecType;
525   PetscInt       n_constraints,n_R,old_size,n_benign;
526   PetscErrorCode ierr;
527 
528   PetscFunctionBegin;
529   if (!pcbddc->ConstraintMatrix) {
530     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
531   }
532   /* get sizes */
533   n_benign = (PetscInt)(!!pcbddc->zerodiag);
534   n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices;
535   if (n_benign && n_constraints) {
536     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis");
537   }
538   n_R = pcis->n - pcbddc->n_vertices;
539   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
540   /* local work vectors (try to avoid unneeded work)*/
541   /* R nodes */
542   old_size = -1;
543   if (pcbddc->vec1_R) {
544     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
545   }
546   if (n_R != old_size) {
547     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
548     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
549     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
550     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
551     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
552     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
553   }
554   /* local primal dofs */
555   old_size = -1;
556   if (pcbddc->vec1_P) {
557     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
558   }
559   if (pcbddc->local_primal_size != old_size) {
560     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
561     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
562     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
563     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
564   }
565   /* local explicit constraints */
566   old_size = -1;
567   if (pcbddc->vec1_C) {
568     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
569   }
570   if (n_constraints && n_constraints != old_size) {
571     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
572     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
573     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
574     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
575   }
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "PCBDDCSetUpCorrection"
581 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
582 {
583   PetscErrorCode  ierr;
584   /* pointers to pcis and pcbddc */
585   PC_IS*          pcis = (PC_IS*)pc->data;
586   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
587   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
588   /* submatrices of local problem */
589   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
590   /* submatrices of benign trick */
591   Mat             B0_R,B0_V;
592   /* submatrices of local coarse problem */
593   Mat             S_VV,S_CV,S_VC,S_CC;
594   /* working matrices */
595   Mat             C_CR;
596   /* additional working stuff */
597   PC              pc_R;
598   Mat             F;
599   Vec             vec_R_benign;
600   PetscBool       isLU,isCHOL,isILU;
601 
602   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
603   PetscScalar     *work;
604   PetscInt        *idx_V_B;
605   PetscInt        n,n_vertices,n_constraints,n_benign;
606   PetscInt        i,n_R,n_D,n_B;
607   PetscBool       unsymmetric_check;
608   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
609   MatType         impMatType;
610   /* some shortcuts to scalars */
611   PetscScalar     one=1.0,m_one=-1.0;
612 
613   PetscFunctionBegin;
614   n_benign = (PetscInt)(!!pcbddc->zerodiag);
615   n_vertices = pcbddc->n_vertices;
616   n_constraints = pcbddc->local_primal_size - n_benign - n_vertices;
617   /* Set Non-overlapping dimensions */
618   n_B = pcis->n_B;
619   n_D = pcis->n - n_B;
620   n_R = pcis->n - n_vertices;
621 
622   /* Set types for local objects needed by BDDC precondtioner */
623   impMatType = MATSEQDENSE;
624 
625   /* vertices in boundary numbering */
626   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
627   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
628   if (i != n_vertices) {
629     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
630   }
631 
632   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
633   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
634   if (!n_benign) {
635     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
636     ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
637     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
638     ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
639     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
640     ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
641     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
642     ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
643   } else { /* these wrappers are not needed with the benign trick */
644     S_VV = S_CV = S_VC = S_CC = NULL;
645   }
646 
647   unsymmetric_check = PETSC_FALSE;
648   /* allocate workspace */
649   n = 0;
650   if (n_constraints) {
651     n += n_R*n_constraints;
652   }
653   if (n_vertices) {
654     n = PetscMax(2*n_R*n_vertices,n);
655     n = PetscMax((n_R+n_B)*n_vertices,n);
656   }
657   if (!pcbddc->symmetric_primal) {
658     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
659     unsymmetric_check = PETSC_TRUE;
660   }
661   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
662 
663   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
664   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
665   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
666   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
667   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
668   if (isLU || isILU || isCHOL) {
669     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
670   } else if (sub_schurs->reuse_mumps) {
671     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
672     MatFactorType type;
673 
674     F = reuse_mumps->F;
675     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
676     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
677   } else {
678     F = NULL;
679   }
680 
681   /* Precompute stuffs needed for preprocessing and application of BDDC*/
682   if (n_constraints) {
683     Mat         M1,M2,M3;
684     Mat         auxmat;
685     IS          is_aux;
686     PetscScalar *array,*array2;
687 
688     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
689     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
690 
691     /* Extract constraints on R nodes: C_{CR}  */
692     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
693     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
694     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
695 
696     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
697     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
698     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
699     for (i=0;i<n_constraints;i++) {
700       const PetscScalar *row_cmat_values;
701       const PetscInt    *row_cmat_indices;
702       PetscInt          size_of_constraint,j;
703 
704       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
705       for (j=0;j<size_of_constraint;j++) {
706         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
707       }
708       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
709     }
710     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
711     if (F) {
712       Mat B;
713 
714       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
715       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
716       ierr = MatDestroy(&B);CHKERRQ(ierr);
717     } else {
718       PetscScalar *marr;
719 
720       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
721       for (i=0;i<n_constraints;i++) {
722         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
723         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
724         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
725         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
726         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
727       }
728       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
729     }
730     if (!pcbddc->switch_static) {
731       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
732       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
733       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
734       for (i=0;i<n_constraints;i++) {
735         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
736         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
737         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
738         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
739         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
740         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
741       }
742       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
743       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
744       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
745     } else {
746       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
747       pcbddc->local_auxmat2 = local_auxmat2_R;
748       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
749     }
750     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
751     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
752     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
753     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
754     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
755     if (isCHOL) {
756       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
757     } else {
758       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
759     }
760     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
761     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
762     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
763     ierr = MatDestroy(&M2);CHKERRQ(ierr);
764     ierr = MatDestroy(&M3);CHKERRQ(ierr);
765     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
766     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
767     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
768     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
769     ierr = MatDestroy(&M1);CHKERRQ(ierr);
770   }
771   /* Get submatrices from subdomain matrix */
772   if (n_vertices) {
773     IS is_aux;
774 
775     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
776       IS tis;
777 
778       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
779       ierr = ISSort(tis);CHKERRQ(ierr);
780       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
781       ierr = ISDestroy(&tis);CHKERRQ(ierr);
782     } else {
783       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
784     }
785     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
786     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
787     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
788     if (n_benign) {
789       Mat      B0;
790       IS       dummy;
791       PetscInt ii[2];
792 
793       ii[0] = 0;
794       ii[1] = pcbddc->B0_ncol;
795       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr);
796       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
797       ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
798       ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
799       ierr = MatDestroy(&B0);CHKERRQ(ierr);
800       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
801     }
802     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
803   }
804 
805   /* Matrix of coarse basis functions (local) */
806   if (pcbddc->coarse_phi_B) {
807     PetscInt on_B,on_primal,on_D=n_D;
808     if (pcbddc->coarse_phi_D) {
809       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
810     }
811     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
812     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
813       PetscScalar *marray;
814 
815       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
816       ierr = PetscFree(marray);CHKERRQ(ierr);
817       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
818       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
819       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
820       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
821     }
822   }
823 
824   if (!pcbddc->coarse_phi_B) {
825     PetscScalar *marray;
826 
827     n = n_B*pcbddc->local_primal_size;
828     if (pcbddc->switch_static || pcbddc->dbg_flag) {
829       n += n_D*pcbddc->local_primal_size;
830     }
831     if (!pcbddc->symmetric_primal) {
832       n *= 2;
833     }
834     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
835     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
836     n = n_B*pcbddc->local_primal_size;
837     if (pcbddc->switch_static || pcbddc->dbg_flag) {
838       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
839       n += n_D*pcbddc->local_primal_size;
840     }
841     if (!pcbddc->symmetric_primal) {
842       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
843       if (pcbddc->switch_static || pcbddc->dbg_flag) {
844         n = n_B*pcbddc->local_primal_size;
845         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
846       }
847     } else {
848       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
849       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
850       if (pcbddc->switch_static || pcbddc->dbg_flag) {
851         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
852         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
853       }
854     }
855   }
856 
857   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
858   /* vertices */
859   if (n_vertices) {
860 
861     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
862 
863     if (n_R) {
864       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
865       Mat          S_V_benign;
866       PetscBLASInt B_N,B_one = 1;
867       PetscScalar  *x,*y;
868       PetscBool    isseqaij;
869 
870       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
871       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
872       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
873       if (F) { /* TODO could be optimized for symmetric problems */
874         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
875       } else {
876         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
877         for (i=0;i<n_vertices;i++) {
878           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
879           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
880           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
881           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
882           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
883         }
884         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
885       }
886       if (n_benign) {
887         const PetscScalar *vals;
888         const PetscInt    *idxs;
889         PetscInt          n;
890 
891         ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr);
892         ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
893         ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr);
894         ierr = VecSetValues(pcbddc->vec1_R,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
895         ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
896         ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
897         ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr);
898         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,vec_R_benign);CHKERRQ(ierr);
899         ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr);
900       }
901       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
902       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
903       if (n_constraints) {
904         Mat B;
905 
906         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
907         for (i=0;i<n_vertices;i++) {
908           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
909           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
910           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
911           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
912           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
913           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
914         }
915         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
916         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
917         ierr = MatDestroy(&B);CHKERRQ(ierr);
918         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
919         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
920         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
921         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
922         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
923         ierr = MatDestroy(&B);CHKERRQ(ierr);
924       }
925       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
926       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
927         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
928       }
929       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
930       if (n_benign) {
931         ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr);
932       }
933       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
934       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
935       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
936       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
937       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
938       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
939       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
940       if (!n_benign) {
941         ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
942       } else {
943         const PetscScalar *vals;
944         PetscScalar       val;
945         const PetscInt    *idxs;
946         PetscInt          n,j;
947 
948         /* Use explicit loops to populate the local coarse matrix */
949         ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr);
950         for (j=0;j<n_vertices;j++) {
951           for (i=0;i<n_vertices;i++) {
952             coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i];
953           }
954         }
955         ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr);
956         ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr);
957         ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
958         for (j=0;j<n_vertices;j++) {
959           coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j];
960           coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j];
961         }
962         ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
963         ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr);
964         ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr);
965         ierr = VecDot(vec_R_benign,vec_R_benign,&val);CHKERRQ(ierr);
966         coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = -val;
967       }
968       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
969     } else {
970       /* TODO benign case also here */
971       if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented");
972       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
973     }
974     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
975     /* coarse basis functions */
976     for (i=0;i<n_vertices;i++) {
977       PetscScalar *y;
978 
979       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
980       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
981       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
982       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
984       y[n_B*i+idx_V_B[i]] = 1.0;
985       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
986       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
987 
988       if (pcbddc->switch_static || pcbddc->dbg_flag) {
989         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
990         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
991         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
994         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
995       }
996       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
997     }
998     if (n_benign) {
999       PetscScalar *y;
1000 
1001       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1002       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr);
1003       ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004       ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1006       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1007 
1008       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1009         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1010         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1011         ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1012         ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1013         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1014         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1015       }
1016     }
1017     /* if n_R == 0 the object is not destroyed */
1018     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1019   }
1020 
1021   if (n_constraints) {
1022     Mat B;
1023 
1024     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1025     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1026     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1027     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1028     if (n_vertices) {
1029       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1030         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1031       } else {
1032         Mat S_VCt;
1033 
1034         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1035         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1036         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1037       }
1038     }
1039     ierr = MatDestroy(&B);CHKERRQ(ierr);
1040     /* coarse basis functions */
1041     for (i=0;i<n_constraints;i++) {
1042       PetscScalar *y;
1043 
1044       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1045       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1046       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1047       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1048       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1049       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1050       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1051       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1052         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1053         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1054         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1056         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1057         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1058       }
1059       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1060     }
1061   }
1062   if (n_constraints) {
1063     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1064   }
1065 
1066   if (n_benign) {
1067     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1068     ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1069     ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr);
1070   }
1071 
1072   /* compute other basis functions for non-symmetric problems */
1073   if (!pcbddc->symmetric_primal) {
1074 
1075     if (n_constraints) {
1076       Mat S_CCT,B_C;
1077 
1078       /* this is a lazy thing */
1079       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1080       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1081       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1082       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1083       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1084       if (n_vertices) {
1085         Mat B_V,S_VCT;
1086 
1087         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1088         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1089         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1090         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1091         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1092       }
1093       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1094     } else { /* if there are no constraints, reset work */
1095       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1096     }
1097     if (n_vertices && n_R) {
1098       Mat          A_VRT;
1099       PetscScalar  *marray;
1100       PetscBLASInt B_N,B_one = 1;
1101 
1102       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1103       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1104       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1105       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1106       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1107       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1108       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1109     }
1110 
1111     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1112       for (i=0;i<pcbddc->local_primal_size;i++) {
1113         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1114         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1115         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1116         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1117         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1118       }
1119     } else {
1120       for (i=0;i<pcbddc->local_primal_size;i++) {
1121         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1122         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1123         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1124         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1125         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1126       }
1127     }
1128     /* coarse basis functions */
1129     for (i=0;i<pcbddc->local_primal_size;i++) {
1130       PetscScalar *y;
1131 
1132       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1133       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1134       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1135       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1137       if (i<n_vertices) {
1138         y[n_B*i+idx_V_B[i]] = 1.0;
1139       }
1140       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1141       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1142 
1143       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1144         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1145         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1146         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1147         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1148         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1149         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1150       }
1151       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1152     }
1153   }
1154   /* free memory */
1155   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1156   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1157   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1158   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1159   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1160   ierr = PetscFree(work);CHKERRQ(ierr);
1161   if (n_vertices) {
1162     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1163   }
1164   if (n_constraints) {
1165     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1166   }
1167   /* Checking coarse_sub_mat and coarse basis functios */
1168   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1169   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1170   //if (pcbddc->dbg_flag) {
1171   if (0) {
1172     Mat         coarse_sub_mat;
1173     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1174     Mat         coarse_phi_D,coarse_phi_B;
1175     Mat         coarse_psi_D,coarse_psi_B;
1176     Mat         A_II,A_BB,A_IB,A_BI;
1177     Mat         C_B,CPHI;
1178     IS          is_dummy;
1179     Vec         mones;
1180     MatType     checkmattype=MATSEQAIJ;
1181     PetscReal   real_value;
1182 
1183     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1184     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1185     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1186     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1187     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1188     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1189     if (unsymmetric_check) {
1190       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1191       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1192     }
1193     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1194 
1195     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1196     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1197     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1198     if (unsymmetric_check) {
1199       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1200       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1201       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1202       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1203       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1204       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1205       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1206       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1207       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1208       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1209       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1210       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1211     } else {
1212       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1213       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1214       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1215       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1216       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1217       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1218       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1219       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1220     }
1221     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1222     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1223     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1224     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1225     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1226     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1227     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1228     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1229 
1230     /* check constraints */
1231     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1232     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1233     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1234     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1235     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1236     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1237     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1238     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1239     if (unsymmetric_check) {
1240       ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1241       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1242       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1243       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1244       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1245     }
1246     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1247     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1248     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1249     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1250 
1251     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1252     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1253     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1254     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1255     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1256     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1257     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1258     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1259     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1260     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1261     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1262     if (unsymmetric_check) {
1263       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1264       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1265     }
1266     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1267   }
1268   /* get back data */
1269   *coarse_submat_vals_n = coarse_submat_vals;
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1275 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1276 {
1277   Mat            *work_mat;
1278   IS             isrow_s,iscol_s;
1279   PetscBool      rsorted,csorted;
1280   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1281   PetscErrorCode ierr;
1282 
1283   PetscFunctionBegin;
1284   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1285   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1286   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1287   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1288 
1289   if (!rsorted) {
1290     const PetscInt *idxs;
1291     PetscInt *idxs_sorted,i;
1292 
1293     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1294     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1295     for (i=0;i<rsize;i++) {
1296       idxs_perm_r[i] = i;
1297     }
1298     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1299     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1300     for (i=0;i<rsize;i++) {
1301       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1302     }
1303     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1304     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1305   } else {
1306     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1307     isrow_s = isrow;
1308   }
1309 
1310   if (!csorted) {
1311     if (isrow == iscol) {
1312       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1313       iscol_s = isrow_s;
1314     } else {
1315       const PetscInt *idxs;
1316       PetscInt *idxs_sorted,i;
1317 
1318       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1319       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1320       for (i=0;i<csize;i++) {
1321         idxs_perm_c[i] = i;
1322       }
1323       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1324       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1325       for (i=0;i<csize;i++) {
1326         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1327       }
1328       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1329       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1330     }
1331   } else {
1332     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1333     iscol_s = iscol;
1334   }
1335 
1336   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1337 
1338   if (!rsorted || !csorted) {
1339     Mat      new_mat;
1340     IS       is_perm_r,is_perm_c;
1341 
1342     if (!rsorted) {
1343       PetscInt *idxs_r,i;
1344       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1345       for (i=0;i<rsize;i++) {
1346         idxs_r[idxs_perm_r[i]] = i;
1347       }
1348       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1349       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1350     } else {
1351       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1352     }
1353     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1354 
1355     if (!csorted) {
1356       if (isrow_s == iscol_s) {
1357         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1358         is_perm_c = is_perm_r;
1359       } else {
1360         PetscInt *idxs_c,i;
1361         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1362         for (i=0;i<csize;i++) {
1363           idxs_c[idxs_perm_c[i]] = i;
1364         }
1365         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1366         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1367       }
1368     } else {
1369       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1370     }
1371     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1372 
1373     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1374     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1375     work_mat[0] = new_mat;
1376     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1377     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1378   }
1379 
1380   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1381   *B = work_mat[0];
1382   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1383   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1384   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1390 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1391 {
1392   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1393   PC_IS*         pcis = (PC_IS*)pc->data;
1394   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1395   Mat            new_mat;
1396   IS             is_local,is_global;
1397   PetscInt       local_size;
1398   PetscBool      isseqaij;
1399   PetscErrorCode ierr;
1400 
1401   PetscFunctionBegin;
1402   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1403   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1404   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1405   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr);
1406   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1407   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1408   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1409 
1410   /* check */
1411   if (pcbddc->dbg_flag) {
1412     Vec       x,x_change;
1413     PetscReal error;
1414 
1415     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1416     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1417     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1418     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1419     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1420     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1421     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1422     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1423     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1424     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1425     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1426     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1427     ierr = VecDestroy(&x);CHKERRQ(ierr);
1428     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1429   }
1430 
1431   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1432   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1433   if (isseqaij) {
1434     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1435   } else {
1436     Mat work_mat;
1437     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1438     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1439     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1440   }
1441   if (matis->A->symmetric_set) {
1442     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1443 #if !defined(PETSC_USE_COMPLEX)
1444     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1445 #endif
1446   }
1447   /*
1448   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1449   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1450   */
1451   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 #undef __FUNCT__
1456 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1457 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1458 {
1459   PC_IS*          pcis = (PC_IS*)(pc->data);
1460   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1461   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1462   PetscInt        *idx_R_local=NULL;
1463   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1464   PetscInt        vbs,bs;
1465   PetscBT         bitmask=NULL;
1466   PetscErrorCode  ierr;
1467 
1468   PetscFunctionBegin;
1469   /*
1470     No need to setup local scatters if
1471       - primal space is unchanged
1472         AND
1473       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1474         AND
1475       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1476   */
1477   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1478     PetscFunctionReturn(0);
1479   }
1480   /* destroy old objects */
1481   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1482   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1483   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1484   /* Set Non-overlapping dimensions */
1485   n_B = pcis->n_B;
1486   n_D = pcis->n - n_B;
1487   n_vertices = pcbddc->n_vertices;
1488 
1489   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1490 
1491   /* create auxiliary bitmask and allocate workspace */
1492   if (!sub_schurs->reuse_mumps) {
1493     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1494     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1495     for (i=0;i<n_vertices;i++) {
1496       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1497     }
1498 
1499     for (i=0, n_R=0; i<pcis->n; i++) {
1500       if (!PetscBTLookup(bitmask,i)) {
1501         idx_R_local[n_R++] = i;
1502       }
1503     }
1504   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1505     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1506 
1507     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1508     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1509   }
1510 
1511   /* Block code */
1512   vbs = 1;
1513   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1514   if (bs>1 && !(n_vertices%bs)) {
1515     PetscBool is_blocked = PETSC_TRUE;
1516     PetscInt  *vary;
1517     if (!sub_schurs->reuse_mumps) {
1518       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1519       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1520       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1521       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1522       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1523       for (i=0; i<pcis->n/bs; i++) {
1524         if (vary[i]!=0 && vary[i]!=bs) {
1525           is_blocked = PETSC_FALSE;
1526           break;
1527         }
1528       }
1529       ierr = PetscFree(vary);CHKERRQ(ierr);
1530     } else {
1531       /* Verify directly the R set */
1532       for (i=0; i<n_R/bs; i++) {
1533         PetscInt j,node=idx_R_local[bs*i];
1534         for (j=1; j<bs; j++) {
1535           if (node != idx_R_local[bs*i+j]-j) {
1536             is_blocked = PETSC_FALSE;
1537             break;
1538           }
1539         }
1540       }
1541     }
1542     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1543       vbs = bs;
1544       for (i=0;i<n_R/vbs;i++) {
1545         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1546       }
1547     }
1548   }
1549   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1550   if (sub_schurs->reuse_mumps) {
1551     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1552 
1553     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1554     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1555     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1556     reuse_mumps->is_R = pcbddc->is_R_local;
1557   } else {
1558     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1559   }
1560 
1561   /* print some info if requested */
1562   if (pcbddc->dbg_flag) {
1563     PetscInt benign;
1564 
1565     benign = (PetscInt)(!!pcbddc->zerodiag);
1566     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1567     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1568     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1569     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1570     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1571     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr);
1572     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1573   }
1574 
1575   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1576   if (!sub_schurs->reuse_mumps) {
1577     IS       is_aux1,is_aux2;
1578     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1579 
1580     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1581     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1582     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1583     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1584     for (i=0; i<n_D; i++) {
1585       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1586     }
1587     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1588     for (i=0, j=0; i<n_R; i++) {
1589       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1590         aux_array1[j++] = i;
1591       }
1592     }
1593     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1594     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1595     for (i=0, j=0; i<n_B; i++) {
1596       if (!PetscBTLookup(bitmask,is_indices[i])) {
1597         aux_array2[j++] = i;
1598       }
1599     }
1600     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1601     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1602     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1603     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1604     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1605 
1606     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1607       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1608       for (i=0, j=0; i<n_R; i++) {
1609         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1610           aux_array1[j++] = i;
1611         }
1612       }
1613       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1614       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1615       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1616     }
1617     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1618     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1619   } else {
1620     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1621     IS               tis;
1622     PetscInt         schur_size;
1623 
1624     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1625     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1626     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1627     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1628     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1629       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1630       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1631       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1632     }
1633   }
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1640 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1641 {
1642   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1643   PC_IS          *pcis = (PC_IS*)pc->data;
1644   PC             pc_temp;
1645   Mat            A_RR;
1646   MatReuse       reuse;
1647   PetscScalar    m_one = -1.0;
1648   PetscReal      value;
1649   PetscInt       n_D,n_R;
1650   PetscBool      use_exact,use_exact_reduced,issbaij;
1651   PetscErrorCode ierr;
1652   /* prefixes stuff */
1653   char           dir_prefix[256],neu_prefix[256],str_level[16];
1654   size_t         len;
1655 
1656   PetscFunctionBegin;
1657 
1658   /* compute prefixes */
1659   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1660   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1661   if (!pcbddc->current_level) {
1662     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1663     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1664     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1665     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1666   } else {
1667     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1668     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1669     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1670     len -= 15; /* remove "pc_bddc_coarse_" */
1671     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1672     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1673     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1674     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1675     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1676     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1677     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1678     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1679   }
1680 
1681   /* DIRICHLET PROBLEM */
1682   if (dirichlet) {
1683     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1684     if (pcbddc->local_mat->symmetric_set) {
1685       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1686     }
1687     /* Matrix for Dirichlet problem is pcis->A_II */
1688     n_D = pcis->n - pcis->n_B;
1689     if (!pcbddc->ksp_D) { /* create object if not yet build */
1690       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1691       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1692       /* default */
1693       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1694       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1695       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1696       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1697       if (issbaij) {
1698         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1699       } else {
1700         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1701       }
1702       /* Allow user's customization */
1703       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1704       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1705     }
1706     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1707     if (sub_schurs->reuse_mumps) {
1708       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1709 
1710       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1711     }
1712     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1713     if (!n_D) {
1714       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1715       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1716     }
1717     /* Set Up KSP for Dirichlet problem of BDDC */
1718     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1719     /* set ksp_D into pcis data */
1720     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1721     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1722     pcis->ksp_D = pcbddc->ksp_D;
1723   }
1724 
1725   /* NEUMANN PROBLEM */
1726   A_RR = 0;
1727   if (neumann) {
1728     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1729     PetscInt        ibs,mbs;
1730     PetscBool       issbaij;
1731     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1732     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1733     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1734     if (pcbddc->ksp_R) { /* already created ksp */
1735       PetscInt nn_R;
1736       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1737       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1738       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1739       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1740         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1741         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1742         reuse = MAT_INITIAL_MATRIX;
1743       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1744         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1745           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1746           reuse = MAT_INITIAL_MATRIX;
1747         } else { /* safe to reuse the matrix */
1748           reuse = MAT_REUSE_MATRIX;
1749         }
1750       }
1751       /* last check */
1752       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1753         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1754         reuse = MAT_INITIAL_MATRIX;
1755       }
1756     } else { /* first time, so we need to create the matrix */
1757       reuse = MAT_INITIAL_MATRIX;
1758     }
1759     /* extract A_RR */
1760     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1761     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1762     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1763     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1764       if (matis->A == pcbddc->local_mat) {
1765         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1766         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1767       } else {
1768         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1769       }
1770     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1771       if (matis->A == pcbddc->local_mat) {
1772         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1773         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1774       } else {
1775         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1776       }
1777     }
1778     if (!sub_schurs->reuse_mumps) {
1779       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1780       if (pcbddc->local_mat->symmetric_set) {
1781         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1782       }
1783     } else {
1784       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1785 
1786       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1787       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1788       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1789     }
1790     if (!pcbddc->ksp_R) { /* create object if not present */
1791       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1792       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1793       /* default */
1794       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1795       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1796       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1797       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1798       if (issbaij) {
1799         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1800       } else {
1801         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1802       }
1803       /* Allow user's customization */
1804       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1805       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1806     }
1807     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1808     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1809     if (!n_R) {
1810       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1811       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1812     }
1813     /* Reuse MUMPS solver if it is present */
1814     if (sub_schurs->reuse_mumps) {
1815       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1816 
1817       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1818     }
1819     /* Set Up KSP for Neumann problem of BDDC */
1820     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1821   }
1822   /* free Neumann problem's matrix */
1823   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1824 
1825   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1826   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1827     if (pcbddc->dbg_flag) {
1828       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1829       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1830       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1831     }
1832     if (dirichlet) { /* Dirichlet */
1833       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1834       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1835       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1836       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1837       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1838       /* need to be adapted? */
1839       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1840       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1841       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1842       /* print info */
1843       if (pcbddc->dbg_flag) {
1844         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1845         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1846       }
1847       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1848         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
1849       }
1850     }
1851     if (neumann) { /* Neumann */
1852       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1853       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1854       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1855       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1856       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1857       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1858       /* need to be adapted? */
1859       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1860       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1861       /* print info */
1862       if (pcbddc->dbg_flag) {
1863         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1864         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1865       }
1866       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1867         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
1868       }
1869     }
1870   }
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1876 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1877 {
1878   PetscErrorCode  ierr;
1879   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1880   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1881 
1882   PetscFunctionBegin;
1883   if (!sub_schurs->reuse_mumps) {
1884     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1885   }
1886   if (!pcbddc->switch_static) {
1887     if (applytranspose && pcbddc->local_auxmat1) {
1888       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1889       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1890     }
1891     if (!sub_schurs->reuse_mumps) {
1892       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1893       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1894     } else {
1895       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1896 
1897       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1898       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1899     }
1900   } else {
1901     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1902     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1903     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1904     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1905     if (applytranspose && pcbddc->local_auxmat1) {
1906       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1907       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1908       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1909       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1910     }
1911   }
1912   if (!sub_schurs->reuse_mumps) {
1913     if (applytranspose) {
1914       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1915     } else {
1916       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1917     }
1918 #if defined(PETSC_HAVE_MUMPS)
1919   } else {
1920     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1921 
1922     if (applytranspose) {
1923       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1924     } else {
1925       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1926     }
1927 #endif
1928   }
1929   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1930   if (!pcbddc->switch_static) {
1931     if (!sub_schurs->reuse_mumps) {
1932       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1933       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1934     } else {
1935       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1936 
1937       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1938       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1939     }
1940     if (!applytranspose && pcbddc->local_auxmat1) {
1941       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1942       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1943     }
1944   } else {
1945     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1946     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1947     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1948     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1949     if (!applytranspose && pcbddc->local_auxmat1) {
1950       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1951       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1952     }
1953     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1954     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1955     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1956     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1957   }
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1962 #undef __FUNCT__
1963 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1964 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1965 {
1966   PetscErrorCode ierr;
1967   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1968   PC_IS*            pcis = (PC_IS*)  (pc->data);
1969   const PetscScalar zero = 0.0;
1970 
1971   PetscFunctionBegin;
1972   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1973   if (applytranspose) {
1974     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1975     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1976   } else {
1977     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1978     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1979   }
1980   /* start communications from local primal nodes to rhs of coarse solver */
1981   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1982   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1983   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1984 
1985   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1986   /* TODO remove null space when doing multilevel */
1987   if (pcbddc->coarse_ksp) {
1988     Vec rhs,sol;
1989 
1990     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1991     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1992     if (applytranspose) {
1993       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1994     } else {
1995       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1996     }
1997   }
1998 
1999   /* Local solution on R nodes */
2000   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2001     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2002   }
2003 
2004   /* communications from coarse sol to local primal nodes */
2005   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2006   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2007 
2008   /* Sum contributions from two levels */
2009   if (applytranspose) {
2010     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2011     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2012   } else {
2013     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2014     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2015   }
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2021 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2022 {
2023   PetscErrorCode ierr;
2024   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2025   PetscScalar    *array;
2026   Vec            from,to;
2027 
2028   PetscFunctionBegin;
2029   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2030     from = pcbddc->coarse_vec;
2031     to = pcbddc->vec1_P;
2032     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2033       Vec tvec;
2034 
2035       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2036       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2037       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2038       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2039       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2040       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2041     }
2042   } else { /* from local to global -> put data in coarse right hand side */
2043     from = pcbddc->vec1_P;
2044     to = pcbddc->coarse_vec;
2045   }
2046   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 #undef __FUNCT__
2051 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2052 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2053 {
2054   PetscErrorCode ierr;
2055   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2056   PetscScalar    *array;
2057   Vec            from,to;
2058 
2059   PetscFunctionBegin;
2060   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2061     from = pcbddc->coarse_vec;
2062     to = pcbddc->vec1_P;
2063   } else { /* from local to global -> put data in coarse right hand side */
2064     from = pcbddc->vec1_P;
2065     to = pcbddc->coarse_vec;
2066   }
2067   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2068   if (smode == SCATTER_FORWARD) {
2069     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2070       Vec tvec;
2071 
2072       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2073       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2074       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2075       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2076     }
2077   } else {
2078     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2079      ierr = VecResetArray(from);CHKERRQ(ierr);
2080     }
2081   }
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 /* uncomment for testing purposes */
2086 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2087 #undef __FUNCT__
2088 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2089 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2090 {
2091   PetscErrorCode    ierr;
2092   PC_IS*            pcis = (PC_IS*)(pc->data);
2093   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2094   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2095   /* one and zero */
2096   PetscScalar       one=1.0,zero=0.0;
2097   /* space to store constraints and their local indices */
2098   PetscScalar       *constraints_data;
2099   PetscInt          *constraints_idxs,*constraints_idxs_B;
2100   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2101   PetscInt          *constraints_n;
2102   /* iterators */
2103   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2104   /* BLAS integers */
2105   PetscBLASInt      lwork,lierr;
2106   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2107   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2108   /* reuse */
2109   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2110   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2111   /* change of basis */
2112   PetscBool         qr_needed;
2113   PetscBT           change_basis,qr_needed_idx;
2114   /* auxiliary stuff */
2115   PetscInt          *nnz,*is_indices;
2116   PetscInt          ncc;
2117   /* some quantities */
2118   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2119   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2120 
2121   PetscFunctionBegin;
2122   /* Destroy Mat objects computed previously */
2123   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2124   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2125   /* save info on constraints from previous setup (if any) */
2126   olocal_primal_size = pcbddc->local_primal_size;
2127   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2128   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2129   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2130   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2131   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2132   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2133 
2134   /* print some info */
2135   if (pcbddc->dbg_flag) {
2136     IS       vertices;
2137     PetscInt nv,nedges,nfaces;
2138     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2139     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2140     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2141     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2142     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2143     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2144     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2145     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2146     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2147   }
2148 
2149   if (!pcbddc->adaptive_selection) {
2150     IS           ISForVertices,*ISForFaces,*ISForEdges;
2151     MatNullSpace nearnullsp;
2152     const Vec    *nearnullvecs;
2153     Vec          *localnearnullsp;
2154     PetscScalar  *array;
2155     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2156     PetscBool    nnsp_has_cnst;
2157     /* LAPACK working arrays for SVD or POD */
2158     PetscBool    skip_lapack,boolforchange;
2159     PetscScalar  *work;
2160     PetscReal    *singular_vals;
2161 #if defined(PETSC_USE_COMPLEX)
2162     PetscReal    *rwork;
2163 #endif
2164 #if defined(PETSC_MISSING_LAPACK_GESVD)
2165     PetscScalar  *temp_basis,*correlation_mat;
2166 #else
2167     PetscBLASInt dummy_int=1;
2168     PetscScalar  dummy_scalar=1.;
2169 #endif
2170 
2171     /* Get index sets for faces, edges and vertices from graph */
2172     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2173     /* free unneeded index sets */
2174     if (!pcbddc->use_vertices) {
2175       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2176     }
2177     if (!pcbddc->use_edges) {
2178       for (i=0;i<n_ISForEdges;i++) {
2179         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2180       }
2181       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2182       n_ISForEdges = 0;
2183     }
2184     if (!pcbddc->use_faces) {
2185       for (i=0;i<n_ISForFaces;i++) {
2186         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2187       }
2188       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2189       n_ISForFaces = 0;
2190     }
2191 
2192 #if defined(PETSC_USE_DEBUG)
2193     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2194        Also use_change_of_basis should be consistent among processors */
2195     if (pcbddc->NullSpace) {
2196       PetscBool tbool[2],gbool[2];
2197 
2198       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2199         pcbddc->use_change_of_basis = PETSC_TRUE;
2200         if (!ISForEdges) {
2201           pcbddc->use_change_on_faces = PETSC_TRUE;
2202         }
2203       }
2204       tbool[0] = pcbddc->use_change_of_basis;
2205       tbool[1] = pcbddc->use_change_on_faces;
2206       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2207       pcbddc->use_change_of_basis = gbool[0];
2208       pcbddc->use_change_on_faces = gbool[1];
2209     }
2210 #endif
2211 
2212     /* check if near null space is attached to global mat */
2213     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2214     if (nearnullsp) {
2215       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2216       /* remove any stored info */
2217       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2218       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2219       /* store information for BDDC solver reuse */
2220       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2221       pcbddc->onearnullspace = nearnullsp;
2222       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2223       for (i=0;i<nnsp_size;i++) {
2224         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2225       }
2226     } else { /* if near null space is not provided BDDC uses constants by default */
2227       nnsp_size = 0;
2228       nnsp_has_cnst = PETSC_TRUE;
2229     }
2230     /* get max number of constraints on a single cc */
2231     max_constraints = nnsp_size;
2232     if (nnsp_has_cnst) max_constraints++;
2233 
2234     /*
2235          Evaluate maximum storage size needed by the procedure
2236          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2237          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2238          There can be multiple constraints per connected component
2239                                                                                                                                                            */
2240     n_vertices = 0;
2241     if (ISForVertices) {
2242       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2243     }
2244     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2245     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2246 
2247     total_counts = n_ISForFaces+n_ISForEdges;
2248     total_counts *= max_constraints;
2249     total_counts += n_vertices;
2250     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2251 
2252     total_counts = 0;
2253     max_size_of_constraint = 0;
2254     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2255       IS used_is;
2256       if (i<n_ISForEdges) {
2257         used_is = ISForEdges[i];
2258       } else {
2259         used_is = ISForFaces[i-n_ISForEdges];
2260       }
2261       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2262       total_counts += j;
2263       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2264     }
2265     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2266 
2267     /* get local part of global near null space vectors */
2268     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2269     for (k=0;k<nnsp_size;k++) {
2270       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2271       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2272       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2273     }
2274 
2275     /* whether or not to skip lapack calls */
2276     skip_lapack = PETSC_TRUE;
2277     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2278 
2279     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2280     if (!skip_lapack) {
2281       PetscScalar temp_work;
2282 
2283 #if defined(PETSC_MISSING_LAPACK_GESVD)
2284       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2285       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2286       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2287       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2288 #if defined(PETSC_USE_COMPLEX)
2289       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2290 #endif
2291       /* now we evaluate the optimal workspace using query with lwork=-1 */
2292       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2293       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2294       lwork = -1;
2295       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2296 #if !defined(PETSC_USE_COMPLEX)
2297       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2298 #else
2299       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2300 #endif
2301       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2302       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2303 #else /* on missing GESVD */
2304       /* SVD */
2305       PetscInt max_n,min_n;
2306       max_n = max_size_of_constraint;
2307       min_n = max_constraints;
2308       if (max_size_of_constraint < max_constraints) {
2309         min_n = max_size_of_constraint;
2310         max_n = max_constraints;
2311       }
2312       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2313 #if defined(PETSC_USE_COMPLEX)
2314       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2315 #endif
2316       /* now we evaluate the optimal workspace using query with lwork=-1 */
2317       lwork = -1;
2318       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2319       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2320       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2321       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2322 #if !defined(PETSC_USE_COMPLEX)
2323       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2324 #else
2325       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2326 #endif
2327       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2328       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2329 #endif /* on missing GESVD */
2330       /* Allocate optimal workspace */
2331       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2332       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2333     }
2334     /* Now we can loop on constraining sets */
2335     total_counts = 0;
2336     constraints_idxs_ptr[0] = 0;
2337     constraints_data_ptr[0] = 0;
2338     /* vertices */
2339     if (n_vertices) {
2340       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2341       if (nnsp_has_cnst) { /* it considers all possible vertices */
2342         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2343         for (i=0;i<n_vertices;i++) {
2344           constraints_n[total_counts] = 1;
2345           constraints_data[total_counts] = 1.0;
2346           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2347           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2348           total_counts++;
2349         }
2350       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2351         PetscBool used_vertex;
2352         for (i=0;i<n_vertices;i++) {
2353           used_vertex = PETSC_FALSE;
2354           k = 0;
2355           while (!used_vertex && k<nnsp_size) {
2356             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2357             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2358               constraints_n[total_counts] = 1;
2359               constraints_idxs[total_counts] = is_indices[i];
2360               constraints_data[total_counts] = 1.0;
2361               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2362               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2363               total_counts++;
2364               used_vertex = PETSC_TRUE;
2365             }
2366             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2367             k++;
2368           }
2369         }
2370       }
2371       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2372       n_vertices = total_counts;
2373     }
2374 
2375     /* edges and faces */
2376     total_counts_cc = total_counts;
2377     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2378       IS        used_is;
2379       PetscBool idxs_copied = PETSC_FALSE;
2380 
2381       if (ncc<n_ISForEdges) {
2382         used_is = ISForEdges[ncc];
2383         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2384       } else {
2385         used_is = ISForFaces[ncc-n_ISForEdges];
2386         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2387       }
2388       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2389 
2390       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2391       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2392       /* change of basis should not be performed on local periodic nodes */
2393       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2394       if (nnsp_has_cnst) {
2395         PetscScalar quad_value;
2396 
2397         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2398         idxs_copied = PETSC_TRUE;
2399 
2400         if (!pcbddc->use_nnsp_true) {
2401           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2402         } else {
2403           quad_value = 1.0;
2404         }
2405         for (j=0;j<size_of_constraint;j++) {
2406           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2407         }
2408         temp_constraints++;
2409         total_counts++;
2410       }
2411       for (k=0;k<nnsp_size;k++) {
2412         PetscReal real_value;
2413         PetscScalar *ptr_to_data;
2414 
2415         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2416         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2417         for (j=0;j<size_of_constraint;j++) {
2418           ptr_to_data[j] = array[is_indices[j]];
2419         }
2420         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2421         /* check if array is null on the connected component */
2422         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2423         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2424         if (real_value > 0.0) { /* keep indices and values */
2425           temp_constraints++;
2426           total_counts++;
2427           if (!idxs_copied) {
2428             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2429             idxs_copied = PETSC_TRUE;
2430           }
2431         }
2432       }
2433       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2434       valid_constraints = temp_constraints;
2435       if (!pcbddc->use_nnsp_true && temp_constraints) {
2436         if (temp_constraints == 1) { /* just normalize the constraint */
2437           PetscScalar norm,*ptr_to_data;
2438 
2439           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2440           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2441           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2442           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2443           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2444         } else { /* perform SVD */
2445           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2446           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2447 
2448 #if defined(PETSC_MISSING_LAPACK_GESVD)
2449           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2450              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2451              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2452                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2453                 from that computed using LAPACKgesvd
2454              -> This is due to a different computation of eigenvectors in LAPACKheev
2455              -> The quality of the POD-computed basis will be the same */
2456           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2457           /* Store upper triangular part of correlation matrix */
2458           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2459           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2460           for (j=0;j<temp_constraints;j++) {
2461             for (k=0;k<j+1;k++) {
2462               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2463             }
2464           }
2465           /* compute eigenvalues and eigenvectors of correlation matrix */
2466           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2467           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2468 #if !defined(PETSC_USE_COMPLEX)
2469           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2470 #else
2471           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2472 #endif
2473           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2474           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2475           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2476           j = 0;
2477           while (j < temp_constraints && singular_vals[j] < tol) j++;
2478           total_counts = total_counts-j;
2479           valid_constraints = temp_constraints-j;
2480           /* scale and copy POD basis into used quadrature memory */
2481           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2482           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2483           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2484           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2485           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2486           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2487           if (j<temp_constraints) {
2488             PetscInt ii;
2489             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2490             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2491             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2492             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2493             for (k=0;k<temp_constraints-j;k++) {
2494               for (ii=0;ii<size_of_constraint;ii++) {
2495                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2496               }
2497             }
2498           }
2499 #else  /* on missing GESVD */
2500           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2501           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2502           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2503           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2504 #if !defined(PETSC_USE_COMPLEX)
2505           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2506 #else
2507           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2508 #endif
2509           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2510           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2511           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2512           k = temp_constraints;
2513           if (k > size_of_constraint) k = size_of_constraint;
2514           j = 0;
2515           while (j < k && singular_vals[k-j-1] < tol) j++;
2516           valid_constraints = k-j;
2517           total_counts = total_counts-temp_constraints+valid_constraints;
2518 #endif /* on missing GESVD */
2519         }
2520       }
2521       /* update pointers information */
2522       if (valid_constraints) {
2523         constraints_n[total_counts_cc] = valid_constraints;
2524         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2525         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2526         /* set change_of_basis flag */
2527         if (boolforchange) {
2528           PetscBTSet(change_basis,total_counts_cc);
2529         }
2530         total_counts_cc++;
2531       }
2532     }
2533     /* free workspace */
2534     if (!skip_lapack) {
2535       ierr = PetscFree(work);CHKERRQ(ierr);
2536 #if defined(PETSC_USE_COMPLEX)
2537       ierr = PetscFree(rwork);CHKERRQ(ierr);
2538 #endif
2539       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2540 #if defined(PETSC_MISSING_LAPACK_GESVD)
2541       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2542       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2543 #endif
2544     }
2545     for (k=0;k<nnsp_size;k++) {
2546       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2547     }
2548     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2549     /* free index sets of faces, edges and vertices */
2550     for (i=0;i<n_ISForFaces;i++) {
2551       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2552     }
2553     if (n_ISForFaces) {
2554       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2555     }
2556     for (i=0;i<n_ISForEdges;i++) {
2557       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2558     }
2559     if (n_ISForEdges) {
2560       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2561     }
2562     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2563   } else {
2564     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2565 
2566     total_counts = 0;
2567     n_vertices = 0;
2568     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2569       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2570     }
2571     max_constraints = 0;
2572     total_counts_cc = 0;
2573     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2574       total_counts += pcbddc->adaptive_constraints_n[i];
2575       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2576       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2577     }
2578     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2579     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2580     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2581     constraints_data = pcbddc->adaptive_constraints_data;
2582     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2583     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2584     total_counts_cc = 0;
2585     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2586       if (pcbddc->adaptive_constraints_n[i]) {
2587         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2588       }
2589     }
2590 #if 0
2591     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2592     for (i=0;i<total_counts_cc;i++) {
2593       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2594       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2595       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2596         printf(" %d",constraints_idxs[j]);
2597       }
2598       printf("\n");
2599       printf("number of cc: %d\n",constraints_n[i]);
2600     }
2601     for (i=0;i<n_vertices;i++) {
2602       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2603     }
2604     for (i=0;i<sub_schurs->n_subs;i++) {
2605       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2606     }
2607 #endif
2608 
2609     max_size_of_constraint = 0;
2610     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2611     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2612     /* Change of basis */
2613     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2614     if (pcbddc->use_change_of_basis) {
2615       for (i=0;i<sub_schurs->n_subs;i++) {
2616         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2617           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2618         }
2619       }
2620     }
2621   }
2622   pcbddc->local_primal_size = total_counts;
2623   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2624   ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2625 
2626   /* map constraints_idxs in boundary numbering */
2627   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2628   if (i != constraints_idxs_ptr[total_counts_cc]) {
2629     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2630   }
2631 
2632   /* Create constraint matrix */
2633   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2634   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2635   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2636 
2637   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2638   /* determine if a QR strategy is needed for change of basis */
2639   qr_needed = PETSC_FALSE;
2640   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2641   total_primal_vertices=0;
2642   pcbddc->local_primal_size_cc = 0;
2643   for (i=0;i<total_counts_cc;i++) {
2644     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2645     if (size_of_constraint == 1) {
2646       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2647       pcbddc->local_primal_size_cc += 1;
2648     } else if (PetscBTLookup(change_basis,i)) {
2649       for (k=0;k<constraints_n[i];k++) {
2650         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2651       }
2652       pcbddc->local_primal_size_cc += constraints_n[i];
2653       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2654         PetscBTSet(qr_needed_idx,i);
2655         qr_needed = PETSC_TRUE;
2656       }
2657     } else {
2658       pcbddc->local_primal_size_cc += 1;
2659     }
2660   }
2661   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2662   pcbddc->n_vertices = total_primal_vertices;
2663   /* permute indices in order to have a sorted set of vertices */
2664   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2665 
2666   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2667   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2668   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2669   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2670 
2671   /* nonzero structure of constraint matrix */
2672   /* and get reference dof for local constraints */
2673   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2674   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2675 
2676   j = total_primal_vertices;
2677   total_counts = total_primal_vertices;
2678   cum = total_primal_vertices;
2679   for (i=n_vertices;i<total_counts_cc;i++) {
2680     if (!PetscBTLookup(change_basis,i)) {
2681       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2682       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2683       cum++;
2684       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2685       for (k=0;k<constraints_n[i];k++) {
2686         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2687         nnz[j+k] = size_of_constraint;
2688       }
2689       j += constraints_n[i];
2690     }
2691   }
2692   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2693   ierr = PetscFree(nnz);CHKERRQ(ierr);
2694 
2695   /* set values in constraint matrix */
2696   for (i=0;i<total_primal_vertices;i++) {
2697     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2698   }
2699   total_counts = total_primal_vertices;
2700   for (i=n_vertices;i<total_counts_cc;i++) {
2701     if (!PetscBTLookup(change_basis,i)) {
2702       PetscInt *cols;
2703 
2704       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2705       cols = constraints_idxs+constraints_idxs_ptr[i];
2706       for (k=0;k<constraints_n[i];k++) {
2707         PetscInt    row = total_counts+k;
2708         PetscScalar *vals;
2709 
2710         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2711         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2712       }
2713       total_counts += constraints_n[i];
2714     }
2715   }
2716   /* assembling */
2717   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2718   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2719 
2720   /*
2721   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2722   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2723   */
2724   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2725   if (pcbddc->use_change_of_basis) {
2726     /* dual and primal dofs on a single cc */
2727     PetscInt     dual_dofs,primal_dofs;
2728     /* working stuff for GEQRF */
2729     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2730     PetscBLASInt lqr_work;
2731     /* working stuff for UNGQR */
2732     PetscScalar  *gqr_work,lgqr_work_t;
2733     PetscBLASInt lgqr_work;
2734     /* working stuff for TRTRS */
2735     PetscScalar  *trs_rhs;
2736     PetscBLASInt Blas_NRHS;
2737     /* pointers for values insertion into change of basis matrix */
2738     PetscInt     *start_rows,*start_cols;
2739     PetscScalar  *start_vals;
2740     /* working stuff for values insertion */
2741     PetscBT      is_primal;
2742     PetscInt     *aux_primal_numbering_B;
2743     /* matrix sizes */
2744     PetscInt     global_size,local_size;
2745     /* temporary change of basis */
2746     Mat          localChangeOfBasisMatrix;
2747     /* extra space for debugging */
2748     PetscScalar  *dbg_work;
2749 
2750     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2751     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2752     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2753     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2754     /* nonzeros for local mat */
2755     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2756     for (i=0;i<pcis->n;i++) nnz[i]=1;
2757     for (i=n_vertices;i<total_counts_cc;i++) {
2758       if (PetscBTLookup(change_basis,i)) {
2759         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2760         if (PetscBTLookup(qr_needed_idx,i)) {
2761           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2762         } else {
2763           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2764           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2765         }
2766       }
2767     }
2768     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2769     ierr = PetscFree(nnz);CHKERRQ(ierr);
2770     /* Set initial identity in the matrix */
2771     for (i=0;i<pcis->n;i++) {
2772       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2773     }
2774 
2775     if (pcbddc->dbg_flag) {
2776       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2777       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2778     }
2779 
2780 
2781     /* Now we loop on the constraints which need a change of basis */
2782     /*
2783        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2784        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2785 
2786        Basic blocks of change of basis matrix T computed by
2787 
2788           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2789 
2790             | 1        0   ...        0         s_1/S |
2791             | 0        1   ...        0         s_2/S |
2792             |              ...                        |
2793             | 0        ...            1     s_{n-1}/S |
2794             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2795 
2796             with S = \sum_{i=1}^n s_i^2
2797             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2798                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2799 
2800           - QR decomposition of constraints otherwise
2801     */
2802     if (qr_needed) {
2803       /* space to store Q */
2804       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2805       /* first we issue queries for optimal work */
2806       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2807       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2808       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2809       lqr_work = -1;
2810       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2811       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2812       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2813       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2814       lgqr_work = -1;
2815       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2816       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2817       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2818       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2819       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2820       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2821       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2822       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2823       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2824       /* array to store scaling factors for reflectors */
2825       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2826       /* array to store rhs and solution of triangular solver */
2827       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2828       /* allocating workspace for check */
2829       if (pcbddc->dbg_flag) {
2830         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2831       }
2832     }
2833     /* array to store whether a node is primal or not */
2834     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2835     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2836     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2837     if (i != total_primal_vertices) {
2838       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2839     }
2840     for (i=0;i<total_primal_vertices;i++) {
2841       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2842     }
2843     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2844 
2845     /* loop on constraints and see whether or not they need a change of basis and compute it */
2846     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2847       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2848       if (PetscBTLookup(change_basis,total_counts)) {
2849         /* get constraint info */
2850         primal_dofs = constraints_n[total_counts];
2851         dual_dofs = size_of_constraint-primal_dofs;
2852 
2853         if (pcbddc->dbg_flag) {
2854           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2855         }
2856 
2857         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2858 
2859           /* copy quadrature constraints for change of basis check */
2860           if (pcbddc->dbg_flag) {
2861             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2862           }
2863           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2864           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2865 
2866           /* compute QR decomposition of constraints */
2867           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2868           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2869           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2870           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2871           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2872           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2873           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2874 
2875           /* explictly compute R^-T */
2876           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2877           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2878           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2879           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2880           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2881           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2882           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2883           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2884           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2885           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2886 
2887           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2888           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2889           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2890           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2891           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2892           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2893           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2894           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2895           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2896 
2897           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2898              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2899              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2900           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2901           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2902           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2903           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2904           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2905           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2906           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2907           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2908           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2909           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2910 
2911           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2912           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2913           /* insert cols for primal dofs */
2914           for (j=0;j<primal_dofs;j++) {
2915             start_vals = &qr_basis[j*size_of_constraint];
2916             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2917             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2918           }
2919           /* insert cols for dual dofs */
2920           for (j=0,k=0;j<dual_dofs;k++) {
2921             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2922               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2923               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2924               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2925               j++;
2926             }
2927           }
2928 
2929           /* check change of basis */
2930           if (pcbddc->dbg_flag) {
2931             PetscInt   ii,jj;
2932             PetscBool valid_qr=PETSC_TRUE;
2933             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2934             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2935             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2936             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2937             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2938             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2939             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2940             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2941             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2942             for (jj=0;jj<size_of_constraint;jj++) {
2943               for (ii=0;ii<primal_dofs;ii++) {
2944                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2945                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2946               }
2947             }
2948             if (!valid_qr) {
2949               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2950               for (jj=0;jj<size_of_constraint;jj++) {
2951                 for (ii=0;ii<primal_dofs;ii++) {
2952                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2953                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2954                   }
2955                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2956                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2957                   }
2958                 }
2959               }
2960             } else {
2961               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2962             }
2963           }
2964         } else { /* simple transformation block */
2965           PetscInt    row,col;
2966           PetscScalar val,norm;
2967 
2968           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2969           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2970           for (j=0;j<size_of_constraint;j++) {
2971             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2972             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2973             if (!PetscBTLookup(is_primal,row_B)) {
2974               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2975               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2976               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2977             } else {
2978               for (k=0;k<size_of_constraint;k++) {
2979                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2980                 if (row != col) {
2981                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2982                 } else {
2983                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2984                 }
2985                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2986               }
2987             }
2988           }
2989           if (pcbddc->dbg_flag) {
2990             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2991           }
2992         }
2993       } else {
2994         if (pcbddc->dbg_flag) {
2995           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2996         }
2997       }
2998     }
2999 
3000     /* free workspace */
3001     if (qr_needed) {
3002       if (pcbddc->dbg_flag) {
3003         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3004       }
3005       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3006       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3007       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3008       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3009       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3010     }
3011     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3012     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3013     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3014 
3015     /* assembling of global change of variable */
3016     {
3017       Mat      tmat;
3018       PetscInt bs;
3019 
3020       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3021       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3022       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3023       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3024       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3025       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3026       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3027       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3028       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3029       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3030       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3031       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3032       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3033       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3034       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3035       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3036       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3037       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3038     }
3039     /* check */
3040     if (pcbddc->dbg_flag) {
3041       PetscReal error;
3042       Vec       x,x_change;
3043 
3044       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3045       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3046       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3047       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3048       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3049       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3050       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3051       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3052       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3053       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3054       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3055       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3056       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3057       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3058       ierr = VecDestroy(&x);CHKERRQ(ierr);
3059       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3060     }
3061 
3062     /* adapt sub_schurs computed (if any) */
3063     if (pcbddc->use_deluxe_scaling) {
3064       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3065       if (sub_schurs->S_Ej_all) {
3066         Mat S_new,tmat;
3067         IS is_all_N;
3068 
3069         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3070         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3071         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3072         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3073         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3074         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3075         sub_schurs->S_Ej_all = S_new;
3076         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3077         if (sub_schurs->sum_S_Ej_all) {
3078           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3079           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3080           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3081           sub_schurs->sum_S_Ej_all = S_new;
3082           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3083         }
3084         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3085       }
3086     }
3087     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3088   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3089     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3090     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3091   }
3092 
3093   /* set up change of basis context */
3094   if (pcbddc->ChangeOfBasisMatrix) {
3095     PCBDDCChange_ctx change_ctx;
3096 
3097     if (!pcbddc->new_global_mat) {
3098       PetscInt global_size,local_size;
3099 
3100       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3101       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3102       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3103       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3104       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3105       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3106       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3107       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3108       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3109     } else {
3110       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3111       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3112       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3113     }
3114     if (!pcbddc->user_ChangeOfBasisMatrix) {
3115       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3116       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3117     } else {
3118       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3119       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3120     }
3121     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3122     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3123     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3124     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3125   }
3126 
3127   /* add pressure dof to set of primal nodes for numbering purposes */
3128   if (pcbddc->zerodiag) {
3129     const PetscInt *idxs;
3130     PetscInt       nz;
3131 
3132     ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr);
3133     ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr);
3134     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = idxs[nz-1];
3135     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = idxs[nz-1];
3136     ierr = ISRestoreIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr);
3137     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3138     pcbddc->local_primal_size_cc++;
3139     pcbddc->local_primal_size++;
3140   }
3141 
3142   /* check if a new primal space has been introduced (also take into account benign trick) */
3143   pcbddc->new_primal_space_local = PETSC_TRUE;
3144   if (olocal_primal_size == pcbddc->local_primal_size) {
3145     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3146     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3147     if (!pcbddc->new_primal_space_local) {
3148       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3149       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3150     }
3151   }
3152   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3153   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3154   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3155 
3156   /* flush dbg viewer */
3157   if (pcbddc->dbg_flag) {
3158     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3159   }
3160 
3161   /* free workspace */
3162   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3163   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3164   if (!pcbddc->adaptive_selection) {
3165     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3166     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3167   } else {
3168     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3169                       pcbddc->adaptive_constraints_idxs_ptr,
3170                       pcbddc->adaptive_constraints_data_ptr,
3171                       pcbddc->adaptive_constraints_idxs,
3172                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3173     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3174     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3175   }
3176   PetscFunctionReturn(0);
3177 }
3178 
3179 #undef __FUNCT__
3180 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3181 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3182 {
3183   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3184   PC_IS       *pcis = (PC_IS*)pc->data;
3185   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3186   PetscInt    ierr,i,vertex_size,N;
3187   PetscViewer viewer=pcbddc->dbg_viewer;
3188 
3189   PetscFunctionBegin;
3190   /* Reset previously computed graph */
3191   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3192   /* Init local Graph struct */
3193   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3194   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3195 
3196   /* Check validity of the csr graph passed in by the user */
3197   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3198     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3199   }
3200 
3201   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3202   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3203     PetscInt  *xadj,*adjncy;
3204     PetscInt  nvtxs;
3205     PetscBool flg_row=PETSC_FALSE;
3206 
3207     if (pcbddc->use_local_adj) {
3208 
3209       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3210       if (flg_row) {
3211         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3212         pcbddc->computed_rowadj = PETSC_TRUE;
3213       }
3214       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3215     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3216       IS                     is_dummy;
3217       ISLocalToGlobalMapping l2gmap_dummy;
3218       PetscInt               j,sum;
3219       PetscInt               *cxadj,*cadjncy;
3220       const PetscInt         *idxs;
3221       PCBDDCGraph            graph;
3222       PetscBT                is_on_boundary;
3223 
3224       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3225       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3226       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3227       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3228       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3229       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3230       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3231       if (flg_row) {
3232         graph->xadj = xadj;
3233         graph->adjncy = adjncy;
3234       }
3235       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3236       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3237       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3238 
3239       if (pcbddc->dbg_flag) {
3240         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3241         for (i=0;i<graph->ncc;i++) {
3242           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3243         }
3244       }
3245 
3246       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3247       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3248       for (i=0;i<pcis->n_B;i++) {
3249         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3250       }
3251       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3252 
3253       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3254       sum = 0;
3255       for (i=0;i<graph->ncc;i++) {
3256         PetscInt sizecc = 0;
3257         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3258           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3259             sizecc++;
3260           }
3261         }
3262         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3263           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3264             cxadj[graph->queue[j]] = sizecc;
3265           }
3266         }
3267         sum += sizecc*sizecc;
3268       }
3269       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3270       sum = 0;
3271       for (i=0;i<pcis->n;i++) {
3272         PetscInt temp = cxadj[i];
3273         cxadj[i] = sum;
3274         sum += temp;
3275       }
3276       cxadj[pcis->n] = sum;
3277       for (i=0;i<graph->ncc;i++) {
3278         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3279           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3280             PetscInt k,sizecc = 0;
3281             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3282               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3283                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3284                 sizecc++;
3285               }
3286             }
3287           }
3288         }
3289       }
3290       if (sum) {
3291         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3292       } else {
3293         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3294         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3295       }
3296       graph->xadj = 0;
3297       graph->adjncy = 0;
3298       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3299       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3300     }
3301   }
3302   if (pcbddc->dbg_flag) {
3303     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3304   }
3305 
3306   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3307   vertex_size = 1;
3308   if (pcbddc->user_provided_isfordofs) {
3309     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3310       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3311       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3312         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3313         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3314       }
3315       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3316       pcbddc->n_ISForDofs = 0;
3317       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3318     }
3319     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3320     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3321   } else {
3322     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3323       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3324       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3325       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3326         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3327       }
3328     }
3329   }
3330 
3331   /* Setup of Graph */
3332   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3333     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3334   }
3335   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3336     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3337   }
3338   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3339 
3340   /* Graph's connected components analysis */
3341   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3342 
3343   /* print some info to stdout */
3344   if (pcbddc->dbg_flag) {
3345     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3346   }
3347 
3348   /* mark topography has done */
3349   pcbddc->recompute_topography = PETSC_FALSE;
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3354 #undef __FUNCT__
3355 #define __FUNCT__ "PCBDDCSubsetNumbering"
3356 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3357 {
3358   PetscSF        sf;
3359   PetscLayout    map;
3360   const PetscInt *idxs;
3361   PetscInt       *leaf_data,*root_data,*gidxs;
3362   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3363   PetscInt       n_n,nlocals,start,first_index;
3364   PetscMPIInt    commsize;
3365   PetscBool      first_found;
3366   PetscErrorCode ierr;
3367 
3368   PetscFunctionBegin;
3369   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3370   if (subset_mult) {
3371     PetscCheckSameComm(subset,1,subset_mult,2);
3372     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3373     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3374   }
3375   /* create workspace layout for computing global indices of subset */
3376   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3377   lbounds[0] = lbounds[1] = 0;
3378   for (i=0;i<n;i++) {
3379     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3380     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3381   }
3382   lbounds[0] = -lbounds[0];
3383   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3384   gbounds[0] = -gbounds[0];
3385   N = gbounds[1] - gbounds[0] + 1;
3386   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3387   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3388   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3389   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3390   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3391 
3392   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3393   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3394   if (subset_mult) {
3395     const PetscInt* idxs_mult;
3396 
3397     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3398     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3399     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3400   } else {
3401     for (i=0;i<n;i++) leaf_data[i] = 1;
3402   }
3403   /* local size of new subset */
3404   n_n = 0;
3405   for (i=0;i<n;i++) n_n += leaf_data[i];
3406 
3407   /* global indexes in layout */
3408   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3409   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3410   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3411   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3412   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3413   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3414 
3415   /* reduce from leaves to roots */
3416   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3417   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3418   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3419 
3420   /* count indexes in local part of layout */
3421   nlocals = 0;
3422   first_index = -1;
3423   first_found = PETSC_FALSE;
3424   for (i=0;i<Nl;i++) {
3425     if (!first_found && root_data[i]) {
3426       first_found = PETSC_TRUE;
3427       first_index = i;
3428     }
3429     nlocals += root_data[i];
3430   }
3431 
3432   /* cumulative of number of indexes and size of subset without holes */
3433 #if defined(PETSC_HAVE_MPI_EXSCAN)
3434   start = 0;
3435   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3436 #else
3437   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3438   start = start-nlocals;
3439 #endif
3440 
3441   if (N_n) { /* compute total size of new subset if requested */
3442     *N_n = start + nlocals;
3443     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3444     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3445   }
3446 
3447   /* adapt root data with cumulative */
3448   if (first_found) {
3449     PetscInt old_index;
3450 
3451     root_data[first_index] += start;
3452     old_index = first_index;
3453     for (i=first_index+1;i<Nl;i++) {
3454       if (root_data[i]) {
3455         root_data[i] += root_data[old_index];
3456         old_index = i;
3457       }
3458     }
3459   }
3460 
3461   /* from roots to leaves */
3462   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3463   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3464   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3465 
3466   /* create new IS with global indexes without holes */
3467   if (subset_mult) {
3468     const PetscInt* idxs_mult;
3469     PetscInt        cum;
3470 
3471     cum = 0;
3472     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3473     for (i=0;i<n;i++) {
3474       PetscInt j;
3475       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3476     }
3477     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3478   } else {
3479     for (i=0;i<n;i++) {
3480       gidxs[i] = leaf_data[i]-1;
3481     }
3482   }
3483   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3484   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3485   PetscFunctionReturn(0);
3486 }
3487 
3488 #undef __FUNCT__
3489 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3490 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3491 {
3492   PetscInt       i,j;
3493   PetscScalar    *alphas;
3494   PetscErrorCode ierr;
3495 
3496   PetscFunctionBegin;
3497   /* this implements stabilized Gram-Schmidt */
3498   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3499   for (i=0;i<n;i++) {
3500     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3501     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3502     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3503   }
3504   ierr = PetscFree(alphas);CHKERRQ(ierr);
3505   PetscFunctionReturn(0);
3506 }
3507 
3508 #undef __FUNCT__
3509 #define __FUNCT__ "MatISGetSubassemblingPattern"
3510 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3511 {
3512   IS             ranks_send_to;
3513   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3514   PetscMPIInt    size,rank,color;
3515   PetscInt       *xadj,*adjncy;
3516   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3517   PetscInt       i,local_size,threshold=0;
3518   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3519   PetscSubcomm   subcomm;
3520   PetscErrorCode ierr;
3521 
3522   PetscFunctionBegin;
3523   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3524   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3525   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3526 
3527   /* Get info on mapping */
3528   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
3529   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3530 
3531   /* build local CSR graph of subdomains' connectivity */
3532   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3533   xadj[0] = 0;
3534   xadj[1] = PetscMax(n_neighs-1,0);
3535   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3536   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3537 
3538   if (threshold) {
3539     PetscInt xadj_count = 0;
3540     for (i=1;i<n_neighs;i++) {
3541       if (n_shared[i] > threshold) {
3542         adjncy[xadj_count] = neighs[i];
3543         adjncy_wgt[xadj_count] = n_shared[i];
3544         xadj_count++;
3545       }
3546     }
3547     xadj[1] = xadj_count;
3548   } else {
3549     if (xadj[1]) {
3550       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3551       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3552     }
3553   }
3554   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3555   if (use_square) {
3556     for (i=0;i<xadj[1];i++) {
3557       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3558     }
3559   }
3560   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3561 
3562   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3563 
3564   /*
3565     Restrict work on active processes only.
3566   */
3567   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3568   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3569   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3570   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3571   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3572   if (color) {
3573     ierr = PetscFree(xadj);CHKERRQ(ierr);
3574     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3575     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3576   } else {
3577     Mat             subdomain_adj;
3578     IS              new_ranks,new_ranks_contig;
3579     MatPartitioning partitioner;
3580     PetscInt        prank,rstart=0,rend=0;
3581     PetscInt        *is_indices,*oldranks;
3582     PetscBool       aggregate;
3583 
3584     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3585     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3586     prank = rank;
3587     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3588     /*
3589     for (i=0;i<size;i++) {
3590       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3591     }
3592     */
3593     for (i=0;i<xadj[1];i++) {
3594       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3595     }
3596     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3597     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3598     if (aggregate) {
3599       PetscInt    lrows,row,ncols,*cols;
3600       PetscMPIInt nrank;
3601       PetscScalar *vals;
3602 
3603       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3604       lrows = 0;
3605       if (nrank<redprocs) {
3606         lrows = size/redprocs;
3607         if (nrank<size%redprocs) lrows++;
3608       }
3609       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3610       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3611       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3612       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3613       row = nrank;
3614       ncols = xadj[1]-xadj[0];
3615       cols = adjncy;
3616       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3617       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3618       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3619       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3620       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3621       ierr = PetscFree(xadj);CHKERRQ(ierr);
3622       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3623       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3624       ierr = PetscFree(vals);CHKERRQ(ierr);
3625     } else {
3626       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3627     }
3628     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3629 
3630     /* Partition */
3631     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3632     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3633     if (use_vwgt) {
3634       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3635       v_wgt[0] = local_size;
3636       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3637     }
3638     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3639     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3640     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3641     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3642     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3643 
3644     /* renumber new_ranks to avoid "holes" in new set of processors */
3645     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3646     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3647     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3648     if (!redprocs) {
3649       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3650     } else {
3651       PetscInt    idxs[1];
3652       PetscMPIInt tag;
3653       MPI_Request *reqs;
3654 
3655       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3656       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3657       for (i=rstart;i<rend;i++) {
3658         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3659       }
3660       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3661       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3662       ierr = PetscFree(reqs);CHKERRQ(ierr);
3663       ranks_send_to_idx[0] = oldranks[idxs[0]];
3664     }
3665     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3666     /* clean up */
3667     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3668     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3669     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3670     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3671   }
3672   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3673 
3674   /* assemble parallel IS for sends */
3675   i = 1;
3676   if (color) i=0;
3677   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3678   /* get back IS */
3679   *is_sends = ranks_send_to;
3680   PetscFunctionReturn(0);
3681 }
3682 
3683 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3684 
3685 #undef __FUNCT__
3686 #define __FUNCT__ "MatISSubassemble"
3687 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3688 {
3689   Mat                    local_mat;
3690   IS                     is_sends_internal;
3691   PetscInt               rows,cols,new_local_rows;
3692   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3693   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3694   ISLocalToGlobalMapping l2gmap;
3695   PetscInt*              l2gmap_indices;
3696   const PetscInt*        is_indices;
3697   MatType                new_local_type;
3698   /* buffers */
3699   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3700   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3701   PetscInt               *recv_buffer_idxs_local;
3702   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3703   /* MPI */
3704   MPI_Comm               comm,comm_n;
3705   PetscSubcomm           subcomm;
3706   PetscMPIInt            n_sends,n_recvs,commsize;
3707   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3708   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3709   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3710   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3711   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3712   PetscErrorCode         ierr;
3713 
3714   PetscFunctionBegin;
3715   /* TODO: add missing checks */
3716   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3717   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3718   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3719   PetscValidLogicalCollectiveInt(mat,nis,7);
3720   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3721   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3722   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3723   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3724   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3725   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3726   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3727   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3728     PetscInt mrows,mcols,mnrows,mncols;
3729     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3730     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3731     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3732     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3733     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3734     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3735   }
3736   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3737   PetscValidLogicalCollectiveInt(mat,bs,0);
3738   /* prepare IS for sending if not provided */
3739   if (!is_sends) {
3740     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3741     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
3742   } else {
3743     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3744     is_sends_internal = is_sends;
3745   }
3746 
3747   /* get comm */
3748   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3749 
3750   /* compute number of sends */
3751   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3752   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3753 
3754   /* compute number of receives */
3755   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3756   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3757   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3758   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3759   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3760   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3761   ierr = PetscFree(iflags);CHKERRQ(ierr);
3762 
3763   /* restrict comm if requested */
3764   subcomm = 0;
3765   destroy_mat = PETSC_FALSE;
3766   if (restrict_comm) {
3767     PetscMPIInt color,subcommsize;
3768 
3769     color = 0;
3770     if (restrict_full) {
3771       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3772     } else {
3773       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3774     }
3775     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3776     subcommsize = commsize - subcommsize;
3777     /* check if reuse has been requested */
3778     if (reuse == MAT_REUSE_MATRIX) {
3779       if (*mat_n) {
3780         PetscMPIInt subcommsize2;
3781         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3782         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3783         comm_n = PetscObjectComm((PetscObject)*mat_n);
3784       } else {
3785         comm_n = PETSC_COMM_SELF;
3786       }
3787     } else { /* MAT_INITIAL_MATRIX */
3788       PetscMPIInt rank;
3789 
3790       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3791       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3792       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3793       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3794       comm_n = PetscSubcommChild(subcomm);
3795     }
3796     /* flag to destroy *mat_n if not significative */
3797     if (color) destroy_mat = PETSC_TRUE;
3798   } else {
3799     comm_n = comm;
3800   }
3801 
3802   /* prepare send/receive buffers */
3803   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3804   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3805   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3806   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3807   if (nis) {
3808     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3809   }
3810 
3811   /* Get data from local matrices */
3812   if (!isdense) {
3813     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3814     /* TODO: See below some guidelines on how to prepare the local buffers */
3815     /*
3816        send_buffer_vals should contain the raw values of the local matrix
3817        send_buffer_idxs should contain:
3818        - MatType_PRIVATE type
3819        - PetscInt        size_of_l2gmap
3820        - PetscInt        global_row_indices[size_of_l2gmap]
3821        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3822     */
3823   } else {
3824     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3825     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
3826     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3827     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3828     send_buffer_idxs[1] = i;
3829     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3830     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3831     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3832     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3833     for (i=0;i<n_sends;i++) {
3834       ilengths_vals[is_indices[i]] = len*len;
3835       ilengths_idxs[is_indices[i]] = len+2;
3836     }
3837   }
3838   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3839   /* additional is (if any) */
3840   if (nis) {
3841     PetscMPIInt psum;
3842     PetscInt j;
3843     for (j=0,psum=0;j<nis;j++) {
3844       PetscInt plen;
3845       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3846       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3847       psum += len+1; /* indices + lenght */
3848     }
3849     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3850     for (j=0,psum=0;j<nis;j++) {
3851       PetscInt plen;
3852       const PetscInt *is_array_idxs;
3853       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3854       send_buffer_idxs_is[psum] = plen;
3855       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3856       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3857       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3858       psum += plen+1; /* indices + lenght */
3859     }
3860     for (i=0;i<n_sends;i++) {
3861       ilengths_idxs_is[is_indices[i]] = psum;
3862     }
3863     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3864   }
3865 
3866   buf_size_idxs = 0;
3867   buf_size_vals = 0;
3868   buf_size_idxs_is = 0;
3869   for (i=0;i<n_recvs;i++) {
3870     buf_size_idxs += (PetscInt)olengths_idxs[i];
3871     buf_size_vals += (PetscInt)olengths_vals[i];
3872     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3873   }
3874   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3875   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3876   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3877 
3878   /* get new tags for clean communications */
3879   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3880   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3881   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3882 
3883   /* allocate for requests */
3884   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3885   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3886   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3887   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3888   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3889   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3890 
3891   /* communications */
3892   ptr_idxs = recv_buffer_idxs;
3893   ptr_vals = recv_buffer_vals;
3894   ptr_idxs_is = recv_buffer_idxs_is;
3895   for (i=0;i<n_recvs;i++) {
3896     source_dest = onodes[i];
3897     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3898     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3899     ptr_idxs += olengths_idxs[i];
3900     ptr_vals += olengths_vals[i];
3901     if (nis) {
3902       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3903       ptr_idxs_is += olengths_idxs_is[i];
3904     }
3905   }
3906   for (i=0;i<n_sends;i++) {
3907     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3908     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3909     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3910     if (nis) {
3911       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3912     }
3913   }
3914   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3915   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3916 
3917   /* assemble new l2g map */
3918   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3919   ptr_idxs = recv_buffer_idxs;
3920   new_local_rows = 0;
3921   for (i=0;i<n_recvs;i++) {
3922     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3923     ptr_idxs += olengths_idxs[i];
3924   }
3925   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3926   ptr_idxs = recv_buffer_idxs;
3927   new_local_rows = 0;
3928   for (i=0;i<n_recvs;i++) {
3929     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3930     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3931     ptr_idxs += olengths_idxs[i];
3932   }
3933   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3934   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3935   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3936 
3937   /* infer new local matrix type from received local matrices type */
3938   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3939   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3940   if (n_recvs) {
3941     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3942     ptr_idxs = recv_buffer_idxs;
3943     for (i=0;i<n_recvs;i++) {
3944       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3945         new_local_type_private = MATAIJ_PRIVATE;
3946         break;
3947       }
3948       ptr_idxs += olengths_idxs[i];
3949     }
3950     switch (new_local_type_private) {
3951       case MATDENSE_PRIVATE:
3952         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3953           new_local_type = MATSEQAIJ;
3954           bs = 1;
3955         } else { /* if I receive only 1 dense matrix */
3956           new_local_type = MATSEQDENSE;
3957           bs = 1;
3958         }
3959         break;
3960       case MATAIJ_PRIVATE:
3961         new_local_type = MATSEQAIJ;
3962         bs = 1;
3963         break;
3964       case MATBAIJ_PRIVATE:
3965         new_local_type = MATSEQBAIJ;
3966         break;
3967       case MATSBAIJ_PRIVATE:
3968         new_local_type = MATSEQSBAIJ;
3969         break;
3970       default:
3971         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3972         break;
3973     }
3974   } else { /* by default, new_local_type is seqdense */
3975     new_local_type = MATSEQDENSE;
3976     bs = 1;
3977   }
3978 
3979   /* create MATIS object if needed */
3980   if (reuse == MAT_INITIAL_MATRIX) {
3981     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3982     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
3983   } else {
3984     /* it also destroys the local matrices */
3985     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3986   }
3987   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3988   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3989 
3990   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3991 
3992   /* Global to local map of received indices */
3993   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3994   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3995   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3996 
3997   /* restore attributes -> type of incoming data and its size */
3998   buf_size_idxs = 0;
3999   for (i=0;i<n_recvs;i++) {
4000     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4001     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4002     buf_size_idxs += (PetscInt)olengths_idxs[i];
4003   }
4004   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4005 
4006   /* set preallocation */
4007   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4008   if (!newisdense) {
4009     PetscInt *new_local_nnz=0;
4010 
4011     ptr_vals = recv_buffer_vals;
4012     ptr_idxs = recv_buffer_idxs_local;
4013     if (n_recvs) {
4014       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4015     }
4016     for (i=0;i<n_recvs;i++) {
4017       PetscInt j;
4018       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4019         for (j=0;j<*(ptr_idxs+1);j++) {
4020           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4021         }
4022       } else {
4023         /* TODO */
4024       }
4025       ptr_idxs += olengths_idxs[i];
4026     }
4027     if (new_local_nnz) {
4028       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4029       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4030       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4031       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4032       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4033       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4034     } else {
4035       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4036     }
4037     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4038   } else {
4039     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4040   }
4041 
4042   /* set values */
4043   ptr_vals = recv_buffer_vals;
4044   ptr_idxs = recv_buffer_idxs_local;
4045   for (i=0;i<n_recvs;i++) {
4046     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4047       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4048       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4049       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4050       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4051       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4052     } else {
4053       /* TODO */
4054     }
4055     ptr_idxs += olengths_idxs[i];
4056     ptr_vals += olengths_vals[i];
4057   }
4058   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4059   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4060   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4061   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4062   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4063   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4064 
4065 #if 0
4066   if (!restrict_comm) { /* check */
4067     Vec       lvec,rvec;
4068     PetscReal infty_error;
4069 
4070     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4071     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4072     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4073     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4074     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4075     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4076     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4077     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4078     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4079   }
4080 #endif
4081 
4082   /* assemble new additional is (if any) */
4083   if (nis) {
4084     PetscInt **temp_idxs,*count_is,j,psum;
4085 
4086     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4087     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4088     ptr_idxs = recv_buffer_idxs_is;
4089     psum = 0;
4090     for (i=0;i<n_recvs;i++) {
4091       for (j=0;j<nis;j++) {
4092         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4093         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4094         psum += plen;
4095         ptr_idxs += plen+1; /* shift pointer to received data */
4096       }
4097     }
4098     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4099     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4100     for (i=1;i<nis;i++) {
4101       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4102     }
4103     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4104     ptr_idxs = recv_buffer_idxs_is;
4105     for (i=0;i<n_recvs;i++) {
4106       for (j=0;j<nis;j++) {
4107         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4108         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4109         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4110         ptr_idxs += plen+1; /* shift pointer to received data */
4111       }
4112     }
4113     for (i=0;i<nis;i++) {
4114       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4115       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4116       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4117     }
4118     ierr = PetscFree(count_is);CHKERRQ(ierr);
4119     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4120     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4121   }
4122   /* free workspace */
4123   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4124   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4125   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4126   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4127   if (isdense) {
4128     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4129     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4130   } else {
4131     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4132   }
4133   if (nis) {
4134     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4135     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4136   }
4137   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4138   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4139   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4140   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4141   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4142   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4143   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4144   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4145   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4146   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4147   ierr = PetscFree(onodes);CHKERRQ(ierr);
4148   if (nis) {
4149     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4150     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4151     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4152   }
4153   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4154   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4155     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4156     for (i=0;i<nis;i++) {
4157       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4158     }
4159     *mat_n = NULL;
4160   }
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 /* temporary hack into ksp private data structure */
4165 #include <petsc/private/kspimpl.h>
4166 
4167 #undef __FUNCT__
4168 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4169 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4170 {
4171   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4172   PC_IS                  *pcis = (PC_IS*)pc->data;
4173   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4174   MatNullSpace           CoarseNullSpace=NULL;
4175   ISLocalToGlobalMapping coarse_islg;
4176   IS                     coarse_is,*isarray;
4177   PetscInt               i,im_active=-1,active_procs=-1;
4178   PetscInt               nis,nisdofs,nisneu;
4179   PC                     pc_temp;
4180   PCType                 coarse_pc_type;
4181   KSPType                coarse_ksp_type;
4182   PetscBool              multilevel_requested,multilevel_allowed;
4183   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4184   Mat                    t_coarse_mat_is;
4185   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4186   PetscMPIInt            all_procs;
4187   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4188   PetscBool              compute_vecs = PETSC_FALSE;
4189   PetscScalar            *array;
4190   PetscErrorCode         ierr;
4191 
4192   PetscFunctionBegin;
4193   /* Assign global numbering to coarse dofs */
4194   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4195     PetscInt ocoarse_size;
4196     compute_vecs = PETSC_TRUE;
4197     ocoarse_size = pcbddc->coarse_size;
4198     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4199     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4200     /* see if we can avoid some work */
4201     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4202       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4203       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4204         PC        pc;
4205         PetscBool isbddc;
4206 
4207         /* temporary workaround since PCBDDC does not have a reset method so far */
4208         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4209         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4210         if (isbddc) {
4211           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4212         }
4213         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4214         coarse_reuse = PETSC_FALSE;
4215       } else { /* we can safely reuse already computed coarse matrix */
4216         coarse_reuse = PETSC_TRUE;
4217       }
4218     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4219       coarse_reuse = PETSC_FALSE;
4220     }
4221     /* reset any subassembling information */
4222     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4223     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4224   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4225     coarse_reuse = PETSC_TRUE;
4226   }
4227 
4228   /* count "active" (i.e. with positive local size) and "void" processes */
4229   im_active = !!(pcis->n);
4230   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4231   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4232   void_procs = all_procs-active_procs;
4233   csin_type_simple = PETSC_TRUE;
4234   redist = PETSC_FALSE;
4235   if (pcbddc->current_level && void_procs) {
4236     csin_ml = PETSC_TRUE;
4237     ncoarse_ml = void_procs;
4238     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4239     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4240       csin_ds = PETSC_TRUE;
4241       ncoarse_ds = pcbddc->redistribute_coarse;
4242       redist = PETSC_TRUE;
4243     } else {
4244       csin_ds = PETSC_TRUE;
4245       ncoarse_ds = active_procs;
4246       redist = PETSC_TRUE;
4247     }
4248   } else {
4249     csin_ml = PETSC_FALSE;
4250     ncoarse_ml = all_procs;
4251     if (void_procs) {
4252       csin_ds = PETSC_TRUE;
4253       ncoarse_ds = void_procs;
4254       csin_type_simple = PETSC_FALSE;
4255     } else {
4256       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4257         csin_ds = PETSC_TRUE;
4258         ncoarse_ds = pcbddc->redistribute_coarse;
4259         redist = PETSC_TRUE;
4260       } else {
4261         csin_ds = PETSC_FALSE;
4262         ncoarse_ds = all_procs;
4263       }
4264     }
4265   }
4266 
4267   /*
4268     test if we can go multilevel: three conditions must be satisfied:
4269     - we have not exceeded the number of levels requested
4270     - we can actually subassemble the active processes
4271     - we can find a suitable number of MPI processes where we can place the subassembled problem
4272   */
4273   multilevel_allowed = PETSC_FALSE;
4274   multilevel_requested = PETSC_FALSE;
4275   if (pcbddc->current_level < pcbddc->max_levels) {
4276     multilevel_requested = PETSC_TRUE;
4277     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4278       multilevel_allowed = PETSC_FALSE;
4279     } else {
4280       multilevel_allowed = PETSC_TRUE;
4281     }
4282   }
4283   /* determine number of process partecipating to coarse solver */
4284   if (multilevel_allowed) {
4285     ncoarse = ncoarse_ml;
4286     csin = csin_ml;
4287     redist = PETSC_FALSE;
4288   } else {
4289     ncoarse = ncoarse_ds;
4290     csin = csin_ds;
4291   }
4292 
4293   /* creates temporary l2gmap and IS for coarse indexes */
4294   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4295   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4296 
4297   /* creates temporary MATIS object for coarse matrix */
4298   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4299   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4300   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4301   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4302 #if 0
4303   {
4304     PetscViewer viewer;
4305     char filename[256];
4306     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4307     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4308     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4309     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4310     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4311   }
4312 #endif
4313   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4314   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4315   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4316   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4317   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4318 
4319   /* compute dofs splitting and neumann boundaries for coarse dofs */
4320   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4321     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4322     const PetscInt         *idxs;
4323     ISLocalToGlobalMapping tmap;
4324 
4325     /* create map between primal indices (in local representative ordering) and local primal numbering */
4326     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4327     /* allocate space for temporary storage */
4328     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4329     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4330     /* allocate for IS array */
4331     nisdofs = pcbddc->n_ISForDofsLocal;
4332     nisneu = !!pcbddc->NeumannBoundariesLocal;
4333     nis = nisdofs + nisneu;
4334     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4335     /* dofs splitting */
4336     for (i=0;i<nisdofs;i++) {
4337       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4338       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4339       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4340       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4341       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4342       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4343       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4344       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4345     }
4346     /* neumann boundaries */
4347     if (pcbddc->NeumannBoundariesLocal) {
4348       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4349       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4350       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4351       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4352       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4353       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4354       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4355       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4356     }
4357     /* free memory */
4358     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4359     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4360     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4361   } else {
4362     nis = 0;
4363     nisdofs = 0;
4364     nisneu = 0;
4365     isarray = NULL;
4366   }
4367   /* destroy no longer needed map */
4368   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4369 
4370   /* restrict on coarse candidates (if needed) */
4371   coarse_mat_is = NULL;
4372   if (csin) {
4373     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4374       if (redist) {
4375         PetscMPIInt rank;
4376         PetscInt    spc,n_spc_p1,dest[1],destsize;
4377 
4378         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4379         spc = active_procs/ncoarse;
4380         n_spc_p1 = active_procs%ncoarse;
4381         if (im_active) {
4382           destsize = 1;
4383           if (rank > n_spc_p1*(spc+1)-1) {
4384             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4385           } else {
4386             dest[0] = rank/(spc+1);
4387           }
4388         } else {
4389           destsize = 0;
4390         }
4391         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4392       } else if (csin_type_simple) {
4393         PetscMPIInt rank;
4394         PetscInt    issize,isidx;
4395 
4396         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4397         if (im_active) {
4398           issize = 1;
4399           isidx = (PetscInt)rank;
4400         } else {
4401           issize = 0;
4402           isidx = -1;
4403         }
4404         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4405       } else { /* get a suitable subassembling pattern from MATIS code */
4406         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4407       }
4408 
4409       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4410       if (!redist || ncoarse <= void_procs) {
4411         PetscInt ncoarse_cand,tissize,*nisindices;
4412         PetscInt *coarse_candidates;
4413         const PetscInt* tisindices;
4414 
4415         /* get coarse candidates' ranks in pc communicator */
4416         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4417         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4418         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4419           if (!coarse_candidates[i]) {
4420             coarse_candidates[ncoarse_cand++]=i;
4421           }
4422         }
4423         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4424 
4425 
4426         if (pcbddc->dbg_flag) {
4427           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4428           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4429           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4430           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4431           for (i=0;i<ncoarse_cand;i++) {
4432             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4433           }
4434           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4435           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4436         }
4437         /* shift the pattern on coarse candidates */
4438         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4439         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4440         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4441         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4442         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4443         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4444         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4445       }
4446       if (pcbddc->dbg_flag) {
4447         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4448         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4449         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4450         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4451       }
4452     }
4453     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4454     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4455       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4456     } else { /* this is the last level, so use just receiving processes in subcomm */
4457       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4458     }
4459   } else {
4460     if (pcbddc->dbg_flag) {
4461       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4462       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4463       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4464     }
4465     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4466     coarse_mat_is = t_coarse_mat_is;
4467   }
4468 
4469   /* create local to global scatters for coarse problem */
4470   if (compute_vecs) {
4471     PetscInt lrows;
4472     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4473     if (coarse_mat_is) {
4474       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4475     } else {
4476       lrows = 0;
4477     }
4478     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4479     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4480     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4481     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4482     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4483   }
4484   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4485   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4486 
4487   /* set defaults for coarse KSP and PC */
4488   if (multilevel_allowed) {
4489     coarse_ksp_type = KSPRICHARDSON;
4490     coarse_pc_type = PCBDDC;
4491   } else {
4492     coarse_ksp_type = KSPPREONLY;
4493     coarse_pc_type = PCREDUNDANT;
4494   }
4495 
4496   /* print some info if requested */
4497   if (pcbddc->dbg_flag) {
4498     if (!multilevel_allowed) {
4499       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4500       if (multilevel_requested) {
4501         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4502       } else if (pcbddc->max_levels) {
4503         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4504       }
4505       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4506     }
4507   }
4508 
4509   /* create the coarse KSP object only once with defaults */
4510   if (coarse_mat_is) {
4511     MatReuse coarse_mat_reuse;
4512     PetscViewer dbg_viewer = NULL;
4513     if (pcbddc->dbg_flag) {
4514       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4515       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4516     }
4517     if (!pcbddc->coarse_ksp) {
4518       char prefix[256],str_level[16];
4519       size_t len;
4520       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4521       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4522       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4523       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4524       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4525       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4526       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4527       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4528       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4529       /* prefix */
4530       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4531       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4532       if (!pcbddc->current_level) {
4533         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4534         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4535       } else {
4536         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4537         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4538         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4539         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4540         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4541         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4542       }
4543       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4544       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4545       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4546       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4547       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4548       /* allow user customization */
4549       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4550     }
4551     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4552     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4553     if (nisdofs) {
4554       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4555       for (i=0;i<nisdofs;i++) {
4556         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4557       }
4558     }
4559     if (nisneu) {
4560       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4561       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4562     }
4563 
4564     /* get some info after set from options */
4565     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4566     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4567     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4568     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4569       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4570       isbddc = PETSC_FALSE;
4571     }
4572     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4573     if (isredundant) {
4574       KSP inner_ksp;
4575       PC  inner_pc;
4576       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4577       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4578       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4579     }
4580 
4581     /* assemble coarse matrix */
4582     if (coarse_reuse) {
4583       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4584       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4585       coarse_mat_reuse = MAT_REUSE_MATRIX;
4586     } else {
4587       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4588     }
4589     if (isbddc || isnn) {
4590       if (pcbddc->coarsening_ratio > 1) {
4591         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4592           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4593           if (pcbddc->dbg_flag) {
4594             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4595             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4596             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4597             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4598           }
4599         }
4600         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4601       } else {
4602         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4603         coarse_mat = coarse_mat_is;
4604       }
4605     } else {
4606       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4607     }
4608     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4609 
4610     /* propagate symmetry info of coarse matrix */
4611     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4612     if (pc->pmat->symmetric_set) {
4613       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4614     }
4615     if (pc->pmat->hermitian_set) {
4616       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4617     }
4618     if (pc->pmat->spd_set) {
4619       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4620     }
4621     /* set operators */
4622     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4623     if (pcbddc->dbg_flag) {
4624       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4625     }
4626   } else { /* processes non partecipating to coarse solver (if any) */
4627     coarse_mat = 0;
4628   }
4629   ierr = PetscFree(isarray);CHKERRQ(ierr);
4630 #if 0
4631   {
4632     PetscViewer viewer;
4633     char filename[256];
4634     sprintf(filename,"coarse_mat.m");
4635     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4636     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4637     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4638     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4639   }
4640 #endif
4641 
4642   /* Compute coarse null space (special handling by BDDC only) */
4643 #if 0
4644   if (pcbddc->NullSpace) {
4645     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4646   }
4647 #endif
4648 
4649   if (pcbddc->coarse_ksp) {
4650     Vec crhs,csol;
4651     PetscBool ispreonly;
4652 
4653     if (CoarseNullSpace) {
4654       if (isbddc) {
4655         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4656       } else {
4657         ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
4658       }
4659     }
4660     /* setup coarse ksp */
4661     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4662     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4663     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4664     /* hack */
4665     if (!csol) {
4666       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4667     }
4668     if (!crhs) {
4669       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4670     }
4671     /* Check coarse problem if in debug mode or if solving with an iterative method */
4672     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4673     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4674       KSP       check_ksp;
4675       KSPType   check_ksp_type;
4676       PC        check_pc;
4677       Vec       check_vec,coarse_vec;
4678       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4679       PetscInt  its;
4680       PetscBool compute_eigs;
4681       PetscReal *eigs_r,*eigs_c;
4682       PetscInt  neigs;
4683       const char *prefix;
4684 
4685       /* Create ksp object suitable for estimation of extreme eigenvalues */
4686       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4687       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4688       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4689       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4690       if (ispreonly) {
4691         check_ksp_type = KSPPREONLY;
4692         compute_eigs = PETSC_FALSE;
4693       } else {
4694         check_ksp_type = KSPGMRES;
4695         compute_eigs = PETSC_TRUE;
4696       }
4697       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4698       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4699       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4700       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4701       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4702       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4703       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4704       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4705       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4706       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4707       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4708       /* create random vec */
4709       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4710       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4711       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4712       if (CoarseNullSpace) {
4713         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4714       }
4715       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4716       /* solve coarse problem */
4717       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4718       if (CoarseNullSpace) {
4719         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4720       }
4721       /* set eigenvalue estimation if preonly has not been requested */
4722       if (compute_eigs) {
4723         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4724         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4725         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4726         lambda_max = eigs_r[neigs-1];
4727         lambda_min = eigs_r[0];
4728         if (pcbddc->use_coarse_estimates) {
4729           if (lambda_max>lambda_min) {
4730             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4731             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4732           }
4733         }
4734       }
4735 
4736       /* check coarse problem residual error */
4737       if (pcbddc->dbg_flag) {
4738         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4739         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4740         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4741         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4742         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4743         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4744         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4745         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4746         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4747         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4748         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4749         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4750         if (compute_eigs) {
4751           PetscReal lambda_max_s,lambda_min_s;
4752           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4753           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4754           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4755           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4756           for (i=0;i<neigs;i++) {
4757             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4758           }
4759         }
4760         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4761         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4762       }
4763       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4764       if (compute_eigs) {
4765         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4766         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4767       }
4768     }
4769   }
4770   /* print additional info */
4771   if (pcbddc->dbg_flag) {
4772     /* waits until all processes reaches this point */
4773     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4774     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4775     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4776   }
4777 
4778   /* free memory */
4779   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4780   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4781   PetscFunctionReturn(0);
4782 }
4783 
4784 #undef __FUNCT__
4785 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4786 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4787 {
4788   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4789   PC_IS*         pcis = (PC_IS*)pc->data;
4790   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4791   IS             subset,subset_mult,subset_n;
4792   PetscInt       local_size,coarse_size=0;
4793   PetscInt       *local_primal_indices=NULL;
4794   const PetscInt *t_local_primal_indices;
4795   PetscErrorCode ierr;
4796 
4797   PetscFunctionBegin;
4798   /* Compute global number of coarse dofs */
4799   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4800     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4801   }
4802   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
4803   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
4804   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4805   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
4806   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
4807   ierr = ISDestroy(&subset);CHKERRQ(ierr);
4808   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
4809   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
4810   if (local_size != pcbddc->local_primal_size) {
4811     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4812   }
4813   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
4814   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4815   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
4816   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4817   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4818 
4819   /* check numbering */
4820   if (pcbddc->dbg_flag) {
4821     PetscScalar coarsesum,*array,*array2;
4822     PetscInt    i;
4823     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4824 
4825     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4826     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4827     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4828     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4829     /* counter */
4830     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4831     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
4832     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4833     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4834     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4835     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4836 
4837     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4838     for (i=0;i<pcbddc->local_primal_size;i++) {
4839       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4840     }
4841     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4842     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4843     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4844     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4845     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4846     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4847     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4848     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4849     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
4850     for (i=0;i<pcis->n;i++) {
4851       if (array[i] != 0.0 && array[i] != array2[i]) {
4852         PetscInt owned = (PetscInt)(array[i]);
4853         PetscInt neigh = (PetscInt)(array2[i]);
4854         set_error = PETSC_TRUE;
4855         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
4856       }
4857     }
4858     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
4859     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4860     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4861     for (i=0;i<pcis->n;i++) {
4862       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4863     }
4864     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4865     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4866     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4867     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4868     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4869     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4870     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4871       PetscInt *gidxs;
4872 
4873       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4874       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4875       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4876       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4877       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4878       for (i=0;i<pcbddc->local_primal_size;i++) {
4879         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4880       }
4881       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4882       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4883     }
4884     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4885     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4886   }
4887   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4888   /* get back data */
4889   *coarse_size_n = coarse_size;
4890   *local_primal_indices_n = local_primal_indices;
4891   PetscFunctionReturn(0);
4892 }
4893 
4894 #undef __FUNCT__
4895 #define __FUNCT__ "PCBDDCGlobalToLocal"
4896 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4897 {
4898   IS             localis_t;
4899   PetscInt       i,lsize,*idxs,n;
4900   PetscScalar    *vals;
4901   PetscErrorCode ierr;
4902 
4903   PetscFunctionBegin;
4904   /* get indices in local ordering exploiting local to global map */
4905   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4906   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4907   for (i=0;i<lsize;i++) vals[i] = 1.0;
4908   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4909   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4910   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4911   if (idxs) { /* multilevel guard */
4912     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4913   }
4914   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4915   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4916   ierr = PetscFree(vals);CHKERRQ(ierr);
4917   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4918   /* now compute set in local ordering */
4919   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4920   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4921   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4922   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4923   for (i=0,lsize=0;i<n;i++) {
4924     if (PetscRealPart(vals[i]) > 0.5) {
4925       lsize++;
4926     }
4927   }
4928   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4929   for (i=0,lsize=0;i<n;i++) {
4930     if (PetscRealPart(vals[i]) > 0.5) {
4931       idxs[lsize++] = i;
4932     }
4933   }
4934   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4935   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4936   *localis = localis_t;
4937   PetscFunctionReturn(0);
4938 }
4939 
4940 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4941 #undef __FUNCT__
4942 #define __FUNCT__ "PCBDDCMatMult_Private"
4943 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4944 {
4945   PCBDDCChange_ctx change_ctx;
4946   PetscErrorCode   ierr;
4947 
4948   PetscFunctionBegin;
4949   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4950   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4951   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4952   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4953   PetscFunctionReturn(0);
4954 }
4955 
4956 #undef __FUNCT__
4957 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4958 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4959 {
4960   PCBDDCChange_ctx change_ctx;
4961   PetscErrorCode   ierr;
4962 
4963   PetscFunctionBegin;
4964   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4965   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4966   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4967   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4968   PetscFunctionReturn(0);
4969 }
4970 
4971 #undef __FUNCT__
4972 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4973 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4974 {
4975   PC_IS               *pcis=(PC_IS*)pc->data;
4976   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4977   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4978   Mat                 S_j;
4979   PetscInt            *used_xadj,*used_adjncy;
4980   PetscBool           free_used_adj;
4981   PetscErrorCode      ierr;
4982 
4983   PetscFunctionBegin;
4984   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4985   free_used_adj = PETSC_FALSE;
4986   if (pcbddc->sub_schurs_layers == -1) {
4987     used_xadj = NULL;
4988     used_adjncy = NULL;
4989   } else {
4990     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4991       used_xadj = pcbddc->mat_graph->xadj;
4992       used_adjncy = pcbddc->mat_graph->adjncy;
4993     } else if (pcbddc->computed_rowadj) {
4994       used_xadj = pcbddc->mat_graph->xadj;
4995       used_adjncy = pcbddc->mat_graph->adjncy;
4996     } else {
4997       PetscBool      flg_row=PETSC_FALSE;
4998       const PetscInt *xadj,*adjncy;
4999       PetscInt       nvtxs;
5000 
5001       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5002       if (flg_row) {
5003         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5004         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5005         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5006         free_used_adj = PETSC_TRUE;
5007       } else {
5008         pcbddc->sub_schurs_layers = -1;
5009         used_xadj = NULL;
5010         used_adjncy = NULL;
5011       }
5012       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5013     }
5014   }
5015 
5016   /* setup sub_schurs data */
5017   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5018   if (!sub_schurs->use_mumps) {
5019     /* pcbddc->ksp_D up to date only if not using MUMPS */
5020     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5021     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5022   } else {
5023     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5024     PetscBool isseqaij;
5025     if (!pcbddc->use_vertices && reuse_solvers) {
5026       PetscInt n_vertices;
5027 
5028       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5029       reuse_solvers = (PetscBool)!n_vertices;
5030     }
5031     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5032     if (!isseqaij) {
5033       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5034       if (matis->A == pcbddc->local_mat) {
5035         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5036         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5037       } else {
5038         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5039       }
5040     }
5041     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5042   }
5043   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5044 
5045   /* free adjacency */
5046   if (free_used_adj) {
5047     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5048   }
5049   PetscFunctionReturn(0);
5050 }
5051 
5052 #undef __FUNCT__
5053 #define __FUNCT__ "PCBDDCInitSubSchurs"
5054 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5055 {
5056   PC_IS               *pcis=(PC_IS*)pc->data;
5057   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5058   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5059   PCBDDCGraph         graph;
5060   PetscErrorCode      ierr;
5061 
5062   PetscFunctionBegin;
5063   /* attach interface graph for determining subsets */
5064   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5065     IS       verticesIS,verticescomm;
5066     PetscInt vsize,*idxs;
5067 
5068     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5069     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5070     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5071     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5072     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5073     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5074     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5075     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5076     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5077     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5078     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5079 /*
5080     if (pcbddc->dbg_flag) {
5081       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5082     }
5083 */
5084   } else {
5085     graph = pcbddc->mat_graph;
5086   }
5087 
5088   /* sub_schurs init */
5089   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5090 
5091   /* free graph struct */
5092   if (pcbddc->sub_schurs_rebuild) {
5093     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5094   }
5095   PetscFunctionReturn(0);
5096 }
5097