xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 862806e4fc84b788053332115ec1181d367cf638)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     PetscInt subset_size;
51 
52     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53     mss = PetscMax(mss,subset_size);
54   }
55 
56   /* min/max and threshold */
57   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
58   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
59   nmax = PetscMax(nmin,nmax);
60   allocated_S_St = PETSC_FALSE;
61   if (nmin) {
62     allocated_S_St = PETSC_TRUE;
63   }
64 
65   /* allocate lapack workspace */
66   cum = cum2 = 0;
67   maxneigs = 0;
68   for (i=0;i<sub_schurs->n_subs;i++) {
69     PetscInt n,subset_size;
70 
71     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
72     n = PetscMin(subset_size,nmax);
73     cum += subset_size;
74     cum2 += subset_size*n;
75     maxneigs = PetscMax(maxneigs,n);
76   }
77   if (mss) {
78     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
79       PetscBLASInt B_itype = 1;
80       PetscBLASInt B_N = mss;
81       PetscReal    zero = 0.0;
82       PetscReal    eps = 0.0; /* dlamch? */
83 
84       B_lwork = -1;
85       S = NULL;
86       St = NULL;
87       eigs = NULL;
88       eigv = NULL;
89       B_iwork = NULL;
90       B_ifail = NULL;
91       thresh = 1.0;
92       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
93 #if defined(PETSC_USE_COMPLEX)
94       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
95 #else
96       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
97 #endif
98       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
99       ierr = PetscFPTrapPop();CHKERRQ(ierr);
100     } else {
101         /* TODO */
102     }
103   } else {
104     lwork = 0;
105   }
106 
107   nv = 0;
108   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
109     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
110   }
111   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
112   if (allocated_S_St) {
113     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
114   }
115   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
116 #if defined(PETSC_USE_COMPLEX)
117   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
118 #endif
119   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
120                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
121                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
122                       nv+cum,&pcbddc->adaptive_constraints_idxs,
123                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
124   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
125 
126   maxneigs = 0;
127   cum = cum2 = cumarray = 0;
128   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
129   pcbddc->adaptive_constraints_data_ptr[0] = 0;
130   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
131     const PetscInt *idxs;
132 
133     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
134     for (cum=0;cum<nv;cum++) {
135       pcbddc->adaptive_constraints_n[cum] = 1;
136       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
137       pcbddc->adaptive_constraints_data[cum] = 1.0;
138       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
139       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
140     }
141     cum2 = cum;
142     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
143   }
144 
145   if (mss) { /* multilevel */
146     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
147     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
148   }
149 
150   for (i=0;i<sub_schurs->n_subs;i++) {
151 
152     const PetscInt *idxs;
153     PetscReal      infty = PETSC_MAX_REAL;
154     PetscInt       j,subset_size,eigs_start = 0;
155     PetscBLASInt   B_N;
156     PetscBool      same_data = PETSC_FALSE;
157 
158     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
159     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
160     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
161       if (sub_schurs->is_hermitian) {
162         PetscInt j,k;
163         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
164           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
165           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
166         }
167         for (j=0;j<subset_size;j++) {
168           for (k=j;k<subset_size;k++) {
169             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
170             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
171           }
172         }
173       } else {
174         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
175         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176       }
177     } else {
178       S = Sarray + cumarray;
179       St = Starray + cumarray;
180     }
181 
182     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
183     /* see if we can save some work */
184     if (sub_schurs->n_subs == 1) {
185       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
186     }
187 
188     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
189       B_neigs = 0;
190     } else {
191       /* Threshold: this is an heuristic for edges */
192       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
193 
194       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
195         PetscBLASInt B_itype = 1;
196         PetscBLASInt B_IL, B_IU;
197         PetscReal    eps = -1.0; /* dlamch? */
198         PetscInt     nmin_s;
199 
200         /* ask for eigenvalues larger than thresh */
201         if (pcbddc->dbg_flag) {
202           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
203         }
204         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
205 #if defined(PETSC_USE_COMPLEX)
206         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
207 #else
208         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
209 #endif
210         ierr = PetscFPTrapPop();CHKERRQ(ierr);
211         if (B_ierr) {
212           if (B_ierr < 0 ) {
213             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
214           } else if (B_ierr <= B_N) {
215             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
216           } else {
217             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
218           }
219         }
220 
221         if (B_neigs > nmax) {
222           if (pcbddc->dbg_flag) {
223             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
224           }
225           eigs_start = B_neigs -nmax;
226           B_neigs = nmax;
227         }
228 
229         nmin_s = PetscMin(nmin,B_N);
230         if (B_neigs < nmin_s) {
231           PetscBLASInt B_neigs2;
232 
233           B_IU = B_N - B_neigs;
234           B_IL = B_N - nmin_s + 1;
235           if (pcbddc->dbg_flag) {
236             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
237           }
238           if (sub_schurs->is_hermitian) {
239             PetscInt j;
240             for (j=0;j<subset_size;j++) {
241               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
242             }
243             for (j=0;j<subset_size;j++) {
244               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
245             }
246           } else {
247             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
248             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
249           }
250           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
251 #if defined(PETSC_USE_COMPLEX)
252           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
253 #else
254           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
255 #endif
256           ierr = PetscFPTrapPop();CHKERRQ(ierr);
257           B_neigs += B_neigs2;
258         }
259         if (B_ierr) {
260           if (B_ierr < 0 ) {
261             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
262           } else if (B_ierr <= B_N) {
263             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
264           } else {
265             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
266           }
267         }
268         if (pcbddc->dbg_flag) {
269           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
270           for (j=0;j<B_neigs;j++) {
271             if (eigs[j] == 0.0) {
272               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
273             } else {
274               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
275             }
276           }
277         }
278       } else {
279           /* TODO */
280       }
281     }
282     maxneigs = PetscMax(B_neigs,maxneigs);
283     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
284     if (B_neigs) {
285       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
286 
287       if (pcbddc->dbg_flag > 1) {
288         PetscInt ii;
289         for (ii=0;ii<B_neigs;ii++) {
290           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
291           for (j=0;j<B_N;j++) {
292             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
293           }
294         }
295       }
296 #if 0
297       for (j=0;j<B_neigs;j++) {
298         PetscBLASInt Blas_N,Blas_one = 1.0;
299         PetscScalar norm;
300         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
301         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
302                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
303         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
304           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
305         } else {
306           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
307         }
308         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
309       }
310 #endif
311       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
312       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
313       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
314       cum++;
315     }
316     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
317     /* shift for next computation */
318     cumarray += subset_size*subset_size;
319   }
320   if (pcbddc->dbg_flag) {
321     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
322   }
323 
324   if (mss) {
325     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
326     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
327     /* destroy matrices (junk) */
328     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
329     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
330   }
331   if (allocated_S_St) {
332     ierr = PetscFree2(S,St);CHKERRQ(ierr);
333   }
334   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
335 #if defined(PETSC_USE_COMPLEX)
336   ierr = PetscFree(rwork);CHKERRQ(ierr);
337 #endif
338   if (pcbddc->dbg_flag) {
339     PetscInt maxneigs_r;
340     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
341     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
342   }
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "PCBDDCSetUpSolvers"
348 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
349 {
350   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
351   PetscScalar    *coarse_submat_vals;
352   PetscErrorCode ierr;
353 
354   PetscFunctionBegin;
355   /* Setup local scatters R_to_B and (optionally) R_to_D */
356   /* PCBDDCSetUpLocalWorkVectors should be called first! */
357   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
358 
359   /* Setup local neumann solver ksp_R */
360   /* PCBDDCSetUpLocalScatters should be called first! */
361   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
362 
363   /* Change global null space passed in by the user if change of basis has been requested */
364   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
365     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
366   }
367 
368   /*
369      Setup local correction and local part of coarse basis.
370      Gives back the dense local part of the coarse matrix in column major ordering
371   */
372   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
373 
374   /* Compute total number of coarse nodes and setup coarse solver */
375   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
376 
377   /* free */
378   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
379   PetscFunctionReturn(0);
380 }
381 
382 #undef __FUNCT__
383 #define __FUNCT__ "PCBDDCResetCustomization"
384 PetscErrorCode PCBDDCResetCustomization(PC pc)
385 {
386   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
387   PetscErrorCode ierr;
388 
389   PetscFunctionBegin;
390   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
391   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
392   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
393   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
394   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
395   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
396   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
397   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
398   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
399   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
400   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "PCBDDCResetTopography"
406 PetscErrorCode PCBDDCResetTopography(PC pc)
407 {
408   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
409   PetscErrorCode ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
413   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
414   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
415   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
416   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "PCBDDCResetSolvers"
422 PetscErrorCode PCBDDCResetSolvers(PC pc)
423 {
424   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
425   PetscScalar    *array;
426   PetscErrorCode ierr;
427 
428   PetscFunctionBegin;
429   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
430   if (pcbddc->coarse_phi_B) {
431     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
432     ierr = PetscFree(array);CHKERRQ(ierr);
433   }
434   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
435   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
436   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
437   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
438   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
439   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
440   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
441   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
442   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
443   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
444   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
445   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
446   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
447   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
448   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
449   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
450   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
451   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
452   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
453   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
454   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
455   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
456   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
462 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
463 {
464   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
465   PC_IS          *pcis = (PC_IS*)pc->data;
466   VecType        impVecType;
467   PetscInt       n_constraints,n_R,old_size;
468   PetscErrorCode ierr;
469 
470   PetscFunctionBegin;
471   if (!pcbddc->ConstraintMatrix) {
472     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
473   }
474   /* get sizes */
475   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
476   n_R = pcis->n-pcbddc->n_vertices;
477   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
478   /* local work vectors (try to avoid unneeded work)*/
479   /* R nodes */
480   old_size = -1;
481   if (pcbddc->vec1_R) {
482     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
483   }
484   if (n_R != old_size) {
485     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
486     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
487     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
488     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
489     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
490     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
491   }
492   /* local primal dofs */
493   old_size = -1;
494   if (pcbddc->vec1_P) {
495     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
496   }
497   if (pcbddc->local_primal_size != old_size) {
498     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
499     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
500     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
501     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
502   }
503   /* local explicit constraints */
504   old_size = -1;
505   if (pcbddc->vec1_C) {
506     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
507   }
508   if (n_constraints && n_constraints != old_size) {
509     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
510     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
511     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
512     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
513   }
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "PCBDDCSetUpCorrection"
519 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
520 {
521   PetscErrorCode  ierr;
522   /* pointers to pcis and pcbddc */
523   PC_IS*          pcis = (PC_IS*)pc->data;
524   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
525   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
526   /* submatrices of local problem */
527   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
528   /* submatrices of local coarse problem */
529   Mat             S_VV,S_CV,S_VC,S_CC;
530   /* working matrices */
531   Mat             C_CR;
532   /* additional working stuff */
533   PC              pc_R;
534   Mat             F;
535   PetscBool       isLU,isCHOL,isILU;
536 
537   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
538   PetscScalar     *work;
539   PetscInt        *idx_V_B;
540   PetscInt        n,n_vertices,n_constraints;
541   PetscInt        i,n_R,n_D,n_B;
542   PetscBool       unsymmetric_check;
543   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
544   MatType         impMatType;
545   /* some shortcuts to scalars */
546   PetscScalar     one=1.0,m_one=-1.0;
547 
548   PetscFunctionBegin;
549   n_vertices = pcbddc->n_vertices;
550   n_constraints = pcbddc->local_primal_size-n_vertices;
551   /* Set Non-overlapping dimensions */
552   n_B = pcis->n_B;
553   n_D = pcis->n - n_B;
554   n_R = pcis->n - n_vertices;
555 
556   /* Set types for local objects needed by BDDC precondtioner */
557   impMatType = MATSEQDENSE;
558 
559   /* vertices in boundary numbering */
560   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
561   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
562   if (i != n_vertices) {
563     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
564   }
565 
566   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
567   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
568   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
569   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
570   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
571   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
572   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
573   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
574   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
575   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
576 
577   unsymmetric_check = PETSC_FALSE;
578   /* allocate workspace */
579   n = 0;
580   if (n_constraints) {
581     n += n_R*n_constraints;
582   }
583   if (n_vertices) {
584     n = PetscMax(2*n_R*n_vertices,n);
585     n = PetscMax((n_R+n_B)*n_vertices,n);
586   }
587   if (!pcbddc->symmetric_primal) {
588     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
589     unsymmetric_check = PETSC_TRUE;
590   }
591   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
592 
593   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
594   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
595   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
596   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
597   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
598   if (isLU || isILU || isCHOL) {
599     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
600   } else if (sub_schurs->reuse_mumps) {
601     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
602     MatFactorType type;
603 
604     F = reuse_mumps->F;
605     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
606     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
607   } else {
608     F = NULL;
609   }
610 
611   /* Precompute stuffs needed for preprocessing and application of BDDC*/
612   if (n_constraints) {
613     Mat         M1,M2,M3;
614     Mat         auxmat;
615     IS          is_aux;
616     PetscScalar *array,*array2;
617 
618     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
619     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
620 
621     /* Extract constraints on R nodes: C_{CR}  */
622     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
623     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
625 
626     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
627     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
628     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
629     for (i=0;i<n_constraints;i++) {
630       const PetscScalar *row_cmat_values;
631       const PetscInt    *row_cmat_indices;
632       PetscInt          size_of_constraint,j;
633 
634       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
635       for (j=0;j<size_of_constraint;j++) {
636         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
637       }
638       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
639     }
640     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
641     if (F) {
642       Mat B;
643 
644       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
645       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
646       ierr = MatDestroy(&B);CHKERRQ(ierr);
647     } else {
648       PetscScalar *marr;
649 
650       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
651       for (i=0;i<n_constraints;i++) {
652         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
653         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
654         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
655         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
656         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
657       }
658       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
659     }
660     if (!pcbddc->switch_static) {
661       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
662       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
663       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
664       for (i=0;i<n_constraints;i++) {
665         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
666         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
667         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
668         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
669         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
670         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
671       }
672       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
673       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
674       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
675     } else {
676       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
677       pcbddc->local_auxmat2 = local_auxmat2_R;
678       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
679     }
680     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
681     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
682     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
683     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
684     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
685     if (isCHOL) {
686       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
687     } else {
688       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
689     }
690     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
691     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
692     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
693     ierr = MatDestroy(&M2);CHKERRQ(ierr);
694     ierr = MatDestroy(&M3);CHKERRQ(ierr);
695     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
696     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
697     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
698     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
699     ierr = MatDestroy(&M1);CHKERRQ(ierr);
700   }
701   /* Get submatrices from subdomain matrix */
702   if (n_vertices) {
703     Mat       newmat;
704     IS        is_aux;
705     PetscInt  ibs,mbs;
706     PetscBool issbaij;
707 
708     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
709       IS tis;
710 
711       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
712       ierr = ISSort(tis);CHKERRQ(ierr);
713       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
714       ierr = ISDestroy(&tis);CHKERRQ(ierr);
715     } else {
716       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
717     }
718     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
719     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
720     if (ibs != mbs) { /* need to convert to SEQAIJ */
721       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
722       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
723       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
724       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
725       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
726       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
727     } else {
728       /* this is safe */
729       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
730       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
731       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
732         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
733         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
734         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
735         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
736         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
737         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
738       } else {
739         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
740         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
741         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
742       }
743       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
744       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
745     }
746     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
747   }
748 
749   /* Matrix of coarse basis functions (local) */
750   if (pcbddc->coarse_phi_B) {
751     PetscInt on_B,on_primal,on_D=n_D;
752     if (pcbddc->coarse_phi_D) {
753       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
754     }
755     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
756     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
757       PetscScalar *marray;
758 
759       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
760       ierr = PetscFree(marray);CHKERRQ(ierr);
761       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
762       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
763       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
764       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
765     }
766   }
767 
768   if (!pcbddc->coarse_phi_B) {
769     PetscScalar *marray;
770 
771     n = n_B*pcbddc->local_primal_size;
772     if (pcbddc->switch_static || pcbddc->dbg_flag) {
773       n += n_D*pcbddc->local_primal_size;
774     }
775     if (!pcbddc->symmetric_primal) {
776       n *= 2;
777     }
778     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
779     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
780     n = n_B*pcbddc->local_primal_size;
781     if (pcbddc->switch_static || pcbddc->dbg_flag) {
782       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
783       n += n_D*pcbddc->local_primal_size;
784     }
785     if (!pcbddc->symmetric_primal) {
786       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
787       if (pcbddc->switch_static || pcbddc->dbg_flag) {
788         n = n_B*pcbddc->local_primal_size;
789         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
790       }
791     } else {
792       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
793       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
794       if (pcbddc->switch_static || pcbddc->dbg_flag) {
795         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
796         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
797       }
798     }
799   }
800   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
801   /* vertices */
802   if (n_vertices) {
803 
804     if (n_R) {
805       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
806       PetscBLASInt B_N,B_one = 1;
807       PetscScalar  *x,*y;
808 
809       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
810       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
811       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
812       if (F) { /* TODO could be optimized for symmetric problems */
813         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
814       } else {
815         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
816         for (i=0;i<n_vertices;i++) {
817           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
818           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
819           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
820           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
821           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
822         }
823         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
824       }
825       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
826       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
827       if (n_constraints) {
828         Mat B;
829 
830         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
831         for (i=0;i<n_vertices;i++) {
832           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
833           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
834           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
836           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
837           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
838         }
839         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
840         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
841         ierr = MatDestroy(&B);CHKERRQ(ierr);
842         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
843         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
844         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
845         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
846         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
847         ierr = MatDestroy(&B);CHKERRQ(ierr);
848       }
849       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
850       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
851       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
852       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
853       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
854       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
855       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
856       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
857       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
858       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
859     } else {
860       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
861     }
862     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
863     /* coarse basis functions */
864     for (i=0;i<n_vertices;i++) {
865       PetscScalar *y;
866 
867       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
868       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
869       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
870       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872       y[n_B*i+idx_V_B[i]] = 1.0;
873       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
874       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
875 
876       if (pcbddc->switch_static || pcbddc->dbg_flag) {
877         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
878         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
879         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
882         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
883       }
884       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
885     }
886   }
887 
888   if (n_constraints) {
889     Mat B;
890 
891     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
892     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
893     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
894     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
895     if (n_vertices) {
896       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
897         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
898       } else {
899         Mat S_VCt;
900 
901         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
902         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
903         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
904       }
905     }
906     ierr = MatDestroy(&B);CHKERRQ(ierr);
907     /* coarse basis functions */
908     for (i=0;i<n_constraints;i++) {
909       PetscScalar *y;
910 
911       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
912       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
913       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
914       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
917       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
918       if (pcbddc->switch_static || pcbddc->dbg_flag) {
919         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
920         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
921         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
923         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
924         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
925       }
926       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
927     }
928   }
929   if (n_constraints) {
930     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
931   }
932 
933   /* compute other basis functions for non-symmetric problems */
934   if (!pcbddc->symmetric_primal) {
935 
936     if (n_constraints) {
937       Mat S_CCT,B_C;
938 
939       /* this is a lazy thing */
940       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
941       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
942       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
943       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
944       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
945       if (n_vertices) {
946         Mat B_V,S_VCT;
947 
948         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
949         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
950         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
951         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
952         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
953       }
954       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
955     } else { /* if there are no constraints, reset work */
956       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
957     }
958     if (n_vertices && n_R) {
959       Mat          A_VRT;
960       PetscScalar  *marray;
961       PetscBLASInt B_N,B_one = 1;
962 
963       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
964       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
965       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
966       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
967       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
968       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
969       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
970     }
971 
972     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
973       for (i=0;i<pcbddc->local_primal_size;i++) {
974         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
975         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
976         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
977         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
978         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
979       }
980     } else {
981       for (i=0;i<pcbddc->local_primal_size;i++) {
982         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
983         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
984         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
985         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
986         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
987       }
988     }
989     /* coarse basis functions */
990     for (i=0;i<pcbddc->local_primal_size;i++) {
991       PetscScalar *y;
992 
993       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
994       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
995       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
996       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998       if (i<n_vertices) {
999         y[n_B*i+idx_V_B[i]] = 1.0;
1000       }
1001       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1002       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1003 
1004       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1005         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1006         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1007         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1008         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1009         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1010         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1011       }
1012       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1013     }
1014   }
1015   /* free memory */
1016   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1017   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1018   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1019   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1020   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1021   ierr = PetscFree(work);CHKERRQ(ierr);
1022   if (n_vertices) {
1023     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1024   }
1025   if (n_constraints) {
1026     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1027   }
1028   /* Checking coarse_sub_mat and coarse basis functios */
1029   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1030   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1031   if (pcbddc->dbg_flag) {
1032     Mat         coarse_sub_mat;
1033     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1034     Mat         coarse_phi_D,coarse_phi_B;
1035     Mat         coarse_psi_D,coarse_psi_B;
1036     Mat         A_II,A_BB,A_IB,A_BI;
1037     Mat         C_B,CPHI;
1038     IS          is_dummy;
1039     Vec         mones;
1040     MatType     checkmattype=MATSEQAIJ;
1041     PetscReal   real_value;
1042 
1043     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1044     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1045     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1046     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1047     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1048     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1049     if (unsymmetric_check) {
1050       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1051       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1052     }
1053     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1054 
1055     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1056     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1057     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1058     if (unsymmetric_check) {
1059       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1060       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1061       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1062       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1063       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1064       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1065       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1066       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1067       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1068       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1069       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1070       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1071     } else {
1072       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1073       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1074       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1075       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1076       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1077       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1078       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1079       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1080     }
1081     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1082     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1083     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1084     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1085     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1086     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1087     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1088     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1089 
1090     /* check constraints */
1091     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1092     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1093     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1094     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1095     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1096     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1097     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1098     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1099     if (unsymmetric_check) {
1100       ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1101       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1102       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1103       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1104       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1105     }
1106     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1107     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1108     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1109     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1110 
1111     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1112     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1113     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1114     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1115     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1116     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1117     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1118     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1119     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1120     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1121     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1122     if (unsymmetric_check) {
1123       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1124       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1125     }
1126     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1127   }
1128   /* get back data */
1129   *coarse_submat_vals_n = coarse_submat_vals;
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1135 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1136 {
1137   Mat            *work_mat;
1138   IS             isrow_s,iscol_s;
1139   PetscBool      rsorted,csorted;
1140   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1141   PetscErrorCode ierr;
1142 
1143   PetscFunctionBegin;
1144   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1145   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1146   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1147   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1148 
1149   if (!rsorted) {
1150     const PetscInt *idxs;
1151     PetscInt *idxs_sorted,i;
1152 
1153     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1154     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1155     for (i=0;i<rsize;i++) {
1156       idxs_perm_r[i] = i;
1157     }
1158     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1159     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1160     for (i=0;i<rsize;i++) {
1161       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1162     }
1163     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1164     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1165   } else {
1166     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1167     isrow_s = isrow;
1168   }
1169 
1170   if (!csorted) {
1171     if (isrow == iscol) {
1172       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1173       iscol_s = isrow_s;
1174     } else {
1175       const PetscInt *idxs;
1176       PetscInt *idxs_sorted,i;
1177 
1178       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1179       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1180       for (i=0;i<csize;i++) {
1181         idxs_perm_c[i] = i;
1182       }
1183       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1184       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1185       for (i=0;i<csize;i++) {
1186         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1187       }
1188       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1189       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1190     }
1191   } else {
1192     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1193     iscol_s = iscol;
1194   }
1195 
1196   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1197 
1198   if (!rsorted || !csorted) {
1199     Mat      new_mat;
1200     IS       is_perm_r,is_perm_c;
1201 
1202     if (!rsorted) {
1203       PetscInt *idxs_r,i;
1204       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1205       for (i=0;i<rsize;i++) {
1206         idxs_r[idxs_perm_r[i]] = i;
1207       }
1208       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1209       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1210     } else {
1211       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1212     }
1213     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1214 
1215     if (!csorted) {
1216       if (isrow_s == iscol_s) {
1217         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1218         is_perm_c = is_perm_r;
1219       } else {
1220         PetscInt *idxs_c,i;
1221         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1222         for (i=0;i<csize;i++) {
1223           idxs_c[idxs_perm_c[i]] = i;
1224         }
1225         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1226         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1227       }
1228     } else {
1229       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1230     }
1231     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1232 
1233     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1234     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1235     work_mat[0] = new_mat;
1236     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1237     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1238   }
1239 
1240   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1241   *B = work_mat[0];
1242   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1243   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1244   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1250 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1251 {
1252   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1253   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1254   Mat            new_mat;
1255   IS             is_local,is_global;
1256   PetscInt       local_size;
1257   PetscBool      isseqaij;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1262   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1263   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1264   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1265   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1266   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1267   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1268 
1269   /* check */
1270   if (pcbddc->dbg_flag) {
1271     Vec       x,x_change;
1272     PetscReal error;
1273 
1274     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1275     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1276     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1277     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1278     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1279     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1280     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1281     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1282     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1283     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1284     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1286     ierr = VecDestroy(&x);CHKERRQ(ierr);
1287     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1288   }
1289 
1290   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1291   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1292   if (isseqaij) {
1293     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1294   } else {
1295     Mat work_mat;
1296     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1297     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1298     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1299   }
1300   if (matis->A->symmetric_set) {
1301     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1302 #if !defined(PETSC_USE_COMPLEX)
1303     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1304 #endif
1305   }
1306   /*
1307   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1308   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1309   */
1310   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1316 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1317 {
1318   PC_IS*          pcis = (PC_IS*)(pc->data);
1319   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1320   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1321   PetscInt        *idx_R_local=NULL;
1322   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1323   PetscInt        vbs,bs;
1324   PetscBT         bitmask=NULL;
1325   PetscErrorCode  ierr;
1326 
1327   PetscFunctionBegin;
1328   /*
1329     No need to setup local scatters if
1330       - primal space is unchanged
1331         AND
1332       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1333         AND
1334       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1335   */
1336   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1337     PetscFunctionReturn(0);
1338   }
1339   /* destroy old objects */
1340   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1341   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1342   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1343   /* Set Non-overlapping dimensions */
1344   n_B = pcis->n_B;
1345   n_D = pcis->n - n_B;
1346   n_vertices = pcbddc->n_vertices;
1347 
1348   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1349 
1350   /* create auxiliary bitmask and allocate workspace */
1351   if (!sub_schurs->reuse_mumps) {
1352     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1353     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1354     for (i=0;i<n_vertices;i++) {
1355       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1356     }
1357 
1358     for (i=0, n_R=0; i<pcis->n; i++) {
1359       if (!PetscBTLookup(bitmask,i)) {
1360         idx_R_local[n_R++] = i;
1361       }
1362     }
1363   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1364     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1365 
1366     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1367     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1368   }
1369 
1370   /* Block code */
1371   vbs = 1;
1372   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1373   if (bs>1 && !(n_vertices%bs)) {
1374     PetscBool is_blocked = PETSC_TRUE;
1375     PetscInt  *vary;
1376     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1377     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1378     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1379     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1380     for (i=0; i<n_vertices/bs; i++) {
1381       if (vary[i]!=0 && vary[i]!=bs) {
1382         is_blocked = PETSC_FALSE;
1383         break;
1384       }
1385     }
1386     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1387       vbs = bs;
1388       for (i=0;i<n_R/vbs;i++) {
1389         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1390       }
1391     }
1392     ierr = PetscFree(vary);CHKERRQ(ierr);
1393   }
1394   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1395   if (sub_schurs->reuse_mumps) {
1396     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1397 
1398     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1399     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1400     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1401     reuse_mumps->is_R = pcbddc->is_R_local;
1402   } else {
1403     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1404   }
1405 
1406   /* print some info if requested */
1407   if (pcbddc->dbg_flag) {
1408     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1409     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1410     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1411     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1412     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1413     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1414     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1415   }
1416 
1417   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1418   if (!sub_schurs->reuse_mumps) {
1419     IS       is_aux1,is_aux2;
1420     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1421 
1422     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1423     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1424     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1425     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1426     for (i=0; i<n_D; i++) {
1427       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1428     }
1429     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1430     for (i=0, j=0; i<n_R; i++) {
1431       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1432         aux_array1[j++] = i;
1433       }
1434     }
1435     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1436     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1437     for (i=0, j=0; i<n_B; i++) {
1438       if (!PetscBTLookup(bitmask,is_indices[i])) {
1439         aux_array2[j++] = i;
1440       }
1441     }
1442     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1443     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1444     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1445     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1446     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1447 
1448     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1449       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1450       for (i=0, j=0; i<n_R; i++) {
1451         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1452           aux_array1[j++] = i;
1453         }
1454       }
1455       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1456       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1457       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1458     }
1459     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1460     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1461   } else {
1462     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1463     IS               tis;
1464     PetscInt         schur_size;
1465 
1466     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1467     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1468     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1469     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1470     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1471       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1472       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1473       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1474     }
1475   }
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1482 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1483 {
1484   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1485   PC_IS          *pcis = (PC_IS*)pc->data;
1486   PC             pc_temp;
1487   Mat            A_RR;
1488   MatReuse       reuse;
1489   PetscScalar    m_one = -1.0;
1490   PetscReal      value;
1491   PetscInt       n_D,n_R,ibs,mbs;
1492   PetscBool      use_exact,use_exact_reduced,issbaij;
1493   PetscErrorCode ierr;
1494   /* prefixes stuff */
1495   char           dir_prefix[256],neu_prefix[256],str_level[16];
1496   size_t         len;
1497 
1498   PetscFunctionBegin;
1499 
1500   /* compute prefixes */
1501   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1502   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1503   if (!pcbddc->current_level) {
1504     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1505     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1506     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1507     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1508   } else {
1509     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1510     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1511     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1512     len -= 15; /* remove "pc_bddc_coarse_" */
1513     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1514     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1515     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1516     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1517     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1518     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1519     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1520     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1521   }
1522 
1523   /* DIRICHLET PROBLEM */
1524   if (dirichlet) {
1525     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1526     if (pcbddc->local_mat->symmetric_set) {
1527       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1528     }
1529     /* Matrix for Dirichlet problem is pcis->A_II */
1530     n_D = pcis->n - pcis->n_B;
1531     if (!pcbddc->ksp_D) { /* create object if not yet build */
1532       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1533       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1534       /* default */
1535       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1536       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1537       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1538       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1539       if (issbaij) {
1540         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1541       } else {
1542         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1543       }
1544       /* Allow user's customization */
1545       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1546       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1547     }
1548     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1549     if (sub_schurs->reuse_mumps) {
1550       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1551 
1552       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1553     }
1554     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1555     if (!n_D) {
1556       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1557       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1558     }
1559     /* Set Up KSP for Dirichlet problem of BDDC */
1560     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1561     /* set ksp_D into pcis data */
1562     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1563     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1564     pcis->ksp_D = pcbddc->ksp_D;
1565   }
1566 
1567   /* NEUMANN PROBLEM */
1568   A_RR = 0;
1569   if (neumann) {
1570     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1571     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1572     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1573     if (pcbddc->ksp_R) { /* already created ksp */
1574       PetscInt nn_R;
1575       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1576       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1577       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1578       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1579         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1580         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1581         reuse = MAT_INITIAL_MATRIX;
1582       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1583         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1584           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1585           reuse = MAT_INITIAL_MATRIX;
1586         } else { /* safe to reuse the matrix */
1587           reuse = MAT_REUSE_MATRIX;
1588         }
1589       }
1590       /* last check */
1591       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1592         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1593         reuse = MAT_INITIAL_MATRIX;
1594       }
1595     } else { /* first time, so we need to create the matrix */
1596       reuse = MAT_INITIAL_MATRIX;
1597     }
1598     /* extract A_RR */
1599     if (!sub_schurs->reuse_mumps) {
1600       ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1601       ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1602       if (ibs != mbs) {
1603         Mat newmat;
1604         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1605         ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1606         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1607       } else {
1608         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1609       }
1610       if (pcbddc->local_mat->symmetric_set) {
1611         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1612       }
1613     } else {
1614       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1615 
1616       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1617       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1618       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1619     }
1620     if (!pcbddc->ksp_R) { /* create object if not present */
1621       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1622       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1623       /* default */
1624       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1625       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1626       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1627       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1628       if (issbaij) {
1629         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1630       } else {
1631         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1632       }
1633       /* Allow user's customization */
1634       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1635       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1636     }
1637     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1638     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1639     if (!n_R) {
1640       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1641       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1642     }
1643     /* Reuse MUMPS solver if it is present */
1644     if (sub_schurs->reuse_mumps) {
1645       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1646 
1647       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1648     }
1649     /* Set Up KSP for Neumann problem of BDDC */
1650     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1651   }
1652   /* free Neumann problem's matrix */
1653   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1654 
1655   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1656   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1657     if (pcbddc->dbg_flag) {
1658       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1659       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1660       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1661     }
1662     if (dirichlet) { /* Dirichlet */
1663       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1664       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1665       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1666       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1667       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1668       /* need to be adapted? */
1669       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1670       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1671       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1672       /* print info */
1673       if (pcbddc->dbg_flag) {
1674         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1675         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1676       }
1677       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1678         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1679       }
1680     }
1681     if (neumann) { /* Neumann */
1682       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1683       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1684       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1685       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1686       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1687       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1688       /* need to be adapted? */
1689       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1690       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1691       /* print info */
1692       if (pcbddc->dbg_flag) {
1693         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1694         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1695       }
1696       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1697         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1698       }
1699     }
1700   }
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1706 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1707 {
1708   PetscErrorCode  ierr;
1709   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1710   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1711 
1712   PetscFunctionBegin;
1713   if (!sub_schurs->reuse_mumps) {
1714     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1715   }
1716   if (!pcbddc->switch_static) {
1717     if (applytranspose && pcbddc->local_auxmat1) {
1718       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1719       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1720     }
1721     if (!sub_schurs->reuse_mumps) {
1722       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1723       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1724     } else {
1725       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1726 
1727       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1729     }
1730   } else {
1731     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1732     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1733     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1734     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1735     if (applytranspose && pcbddc->local_auxmat1) {
1736       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1737       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1738       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1739       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1740     }
1741   }
1742   if (!sub_schurs->reuse_mumps) {
1743     if (applytranspose) {
1744       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1745     } else {
1746       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1747     }
1748 #if defined(PETSC_HAVE_MUMPS)
1749   } else {
1750     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1751 
1752     if (applytranspose) {
1753       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1754     } else {
1755       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1756     }
1757 #endif
1758   }
1759   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1760   if (!pcbddc->switch_static) {
1761     if (!sub_schurs->reuse_mumps) {
1762       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1763       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1764     } else {
1765       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1766 
1767       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1768       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1769     }
1770     if (!applytranspose && pcbddc->local_auxmat1) {
1771       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1772       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1773     }
1774   } else {
1775     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1776     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1777     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1778     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1779     if (!applytranspose && pcbddc->local_auxmat1) {
1780       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1781       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1782     }
1783     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1784     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1785     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1786     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1787   }
1788   PetscFunctionReturn(0);
1789 }
1790 
1791 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1792 #undef __FUNCT__
1793 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1794 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1795 {
1796   PetscErrorCode ierr;
1797   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1798   PC_IS*            pcis = (PC_IS*)  (pc->data);
1799   const PetscScalar zero = 0.0;
1800 
1801   PetscFunctionBegin;
1802   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1803   if (applytranspose) {
1804     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1805     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1806   } else {
1807     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1808     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1809   }
1810   /* start communications from local primal nodes to rhs of coarse solver */
1811   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1812   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1813   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1814 
1815   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1816   /* TODO remove null space when doing multilevel */
1817   if (pcbddc->coarse_ksp) {
1818     Vec rhs,sol;
1819 
1820     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1821     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1822     if (applytranspose) {
1823       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1824     } else {
1825       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1826     }
1827   }
1828 
1829   /* Local solution on R nodes */
1830   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1831     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1832   }
1833 
1834   /* communications from coarse sol to local primal nodes */
1835   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1836   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1837 
1838   /* Sum contributions from two levels */
1839   if (applytranspose) {
1840     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1841     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1842   } else {
1843     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1844     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1845   }
1846   PetscFunctionReturn(0);
1847 }
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1851 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1852 {
1853   PetscErrorCode ierr;
1854   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1855   PetscScalar    *array;
1856   Vec            from,to;
1857 
1858   PetscFunctionBegin;
1859   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1860     from = pcbddc->coarse_vec;
1861     to = pcbddc->vec1_P;
1862     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1863       Vec tvec;
1864 
1865       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1866       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1867       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1868       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1869       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1870       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1871     }
1872   } else { /* from local to global -> put data in coarse right hand side */
1873     from = pcbddc->vec1_P;
1874     to = pcbddc->coarse_vec;
1875   }
1876   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1882 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1883 {
1884   PetscErrorCode ierr;
1885   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1886   PetscScalar    *array;
1887   Vec            from,to;
1888 
1889   PetscFunctionBegin;
1890   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1891     from = pcbddc->coarse_vec;
1892     to = pcbddc->vec1_P;
1893   } else { /* from local to global -> put data in coarse right hand side */
1894     from = pcbddc->vec1_P;
1895     to = pcbddc->coarse_vec;
1896   }
1897   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1898   if (smode == SCATTER_FORWARD) {
1899     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1900       Vec tvec;
1901 
1902       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1903       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1904       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1905       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1906     }
1907   } else {
1908     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1909      ierr = VecResetArray(from);CHKERRQ(ierr);
1910     }
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 /* uncomment for testing purposes */
1916 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1917 #undef __FUNCT__
1918 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1919 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1920 {
1921   PetscErrorCode    ierr;
1922   PC_IS*            pcis = (PC_IS*)(pc->data);
1923   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1924   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1925   /* one and zero */
1926   PetscScalar       one=1.0,zero=0.0;
1927   /* space to store constraints and their local indices */
1928   PetscScalar       *constraints_data;
1929   PetscInt          *constraints_idxs,*constraints_idxs_B;
1930   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1931   PetscInt          *constraints_n;
1932   /* iterators */
1933   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1934   /* BLAS integers */
1935   PetscBLASInt      lwork,lierr;
1936   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1937   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1938   /* reuse */
1939   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1940   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1941   /* change of basis */
1942   PetscBool         qr_needed;
1943   PetscBT           change_basis,qr_needed_idx;
1944   /* auxiliary stuff */
1945   PetscInt          *nnz,*is_indices;
1946   PetscInt          ncc;
1947   /* some quantities */
1948   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1949   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1950 
1951   PetscFunctionBegin;
1952   /* Destroy Mat objects computed previously */
1953   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1954   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1955   /* save info on constraints from previous setup (if any) */
1956   olocal_primal_size = pcbddc->local_primal_size;
1957   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1958   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1959   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1960   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1961   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1962   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1963 
1964   /* print some info */
1965   if (pcbddc->dbg_flag) {
1966     IS       vertices;
1967     PetscInt nv,nedges,nfaces;
1968     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1969     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1970     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1971     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1972     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1973     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1974     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1975     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1976     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1977   }
1978 
1979   if (!pcbddc->adaptive_selection) {
1980     IS           ISForVertices,*ISForFaces,*ISForEdges;
1981     MatNullSpace nearnullsp;
1982     const Vec    *nearnullvecs;
1983     Vec          *localnearnullsp;
1984     PetscScalar  *array;
1985     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1986     PetscBool    nnsp_has_cnst;
1987     /* LAPACK working arrays for SVD or POD */
1988     PetscBool    skip_lapack,boolforchange;
1989     PetscScalar  *work;
1990     PetscReal    *singular_vals;
1991 #if defined(PETSC_USE_COMPLEX)
1992     PetscReal    *rwork;
1993 #endif
1994 #if defined(PETSC_MISSING_LAPACK_GESVD)
1995     PetscScalar  *temp_basis,*correlation_mat;
1996 #else
1997     PetscBLASInt dummy_int=1;
1998     PetscScalar  dummy_scalar=1.;
1999 #endif
2000 
2001     /* Get index sets for faces, edges and vertices from graph */
2002     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2003     /* free unneeded index sets */
2004     if (!pcbddc->use_vertices) {
2005       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2006     }
2007     if (!pcbddc->use_edges) {
2008       for (i=0;i<n_ISForEdges;i++) {
2009         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2010       }
2011       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2012       n_ISForEdges = 0;
2013     }
2014     if (!pcbddc->use_faces) {
2015       for (i=0;i<n_ISForFaces;i++) {
2016         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2017       }
2018       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2019       n_ISForFaces = 0;
2020     }
2021 
2022 #if defined(PETSC_USE_DEBUG)
2023     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2024        Also use_change_of_basis should be consistent among processors */
2025     if (pcbddc->NullSpace) {
2026       PetscBool tbool[2],gbool[2];
2027 
2028       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2029         pcbddc->use_change_of_basis = PETSC_TRUE;
2030         if (!ISForEdges) {
2031           pcbddc->use_change_on_faces = PETSC_TRUE;
2032         }
2033       }
2034       tbool[0] = pcbddc->use_change_of_basis;
2035       tbool[1] = pcbddc->use_change_on_faces;
2036       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2037       pcbddc->use_change_of_basis = gbool[0];
2038       pcbddc->use_change_on_faces = gbool[1];
2039     }
2040 #endif
2041 
2042     /* check if near null space is attached to global mat */
2043     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2044     if (nearnullsp) {
2045       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2046       /* remove any stored info */
2047       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2048       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2049       /* store information for BDDC solver reuse */
2050       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2051       pcbddc->onearnullspace = nearnullsp;
2052       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2053       for (i=0;i<nnsp_size;i++) {
2054         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2055       }
2056     } else { /* if near null space is not provided BDDC uses constants by default */
2057       nnsp_size = 0;
2058       nnsp_has_cnst = PETSC_TRUE;
2059     }
2060     /* get max number of constraints on a single cc */
2061     max_constraints = nnsp_size;
2062     if (nnsp_has_cnst) max_constraints++;
2063 
2064     /*
2065          Evaluate maximum storage size needed by the procedure
2066          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2067          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2068          There can be multiple constraints per connected component
2069                                                                                                                                                            */
2070     n_vertices = 0;
2071     if (ISForVertices) {
2072       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2073     }
2074     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2075     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2076 
2077     total_counts = n_ISForFaces+n_ISForEdges;
2078     total_counts *= max_constraints;
2079     total_counts += n_vertices;
2080     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2081 
2082     total_counts = 0;
2083     max_size_of_constraint = 0;
2084     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2085       IS used_is;
2086       if (i<n_ISForEdges) {
2087         used_is = ISForEdges[i];
2088       } else {
2089         used_is = ISForFaces[i-n_ISForEdges];
2090       }
2091       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2092       total_counts += j;
2093       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2094     }
2095     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2096 
2097     /* get local part of global near null space vectors */
2098     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2099     for (k=0;k<nnsp_size;k++) {
2100       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2101       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2102       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2103     }
2104 
2105     /* whether or not to skip lapack calls */
2106     skip_lapack = PETSC_TRUE;
2107     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2108 
2109     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2110     if (!skip_lapack) {
2111       PetscScalar temp_work;
2112 
2113 #if defined(PETSC_MISSING_LAPACK_GESVD)
2114       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2115       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2116       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2117       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2118 #if defined(PETSC_USE_COMPLEX)
2119       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2120 #endif
2121       /* now we evaluate the optimal workspace using query with lwork=-1 */
2122       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2123       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2124       lwork = -1;
2125       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2126 #if !defined(PETSC_USE_COMPLEX)
2127       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2128 #else
2129       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2130 #endif
2131       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2132       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2133 #else /* on missing GESVD */
2134       /* SVD */
2135       PetscInt max_n,min_n;
2136       max_n = max_size_of_constraint;
2137       min_n = max_constraints;
2138       if (max_size_of_constraint < max_constraints) {
2139         min_n = max_size_of_constraint;
2140         max_n = max_constraints;
2141       }
2142       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2143 #if defined(PETSC_USE_COMPLEX)
2144       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2145 #endif
2146       /* now we evaluate the optimal workspace using query with lwork=-1 */
2147       lwork = -1;
2148       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2149       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2150       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2151       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2152 #if !defined(PETSC_USE_COMPLEX)
2153       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2154 #else
2155       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2156 #endif
2157       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2158       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2159 #endif /* on missing GESVD */
2160       /* Allocate optimal workspace */
2161       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2162       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2163     }
2164     /* Now we can loop on constraining sets */
2165     total_counts = 0;
2166     constraints_idxs_ptr[0] = 0;
2167     constraints_data_ptr[0] = 0;
2168     /* vertices */
2169     if (n_vertices) {
2170       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2171       if (nnsp_has_cnst) { /* it considers all possible vertices */
2172         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2173         for (i=0;i<n_vertices;i++) {
2174           constraints_n[total_counts] = 1;
2175           constraints_data[total_counts] = 1.0;
2176           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2177           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2178           total_counts++;
2179         }
2180       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2181         PetscBool used_vertex;
2182         for (i=0;i<n_vertices;i++) {
2183           used_vertex = PETSC_FALSE;
2184           k = 0;
2185           while (!used_vertex && k<nnsp_size) {
2186             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2187             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2188               constraints_n[total_counts] = 1;
2189               constraints_idxs[total_counts] = is_indices[i];
2190               constraints_data[total_counts] = 1.0;
2191               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2192               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2193               total_counts++;
2194               used_vertex = PETSC_TRUE;
2195             }
2196             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2197             k++;
2198           }
2199         }
2200       }
2201       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2202       n_vertices = total_counts;
2203     }
2204 
2205     /* edges and faces */
2206     total_counts_cc = total_counts;
2207     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2208       IS        used_is;
2209       PetscBool idxs_copied = PETSC_FALSE;
2210 
2211       if (ncc<n_ISForEdges) {
2212         used_is = ISForEdges[ncc];
2213         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2214       } else {
2215         used_is = ISForFaces[ncc-n_ISForEdges];
2216         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2217       }
2218       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2219 
2220       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2221       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2222       /* change of basis should not be performed on local periodic nodes */
2223       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2224       if (nnsp_has_cnst) {
2225         PetscScalar quad_value;
2226 
2227         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2228         idxs_copied = PETSC_TRUE;
2229 
2230         if (!pcbddc->use_nnsp_true) {
2231           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2232         } else {
2233           quad_value = 1.0;
2234         }
2235         for (j=0;j<size_of_constraint;j++) {
2236           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2237         }
2238         temp_constraints++;
2239         total_counts++;
2240       }
2241       for (k=0;k<nnsp_size;k++) {
2242         PetscReal real_value;
2243         PetscScalar *ptr_to_data;
2244 
2245         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2246         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2247         for (j=0;j<size_of_constraint;j++) {
2248           ptr_to_data[j] = array[is_indices[j]];
2249         }
2250         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2251         /* check if array is null on the connected component */
2252         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2253         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2254         if (real_value > 0.0) { /* keep indices and values */
2255           temp_constraints++;
2256           total_counts++;
2257           if (!idxs_copied) {
2258             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2259             idxs_copied = PETSC_TRUE;
2260           }
2261         }
2262       }
2263       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2264       valid_constraints = temp_constraints;
2265       if (!pcbddc->use_nnsp_true && temp_constraints) {
2266         if (temp_constraints == 1) { /* just normalize the constraint */
2267           PetscScalar norm,*ptr_to_data;
2268 
2269           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2270           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2271           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2272           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2273           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2274         } else { /* perform SVD */
2275           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2276           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2277 
2278 #if defined(PETSC_MISSING_LAPACK_GESVD)
2279           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2280              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2281              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2282                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2283                 from that computed using LAPACKgesvd
2284              -> This is due to a different computation of eigenvectors in LAPACKheev
2285              -> The quality of the POD-computed basis will be the same */
2286           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2287           /* Store upper triangular part of correlation matrix */
2288           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2289           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2290           for (j=0;j<temp_constraints;j++) {
2291             for (k=0;k<j+1;k++) {
2292               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2293             }
2294           }
2295           /* compute eigenvalues and eigenvectors of correlation matrix */
2296           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2297           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2298 #if !defined(PETSC_USE_COMPLEX)
2299           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2300 #else
2301           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2302 #endif
2303           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2304           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2305           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2306           j = 0;
2307           while (j < temp_constraints && singular_vals[j] < tol) j++;
2308           total_counts = total_counts-j;
2309           valid_constraints = temp_constraints-j;
2310           /* scale and copy POD basis into used quadrature memory */
2311           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2312           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2313           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2314           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2315           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2316           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2317           if (j<temp_constraints) {
2318             PetscInt ii;
2319             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2320             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2321             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2322             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2323             for (k=0;k<temp_constraints-j;k++) {
2324               for (ii=0;ii<size_of_constraint;ii++) {
2325                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2326               }
2327             }
2328           }
2329 #else  /* on missing GESVD */
2330           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2331           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2332           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2333           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2334 #if !defined(PETSC_USE_COMPLEX)
2335           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2336 #else
2337           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2338 #endif
2339           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2340           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2341           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2342           k = temp_constraints;
2343           if (k > size_of_constraint) k = size_of_constraint;
2344           j = 0;
2345           while (j < k && singular_vals[k-j-1] < tol) j++;
2346           valid_constraints = k-j;
2347           total_counts = total_counts-temp_constraints+valid_constraints;
2348 #endif /* on missing GESVD */
2349         }
2350       }
2351       /* update pointers information */
2352       if (valid_constraints) {
2353         constraints_n[total_counts_cc] = valid_constraints;
2354         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2355         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2356         /* set change_of_basis flag */
2357         if (boolforchange) {
2358           PetscBTSet(change_basis,total_counts_cc);
2359         }
2360         total_counts_cc++;
2361       }
2362     }
2363     /* free workspace */
2364     if (!skip_lapack) {
2365       ierr = PetscFree(work);CHKERRQ(ierr);
2366 #if defined(PETSC_USE_COMPLEX)
2367       ierr = PetscFree(rwork);CHKERRQ(ierr);
2368 #endif
2369       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2370 #if defined(PETSC_MISSING_LAPACK_GESVD)
2371       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2372       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2373 #endif
2374     }
2375     for (k=0;k<nnsp_size;k++) {
2376       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2377     }
2378     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2379     /* free index sets of faces, edges and vertices */
2380     for (i=0;i<n_ISForFaces;i++) {
2381       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2382     }
2383     if (n_ISForFaces) {
2384       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2385     }
2386     for (i=0;i<n_ISForEdges;i++) {
2387       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2388     }
2389     if (n_ISForEdges) {
2390       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2391     }
2392     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2393   } else {
2394     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2395 
2396     total_counts = 0;
2397     n_vertices = 0;
2398     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2399       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2400     }
2401     max_constraints = 0;
2402     total_counts_cc = 0;
2403     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2404       total_counts += pcbddc->adaptive_constraints_n[i];
2405       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2406       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2407     }
2408     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2409     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2410     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2411     constraints_data = pcbddc->adaptive_constraints_data;
2412     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2413     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2414     total_counts_cc = 0;
2415     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2416       if (pcbddc->adaptive_constraints_n[i]) {
2417         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2418       }
2419     }
2420 #if 0
2421     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2422     for (i=0;i<total_counts_cc;i++) {
2423       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2424       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2425       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2426         printf(" %d",constraints_idxs[j]);
2427       }
2428       printf("\n");
2429       printf("number of cc: %d\n",constraints_n[i]);
2430     }
2431     for (i=0;i<n_vertices;i++) {
2432       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2433     }
2434     for (i=0;i<sub_schurs->n_subs;i++) {
2435       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2436     }
2437 #endif
2438 
2439     max_size_of_constraint = 0;
2440     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2441     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2442     /* Change of basis */
2443     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2444     if (pcbddc->use_change_of_basis) {
2445       for (i=0;i<sub_schurs->n_subs;i++) {
2446         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2447           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2448         }
2449       }
2450     }
2451   }
2452   pcbddc->local_primal_size = total_counts;
2453   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2454 
2455   /* map constraints_idxs in boundary numbering */
2456   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2457   if (i != constraints_idxs_ptr[total_counts_cc]) {
2458     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2459   }
2460 
2461   /* Create constraint matrix */
2462   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2463   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2464   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2465 
2466   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2467   /* determine if a QR strategy is needed for change of basis */
2468   qr_needed = PETSC_FALSE;
2469   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2470   total_primal_vertices=0;
2471   pcbddc->local_primal_size_cc = 0;
2472   for (i=0;i<total_counts_cc;i++) {
2473     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2474     if (size_of_constraint == 1) {
2475       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2476       pcbddc->local_primal_size_cc += 1;
2477     } else if (PetscBTLookup(change_basis,i)) {
2478       for (k=0;k<constraints_n[i];k++) {
2479         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2480       }
2481       pcbddc->local_primal_size_cc += constraints_n[i];
2482       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2483         PetscBTSet(qr_needed_idx,i);
2484         qr_needed = PETSC_TRUE;
2485       }
2486     } else {
2487       pcbddc->local_primal_size_cc += 1;
2488     }
2489   }
2490   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2491   pcbddc->n_vertices = total_primal_vertices;
2492   /* permute indices in order to have a sorted set of vertices */
2493   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2494 
2495   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2496   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2497   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2498 
2499   /* nonzero structure of constraint matrix */
2500   /* and get reference dof for local constraints */
2501   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2502   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2503 
2504   j = total_primal_vertices;
2505   total_counts = total_primal_vertices;
2506   cum = total_primal_vertices;
2507   for (i=n_vertices;i<total_counts_cc;i++) {
2508     if (!PetscBTLookup(change_basis,i)) {
2509       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2510       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2511       cum++;
2512       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2513       for (k=0;k<constraints_n[i];k++) {
2514         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2515         nnz[j+k] = size_of_constraint;
2516       }
2517       j += constraints_n[i];
2518     }
2519   }
2520   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2521   ierr = PetscFree(nnz);CHKERRQ(ierr);
2522 
2523   /* set values in constraint matrix */
2524   for (i=0;i<total_primal_vertices;i++) {
2525     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2526   }
2527   total_counts = total_primal_vertices;
2528   for (i=n_vertices;i<total_counts_cc;i++) {
2529     if (!PetscBTLookup(change_basis,i)) {
2530       PetscInt *cols;
2531 
2532       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2533       cols = constraints_idxs+constraints_idxs_ptr[i];
2534       for (k=0;k<constraints_n[i];k++) {
2535         PetscInt    row = total_counts+k;
2536         PetscScalar *vals;
2537 
2538         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2539         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2540       }
2541       total_counts += constraints_n[i];
2542     }
2543   }
2544   /* assembling */
2545   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2546   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2547 
2548   /*
2549   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2550   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2551   */
2552   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2553   if (pcbddc->use_change_of_basis) {
2554     /* dual and primal dofs on a single cc */
2555     PetscInt     dual_dofs,primal_dofs;
2556     /* working stuff for GEQRF */
2557     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2558     PetscBLASInt lqr_work;
2559     /* working stuff for UNGQR */
2560     PetscScalar  *gqr_work,lgqr_work_t;
2561     PetscBLASInt lgqr_work;
2562     /* working stuff for TRTRS */
2563     PetscScalar  *trs_rhs;
2564     PetscBLASInt Blas_NRHS;
2565     /* pointers for values insertion into change of basis matrix */
2566     PetscInt     *start_rows,*start_cols;
2567     PetscScalar  *start_vals;
2568     /* working stuff for values insertion */
2569     PetscBT      is_primal;
2570     PetscInt     *aux_primal_numbering_B;
2571     /* matrix sizes */
2572     PetscInt     global_size,local_size;
2573     /* temporary change of basis */
2574     Mat          localChangeOfBasisMatrix;
2575     /* extra space for debugging */
2576     PetscScalar  *dbg_work;
2577 
2578     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2579     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2580     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2581     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2582     /* nonzeros for local mat */
2583     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2584     for (i=0;i<pcis->n;i++) nnz[i]=1;
2585     for (i=n_vertices;i<total_counts_cc;i++) {
2586       if (PetscBTLookup(change_basis,i)) {
2587         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2588         if (PetscBTLookup(qr_needed_idx,i)) {
2589           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2590         } else {
2591           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2592           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2593         }
2594       }
2595     }
2596     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2597     ierr = PetscFree(nnz);CHKERRQ(ierr);
2598     /* Set initial identity in the matrix */
2599     for (i=0;i<pcis->n;i++) {
2600       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2601     }
2602 
2603     if (pcbddc->dbg_flag) {
2604       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2605       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2606     }
2607 
2608 
2609     /* Now we loop on the constraints which need a change of basis */
2610     /*
2611        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2612        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2613 
2614        Basic blocks of change of basis matrix T computed by
2615 
2616           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2617 
2618             | 1        0   ...        0         s_1/S |
2619             | 0        1   ...        0         s_2/S |
2620             |              ...                        |
2621             | 0        ...            1     s_{n-1}/S |
2622             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2623 
2624             with S = \sum_{i=1}^n s_i^2
2625             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2626                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2627 
2628           - QR decomposition of constraints otherwise
2629     */
2630     if (qr_needed) {
2631       /* space to store Q */
2632       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2633       /* first we issue queries for optimal work */
2634       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2635       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2636       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2637       lqr_work = -1;
2638       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2639       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2640       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2641       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2642       lgqr_work = -1;
2643       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2644       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2645       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2646       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2647       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2648       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2649       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2650       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2651       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2652       /* array to store scaling factors for reflectors */
2653       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2654       /* array to store rhs and solution of triangular solver */
2655       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2656       /* allocating workspace for check */
2657       if (pcbddc->dbg_flag) {
2658         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2659       }
2660     }
2661     /* array to store whether a node is primal or not */
2662     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2663     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2664     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2665     if (i != total_primal_vertices) {
2666       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2667     }
2668     for (i=0;i<total_primal_vertices;i++) {
2669       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2670     }
2671     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2672 
2673     /* loop on constraints and see whether or not they need a change of basis and compute it */
2674     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2675       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2676       if (PetscBTLookup(change_basis,total_counts)) {
2677         /* get constraint info */
2678         primal_dofs = constraints_n[total_counts];
2679         dual_dofs = size_of_constraint-primal_dofs;
2680 
2681         if (pcbddc->dbg_flag) {
2682           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2683         }
2684 
2685         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2686 
2687           /* copy quadrature constraints for change of basis check */
2688           if (pcbddc->dbg_flag) {
2689             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2690           }
2691           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2692           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2693 
2694           /* compute QR decomposition of constraints */
2695           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2696           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2697           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2698           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2699           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2700           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2701           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2702 
2703           /* explictly compute R^-T */
2704           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2705           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2706           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2707           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2708           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2709           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2710           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2711           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2712           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2713           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2714 
2715           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2716           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2717           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2718           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2719           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2720           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2721           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2722           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2723           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2724 
2725           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2726              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2727              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2728           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2729           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2730           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2731           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2732           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2733           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2734           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2735           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2736           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2737           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2738 
2739           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2740           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2741           /* insert cols for primal dofs */
2742           for (j=0;j<primal_dofs;j++) {
2743             start_vals = &qr_basis[j*size_of_constraint];
2744             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2745             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2746           }
2747           /* insert cols for dual dofs */
2748           for (j=0,k=0;j<dual_dofs;k++) {
2749             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2750               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2751               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2752               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2753               j++;
2754             }
2755           }
2756 
2757           /* check change of basis */
2758           if (pcbddc->dbg_flag) {
2759             PetscInt   ii,jj;
2760             PetscBool valid_qr=PETSC_TRUE;
2761             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2762             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2763             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2764             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2765             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2766             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2767             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2768             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2769             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2770             for (jj=0;jj<size_of_constraint;jj++) {
2771               for (ii=0;ii<primal_dofs;ii++) {
2772                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2773                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2774               }
2775             }
2776             if (!valid_qr) {
2777               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2778               for (jj=0;jj<size_of_constraint;jj++) {
2779                 for (ii=0;ii<primal_dofs;ii++) {
2780                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2781                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2782                   }
2783                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2784                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2785                   }
2786                 }
2787               }
2788             } else {
2789               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2790             }
2791           }
2792         } else { /* simple transformation block */
2793           PetscInt    row,col;
2794           PetscScalar val,norm;
2795 
2796           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2797           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2798           for (j=0;j<size_of_constraint;j++) {
2799             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2800             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2801             if (!PetscBTLookup(is_primal,row_B)) {
2802               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2803               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2804               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2805             } else {
2806               for (k=0;k<size_of_constraint;k++) {
2807                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2808                 if (row != col) {
2809                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2810                 } else {
2811                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2812                 }
2813                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2814               }
2815             }
2816           }
2817           if (pcbddc->dbg_flag) {
2818             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2819           }
2820         }
2821       } else {
2822         if (pcbddc->dbg_flag) {
2823           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2824         }
2825       }
2826     }
2827 
2828     /* free workspace */
2829     if (qr_needed) {
2830       if (pcbddc->dbg_flag) {
2831         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2832       }
2833       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2834       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2835       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2836       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2837       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2838     }
2839     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2840     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2841     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2842 
2843     /* assembling of global change of variable */
2844     {
2845       Mat      tmat;
2846       PetscInt bs;
2847 
2848       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2849       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2850       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2851       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2852       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2853       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2854       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2855       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2856       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2857       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2858       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2859       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2860       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2861       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2862       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2863       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2864       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2865       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2866     }
2867     /* check */
2868     if (pcbddc->dbg_flag) {
2869       PetscReal error;
2870       Vec       x,x_change;
2871 
2872       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2873       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2874       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2875       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2876       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2877       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2878       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2879       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2880       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2881       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2882       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2883       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2884       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2885       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2886       ierr = VecDestroy(&x);CHKERRQ(ierr);
2887       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2888     }
2889 
2890     /* adapt sub_schurs computed (if any) */
2891     if (pcbddc->use_deluxe_scaling) {
2892       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2893       if (sub_schurs->S_Ej_all) {
2894         Mat S_new,tmat;
2895         IS is_all_N;
2896 
2897         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2898         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2899         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2900         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2901         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2902         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2903         sub_schurs->S_Ej_all = S_new;
2904         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2905         if (sub_schurs->sum_S_Ej_all) {
2906           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2907           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2908           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2909           sub_schurs->sum_S_Ej_all = S_new;
2910           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2911         }
2912         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2913       }
2914     }
2915     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2916   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2917     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2918     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2919   }
2920 
2921   /* set up change of basis context */
2922   if (pcbddc->ChangeOfBasisMatrix) {
2923     PCBDDCChange_ctx change_ctx;
2924 
2925     if (!pcbddc->new_global_mat) {
2926       PetscInt global_size,local_size;
2927 
2928       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2929       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2930       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2931       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2932       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2933       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2934       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2935       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2936       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2937     } else {
2938       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2939       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2940       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2941     }
2942     if (!pcbddc->user_ChangeOfBasisMatrix) {
2943       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2944       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2945     } else {
2946       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2947       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2948     }
2949     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2950     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2951     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2952     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2953   }
2954 
2955   /* check if a new primal space has been introduced */
2956   pcbddc->new_primal_space_local = PETSC_TRUE;
2957   if (olocal_primal_size == pcbddc->local_primal_size) {
2958     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2959     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2960     if (!pcbddc->new_primal_space_local) {
2961       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2962       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2963     }
2964   }
2965   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2966   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2967   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2968 
2969   /* flush dbg viewer */
2970   if (pcbddc->dbg_flag) {
2971     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2972   }
2973 
2974   /* free workspace */
2975   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2976   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2977   if (!pcbddc->adaptive_selection) {
2978     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2979     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2980   } else {
2981     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2982                       pcbddc->adaptive_constraints_idxs_ptr,
2983                       pcbddc->adaptive_constraints_data_ptr,
2984                       pcbddc->adaptive_constraints_idxs,
2985                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2986     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2987     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2988   }
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 #undef __FUNCT__
2993 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2994 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2995 {
2996   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2997   PC_IS       *pcis = (PC_IS*)pc->data;
2998   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2999   PetscInt    ierr,i,vertex_size,N;
3000   PetscViewer viewer=pcbddc->dbg_viewer;
3001 
3002   PetscFunctionBegin;
3003   /* Reset previously computed graph */
3004   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3005   /* Init local Graph struct */
3006   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3007   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
3008 
3009   /* Check validity of the csr graph passed in by the user */
3010   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3011     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3012   }
3013 
3014   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3015   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3016     PetscInt  *xadj,*adjncy;
3017     PetscInt  nvtxs;
3018     PetscBool flg_row=PETSC_FALSE;
3019 
3020     if (pcbddc->use_local_adj) {
3021 
3022       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3023       if (flg_row) {
3024         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3025         pcbddc->computed_rowadj = PETSC_TRUE;
3026       }
3027       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3028     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3029       IS                     is_dummy;
3030       ISLocalToGlobalMapping l2gmap_dummy;
3031       PetscInt               j,sum;
3032       PetscInt               *cxadj,*cadjncy;
3033       const PetscInt         *idxs;
3034       PCBDDCGraph            graph;
3035       PetscBT                is_on_boundary;
3036 
3037       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3038       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3039       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3040       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3041       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3042       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3043       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3044       if (flg_row) {
3045         graph->xadj = xadj;
3046         graph->adjncy = adjncy;
3047       }
3048       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3049       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3050       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3051 
3052       if (pcbddc->dbg_flag) {
3053         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3054         for (i=0;i<graph->ncc;i++) {
3055           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3056         }
3057       }
3058 
3059       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3060       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3061       for (i=0;i<pcis->n_B;i++) {
3062         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3063       }
3064       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3065 
3066       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3067       sum = 0;
3068       for (i=0;i<graph->ncc;i++) {
3069         PetscInt sizecc = 0;
3070         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3071           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3072             sizecc++;
3073           }
3074         }
3075         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3076           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3077             cxadj[graph->queue[j]] = sizecc;
3078           }
3079         }
3080         sum += sizecc*sizecc;
3081       }
3082       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3083       sum = 0;
3084       for (i=0;i<pcis->n;i++) {
3085         PetscInt temp = cxadj[i];
3086         cxadj[i] = sum;
3087         sum += temp;
3088       }
3089       cxadj[pcis->n] = sum;
3090       for (i=0;i<graph->ncc;i++) {
3091         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3092           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3093             PetscInt k,sizecc = 0;
3094             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3095               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3096                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3097                 sizecc++;
3098               }
3099             }
3100           }
3101         }
3102       }
3103       if (sum) {
3104         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3105       } else {
3106         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3107         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3108       }
3109       graph->xadj = 0;
3110       graph->adjncy = 0;
3111       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3112       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3113     }
3114   }
3115   if (pcbddc->dbg_flag) {
3116     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3117   }
3118 
3119   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3120   vertex_size = 1;
3121   if (pcbddc->user_provided_isfordofs) {
3122     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3123       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3124       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3125         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3126         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3127       }
3128       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3129       pcbddc->n_ISForDofs = 0;
3130       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3131     }
3132     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3133     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3134   } else {
3135     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3136       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3137       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3138       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3139         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3140       }
3141     }
3142   }
3143 
3144   /* Setup of Graph */
3145   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3146     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3147   }
3148   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3149     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3150   }
3151   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3152 
3153   /* Graph's connected components analysis */
3154   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3155 
3156   /* print some info to stdout */
3157   if (pcbddc->dbg_flag) {
3158     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3159   }
3160 
3161   /* mark topography has done */
3162   pcbddc->recompute_topography = PETSC_FALSE;
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 #undef __FUNCT__
3167 #define __FUNCT__ "PCBDDCSubsetNumbering"
3168 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3169 {
3170   Vec            local_vec,global_vec;
3171   IS             seqis,paris;
3172   VecScatter     scatter_ctx;
3173   PetscScalar    *array;
3174   PetscInt       *temp_global_dofs;
3175   PetscScalar    globalsum;
3176   PetscInt       i,j,s;
3177   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3178   PetscMPIInt    rank_prec_comm,size_prec_comm;
3179   PetscInt       *dof_sizes,*dof_displs;
3180   PetscBool      first_found;
3181   PetscErrorCode ierr;
3182 
3183   PetscFunctionBegin;
3184   /* mpi buffers */
3185   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3186   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3187   j = ( !rank_prec_comm ? size_prec_comm : 0);
3188   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3189   /* get maximum size of subset */
3190   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3191   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3192   max_local = 0;
3193   for (i=0;i<n_local_dofs;i++) {
3194     if (max_local < temp_global_dofs[i] ) {
3195       max_local = temp_global_dofs[i];
3196     }
3197   }
3198   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3199   max_global++;
3200   max_local = 0;
3201   for (i=0;i<n_local_dofs;i++) {
3202     if (max_local < local_dofs[i] ) {
3203       max_local = local_dofs[i];
3204     }
3205   }
3206   max_local++;
3207   /* allocate workspace */
3208   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3209   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3210   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3211   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3212   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3213   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3214   /* create scatter */
3215   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3216   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3217   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3218   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3219   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3220   /* init array */
3221   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3222   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3223   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3224   if (local_dofs_mult) {
3225     for (i=0;i<n_local_dofs;i++) {
3226       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3227     }
3228   } else {
3229     for (i=0;i<n_local_dofs;i++) {
3230       array[local_dofs[i]]=1.0;
3231     }
3232   }
3233   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3234   /* scatter into global vec and get total number of global dofs */
3235   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3236   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3237   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3238   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3239   /* Fill global_vec with cumulative function for global numbering */
3240   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3241   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3242   nlocals = 0;
3243   first_index = -1;
3244   first_found = PETSC_FALSE;
3245   for (i=0;i<s;i++) {
3246     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3247       first_found = PETSC_TRUE;
3248       first_index = i;
3249     }
3250     nlocals += (PetscInt)PetscRealPart(array[i]);
3251   }
3252   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3253   if (!rank_prec_comm) {
3254     dof_displs[0]=0;
3255     for (i=1;i<size_prec_comm;i++) {
3256       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3257     }
3258   }
3259   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3260   if (first_found) {
3261     array[first_index] += (PetscScalar)nlocals;
3262     old_index = first_index;
3263     for (i=first_index+1;i<s;i++) {
3264       if (PetscRealPart(array[i]) > 0.1) {
3265         array[i] += array[old_index];
3266         old_index = i;
3267       }
3268     }
3269   }
3270   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3271   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3272   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3273   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3274   /* get global ordering of local dofs */
3275   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3276   if (local_dofs_mult) {
3277     for (i=0;i<n_local_dofs;i++) {
3278       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3279     }
3280   } else {
3281     for (i=0;i<n_local_dofs;i++) {
3282       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3283     }
3284   }
3285   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3286   /* free workspace */
3287   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3288   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3289   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3290   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3291   /* return pointer to global ordering of local dofs */
3292   *global_numbering_subset = temp_global_dofs;
3293   PetscFunctionReturn(0);
3294 }
3295 
3296 #undef __FUNCT__
3297 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3298 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3299 {
3300   PetscInt       i,j;
3301   PetscScalar    *alphas;
3302   PetscErrorCode ierr;
3303 
3304   PetscFunctionBegin;
3305   /* this implements stabilized Gram-Schmidt */
3306   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3307   for (i=0;i<n;i++) {
3308     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3309     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3310     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3311   }
3312   ierr = PetscFree(alphas);CHKERRQ(ierr);
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 #undef __FUNCT__
3317 #define __FUNCT__ "MatISGetSubassemblingPattern"
3318 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3319 {
3320   Mat             subdomain_adj;
3321   IS              new_ranks,ranks_send_to;
3322   MatPartitioning partitioner;
3323   Mat_IS          *matis;
3324   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3325   PetscInt        prank;
3326   PetscMPIInt     size,rank,color;
3327   PetscInt        *xadj,*adjncy,*oldranks;
3328   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3329   PetscInt        i,local_size,threshold=0;
3330   PetscErrorCode  ierr;
3331   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3332   PetscSubcomm    subcomm;
3333 
3334   PetscFunctionBegin;
3335   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3336   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3337   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3338 
3339   /* Get info on mapping */
3340   matis = (Mat_IS*)(mat->data);
3341   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3342   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3343 
3344   /* build local CSR graph of subdomains' connectivity */
3345   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3346   xadj[0] = 0;
3347   xadj[1] = PetscMax(n_neighs-1,0);
3348   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3349   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3350 
3351   if (threshold) {
3352     PetscInt xadj_count = 0;
3353     for (i=1;i<n_neighs;i++) {
3354       if (n_shared[i] > threshold) {
3355         adjncy[xadj_count] = neighs[i];
3356         adjncy_wgt[xadj_count] = n_shared[i];
3357         xadj_count++;
3358       }
3359     }
3360     xadj[1] = xadj_count;
3361   } else {
3362     if (xadj[1]) {
3363       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3364       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3365     }
3366   }
3367   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3368   if (use_square) {
3369     for (i=0;i<xadj[1];i++) {
3370       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3371     }
3372   }
3373   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3374 
3375   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3376 
3377   /*
3378     Restrict work on active processes only.
3379   */
3380   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3381   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3382   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3383   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3384   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3385   if (color) {
3386     ierr = PetscFree(xadj);CHKERRQ(ierr);
3387     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3388     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3389   } else {
3390     PetscInt coarsening_ratio;
3391     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3392     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3393     prank = rank;
3394     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3395     /*
3396     for (i=0;i<size;i++) {
3397       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3398     }
3399     */
3400     for (i=0;i<xadj[1];i++) {
3401       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3402     }
3403     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3404     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3405     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3406 
3407     /* Partition */
3408     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3409     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3410     if (use_vwgt) {
3411       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3412       v_wgt[0] = local_size;
3413       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3414     }
3415     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3416     coarsening_ratio = size/n_subdomains;
3417     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3418     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3419     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3420     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3421 
3422     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3423     if (contiguous) {
3424       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3425     } else {
3426       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3427     }
3428     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3429     /* clean up */
3430     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3431     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3432     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3433     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3434   }
3435   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3436 
3437   /* assemble parallel IS for sends */
3438   i = 1;
3439   if (color) i=0;
3440   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3441 
3442   /* get back IS */
3443   *is_sends = ranks_send_to;
3444   PetscFunctionReturn(0);
3445 }
3446 
3447 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3448 
3449 #undef __FUNCT__
3450 #define __FUNCT__ "MatISSubassemble"
3451 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3452 {
3453   Mat                    local_mat;
3454   Mat_IS                 *matis;
3455   IS                     is_sends_internal;
3456   PetscInt               rows,cols,new_local_rows;
3457   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3458   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3459   ISLocalToGlobalMapping l2gmap;
3460   PetscInt*              l2gmap_indices;
3461   const PetscInt*        is_indices;
3462   MatType                new_local_type;
3463   /* buffers */
3464   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3465   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3466   PetscInt               *recv_buffer_idxs_local;
3467   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3468   /* MPI */
3469   MPI_Comm               comm,comm_n;
3470   PetscSubcomm           subcomm;
3471   PetscMPIInt            n_sends,n_recvs,commsize;
3472   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3473   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3474   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3475   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3476   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3477   PetscErrorCode         ierr;
3478 
3479   PetscFunctionBegin;
3480   /* TODO: add missing checks */
3481   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3482   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3483   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3484   PetscValidLogicalCollectiveInt(mat,nis,7);
3485   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3486   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3487   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3488   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3489   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3490   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3491   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3492   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3493     PetscInt mrows,mcols,mnrows,mncols;
3494     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3495     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3496     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3497     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3498     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3499     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3500   }
3501   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3502   PetscValidLogicalCollectiveInt(mat,bs,0);
3503   /* prepare IS for sending if not provided */
3504   if (!is_sends) {
3505     PetscBool pcontig = PETSC_TRUE;
3506     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3507     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3508   } else {
3509     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3510     is_sends_internal = is_sends;
3511   }
3512 
3513   /* get pointer of MATIS data */
3514   matis = (Mat_IS*)mat->data;
3515 
3516   /* get comm */
3517   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3518 
3519   /* compute number of sends */
3520   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3521   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3522 
3523   /* compute number of receives */
3524   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3525   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3526   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3527   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3528   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3529   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3530   ierr = PetscFree(iflags);CHKERRQ(ierr);
3531 
3532   /* restrict comm if requested */
3533   subcomm = 0;
3534   destroy_mat = PETSC_FALSE;
3535   if (restrict_comm) {
3536     PetscMPIInt color,subcommsize;
3537 
3538     color = 0;
3539     if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */
3540     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3541     subcommsize = commsize - subcommsize;
3542     /* check if reuse has been requested */
3543     if (reuse == MAT_REUSE_MATRIX) {
3544       if (*mat_n) {
3545         PetscMPIInt subcommsize2;
3546         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3547         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3548         comm_n = PetscObjectComm((PetscObject)*mat_n);
3549       } else {
3550         comm_n = PETSC_COMM_SELF;
3551       }
3552     } else { /* MAT_INITIAL_MATRIX */
3553       PetscMPIInt rank;
3554 
3555       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3556       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3557       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3558       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3559       comm_n = PetscSubcommChild(subcomm);
3560     }
3561     /* flag to destroy *mat_n if not significative */
3562     if (color) destroy_mat = PETSC_TRUE;
3563   } else {
3564     comm_n = comm;
3565   }
3566 
3567   /* prepare send/receive buffers */
3568   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3569   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3570   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3571   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3572   if (nis) {
3573     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3574   }
3575 
3576   /* Get data from local matrices */
3577   if (!isdense) {
3578     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3579     /* TODO: See below some guidelines on how to prepare the local buffers */
3580     /*
3581        send_buffer_vals should contain the raw values of the local matrix
3582        send_buffer_idxs should contain:
3583        - MatType_PRIVATE type
3584        - PetscInt        size_of_l2gmap
3585        - PetscInt        global_row_indices[size_of_l2gmap]
3586        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3587     */
3588   } else {
3589     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3590     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3591     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3592     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3593     send_buffer_idxs[1] = i;
3594     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3595     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3596     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3597     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3598     for (i=0;i<n_sends;i++) {
3599       ilengths_vals[is_indices[i]] = len*len;
3600       ilengths_idxs[is_indices[i]] = len+2;
3601     }
3602   }
3603   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3604   /* additional is (if any) */
3605   if (nis) {
3606     PetscMPIInt psum;
3607     PetscInt j;
3608     for (j=0,psum=0;j<nis;j++) {
3609       PetscInt plen;
3610       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3611       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3612       psum += len+1; /* indices + lenght */
3613     }
3614     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3615     for (j=0,psum=0;j<nis;j++) {
3616       PetscInt plen;
3617       const PetscInt *is_array_idxs;
3618       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3619       send_buffer_idxs_is[psum] = plen;
3620       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3621       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3622       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3623       psum += plen+1; /* indices + lenght */
3624     }
3625     for (i=0;i<n_sends;i++) {
3626       ilengths_idxs_is[is_indices[i]] = psum;
3627     }
3628     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3629   }
3630 
3631   buf_size_idxs = 0;
3632   buf_size_vals = 0;
3633   buf_size_idxs_is = 0;
3634   for (i=0;i<n_recvs;i++) {
3635     buf_size_idxs += (PetscInt)olengths_idxs[i];
3636     buf_size_vals += (PetscInt)olengths_vals[i];
3637     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3638   }
3639   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3640   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3641   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3642 
3643   /* get new tags for clean communications */
3644   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3645   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3646   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3647 
3648   /* allocate for requests */
3649   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3650   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3651   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3652   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3653   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3654   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3655 
3656   /* communications */
3657   ptr_idxs = recv_buffer_idxs;
3658   ptr_vals = recv_buffer_vals;
3659   ptr_idxs_is = recv_buffer_idxs_is;
3660   for (i=0;i<n_recvs;i++) {
3661     source_dest = onodes[i];
3662     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3663     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3664     ptr_idxs += olengths_idxs[i];
3665     ptr_vals += olengths_vals[i];
3666     if (nis) {
3667       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3668       ptr_idxs_is += olengths_idxs_is[i];
3669     }
3670   }
3671   for (i=0;i<n_sends;i++) {
3672     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3673     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3674     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3675     if (nis) {
3676       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3677     }
3678   }
3679   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3680   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3681 
3682   /* assemble new l2g map */
3683   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3684   ptr_idxs = recv_buffer_idxs;
3685   new_local_rows = 0;
3686   for (i=0;i<n_recvs;i++) {
3687     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3688     ptr_idxs += olengths_idxs[i];
3689   }
3690   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3691   ptr_idxs = recv_buffer_idxs;
3692   new_local_rows = 0;
3693   for (i=0;i<n_recvs;i++) {
3694     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3695     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3696     ptr_idxs += olengths_idxs[i];
3697   }
3698   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3699   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3700   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3701 
3702   /* infer new local matrix type from received local matrices type */
3703   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3704   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3705   if (n_recvs) {
3706     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3707     ptr_idxs = recv_buffer_idxs;
3708     for (i=0;i<n_recvs;i++) {
3709       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3710         new_local_type_private = MATAIJ_PRIVATE;
3711         break;
3712       }
3713       ptr_idxs += olengths_idxs[i];
3714     }
3715     switch (new_local_type_private) {
3716       case MATDENSE_PRIVATE:
3717         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3718           new_local_type = MATSEQAIJ;
3719           bs = 1;
3720         } else { /* if I receive only 1 dense matrix */
3721           new_local_type = MATSEQDENSE;
3722           bs = 1;
3723         }
3724         break;
3725       case MATAIJ_PRIVATE:
3726         new_local_type = MATSEQAIJ;
3727         bs = 1;
3728         break;
3729       case MATBAIJ_PRIVATE:
3730         new_local_type = MATSEQBAIJ;
3731         break;
3732       case MATSBAIJ_PRIVATE:
3733         new_local_type = MATSEQSBAIJ;
3734         break;
3735       default:
3736         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3737         break;
3738     }
3739   } else { /* by default, new_local_type is seqdense */
3740     new_local_type = MATSEQDENSE;
3741     bs = 1;
3742   }
3743 
3744   /* create MATIS object if needed */
3745   if (reuse == MAT_INITIAL_MATRIX) {
3746     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3747     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3748   } else {
3749     /* it also destroys the local matrices */
3750     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3751   }
3752   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3753   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3754 
3755   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3756 
3757   /* Global to local map of received indices */
3758   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3759   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3760   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3761 
3762   /* restore attributes -> type of incoming data and its size */
3763   buf_size_idxs = 0;
3764   for (i=0;i<n_recvs;i++) {
3765     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3766     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3767     buf_size_idxs += (PetscInt)olengths_idxs[i];
3768   }
3769   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3770 
3771   /* set preallocation */
3772   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3773   if (!newisdense) {
3774     PetscInt *new_local_nnz=0;
3775 
3776     ptr_vals = recv_buffer_vals;
3777     ptr_idxs = recv_buffer_idxs_local;
3778     if (n_recvs) {
3779       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3780     }
3781     for (i=0;i<n_recvs;i++) {
3782       PetscInt j;
3783       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3784         for (j=0;j<*(ptr_idxs+1);j++) {
3785           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3786         }
3787       } else {
3788         /* TODO */
3789       }
3790       ptr_idxs += olengths_idxs[i];
3791     }
3792     if (new_local_nnz) {
3793       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3794       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3795       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3796       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3797       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3798       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3799     } else {
3800       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3801     }
3802     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3803   } else {
3804     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3805   }
3806 
3807   /* set values */
3808   ptr_vals = recv_buffer_vals;
3809   ptr_idxs = recv_buffer_idxs_local;
3810   for (i=0;i<n_recvs;i++) {
3811     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3812       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3813       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3814       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3815       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3816       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3817     } else {
3818       /* TODO */
3819     }
3820     ptr_idxs += olengths_idxs[i];
3821     ptr_vals += olengths_vals[i];
3822   }
3823   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3824   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3825   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3826   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3827   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3828   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3829 
3830 #if 0
3831   if (!restrict_comm) { /* check */
3832     Vec       lvec,rvec;
3833     PetscReal infty_error;
3834 
3835     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3836     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3837     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3838     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3839     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3840     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3841     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3842     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3843     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3844   }
3845 #endif
3846 
3847   /* assemble new additional is (if any) */
3848   if (nis) {
3849     PetscInt **temp_idxs,*count_is,j,psum;
3850 
3851     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3852     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3853     ptr_idxs = recv_buffer_idxs_is;
3854     psum = 0;
3855     for (i=0;i<n_recvs;i++) {
3856       for (j=0;j<nis;j++) {
3857         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3858         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3859         psum += plen;
3860         ptr_idxs += plen+1; /* shift pointer to received data */
3861       }
3862     }
3863     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3864     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3865     for (i=1;i<nis;i++) {
3866       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3867     }
3868     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3869     ptr_idxs = recv_buffer_idxs_is;
3870     for (i=0;i<n_recvs;i++) {
3871       for (j=0;j<nis;j++) {
3872         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3873         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3874         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3875         ptr_idxs += plen+1; /* shift pointer to received data */
3876       }
3877     }
3878     for (i=0;i<nis;i++) {
3879       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3880       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3881       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3882     }
3883     ierr = PetscFree(count_is);CHKERRQ(ierr);
3884     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3885     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3886   }
3887   /* free workspace */
3888   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3889   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3890   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3891   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3892   if (isdense) {
3893     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3894     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3895   } else {
3896     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3897   }
3898   if (nis) {
3899     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3900     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3901   }
3902   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3903   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3904   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3905   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3906   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3907   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3908   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3909   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3910   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3911   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3912   ierr = PetscFree(onodes);CHKERRQ(ierr);
3913   if (nis) {
3914     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3915     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3916     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3917   }
3918   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3919   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3920     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3921     for (i=0;i<nis;i++) {
3922       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3923     }
3924   }
3925   PetscFunctionReturn(0);
3926 }
3927 
3928 /* temporary hack into ksp private data structure */
3929 #include <petsc/private/kspimpl.h>
3930 
3931 #undef __FUNCT__
3932 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3933 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3934 {
3935   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3936   PC_IS                  *pcis = (PC_IS*)pc->data;
3937   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3938   MatNullSpace           CoarseNullSpace=NULL;
3939   ISLocalToGlobalMapping coarse_islg;
3940   IS                     coarse_is,*isarray;
3941   PetscInt               i,im_active=-1,active_procs=-1;
3942   PetscInt               nis,nisdofs,nisneu;
3943   PC                     pc_temp;
3944   PCType                 coarse_pc_type;
3945   KSPType                coarse_ksp_type;
3946   PetscBool              multilevel_requested,multilevel_allowed;
3947   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3948   Mat                    t_coarse_mat_is;
3949   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3950   PetscMPIInt            all_procs;
3951   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3952   PetscBool              compute_vecs = PETSC_FALSE;
3953   PetscScalar            *array;
3954   PetscErrorCode         ierr;
3955 
3956   PetscFunctionBegin;
3957   /* Assign global numbering to coarse dofs */
3958   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3959     PetscInt ocoarse_size;
3960     compute_vecs = PETSC_TRUE;
3961     ocoarse_size = pcbddc->coarse_size;
3962     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3963     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3964     /* see if we can avoid some work */
3965     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3966       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3967         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3968         coarse_reuse = PETSC_FALSE;
3969       } else { /* we can safely reuse already computed coarse matrix */
3970         coarse_reuse = PETSC_TRUE;
3971       }
3972     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3973       coarse_reuse = PETSC_FALSE;
3974     }
3975     /* reset any subassembling information */
3976     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3977     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3978   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3979     coarse_reuse = PETSC_TRUE;
3980   }
3981 
3982   /* count "active" (i.e. with positive local size) and "void" processes */
3983   im_active = !!(pcis->n);
3984   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3985   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3986   void_procs = all_procs-active_procs;
3987   csin_type_simple = PETSC_TRUE;
3988   redist = PETSC_FALSE;
3989   if (pcbddc->current_level && void_procs) {
3990     csin_ml = PETSC_TRUE;
3991     ncoarse_ml = void_procs;
3992     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3993     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3994       csin_ds = PETSC_TRUE;
3995       ncoarse_ds = pcbddc->redistribute_coarse;
3996       redist = PETSC_TRUE;
3997     } else {
3998       csin_ds = PETSC_TRUE;
3999       ncoarse_ds = active_procs;
4000       redist = PETSC_TRUE;
4001     }
4002   } else {
4003     csin_ml = PETSC_FALSE;
4004     ncoarse_ml = all_procs;
4005     if (void_procs) {
4006       csin_ds = PETSC_TRUE;
4007       ncoarse_ds = void_procs;
4008       csin_type_simple = PETSC_FALSE;
4009     } else {
4010       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4011         csin_ds = PETSC_TRUE;
4012         ncoarse_ds = pcbddc->redistribute_coarse;
4013         redist = PETSC_TRUE;
4014       } else {
4015         csin_ds = PETSC_FALSE;
4016         ncoarse_ds = all_procs;
4017       }
4018     }
4019   }
4020 
4021   /*
4022     test if we can go multilevel: three conditions must be satisfied:
4023     - we have not exceeded the number of levels requested
4024     - we can actually subassemble the active processes
4025     - we can find a suitable number of MPI processes where we can place the subassembled problem
4026   */
4027   multilevel_allowed = PETSC_FALSE;
4028   multilevel_requested = PETSC_FALSE;
4029   if (pcbddc->current_level < pcbddc->max_levels) {
4030     multilevel_requested = PETSC_TRUE;
4031     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4032       multilevel_allowed = PETSC_FALSE;
4033     } else {
4034       multilevel_allowed = PETSC_TRUE;
4035     }
4036   }
4037   /* determine number of process partecipating to coarse solver */
4038   if (multilevel_allowed) {
4039     ncoarse = ncoarse_ml;
4040     csin = csin_ml;
4041     redist = PETSC_FALSE;
4042   } else {
4043     ncoarse = ncoarse_ds;
4044     csin = csin_ds;
4045   }
4046 
4047   /* creates temporary l2gmap and IS for coarse indexes */
4048   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4049   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4050 
4051   /* creates temporary MATIS object for coarse matrix */
4052   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4053   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4054   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4055   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4056 #if 0
4057   {
4058     PetscViewer viewer;
4059     char filename[256];
4060     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4061     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4062     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4063     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4064     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4065   }
4066 #endif
4067   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4068   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4069   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4070   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4071   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4072 
4073   /* compute dofs splitting and neumann boundaries for coarse dofs */
4074   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4075     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4076     const PetscInt         *idxs;
4077     ISLocalToGlobalMapping tmap;
4078 
4079     /* create map between primal indices (in local representative ordering) and local primal numbering */
4080     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4081     /* allocate space for temporary storage */
4082     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4083     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4084     /* allocate for IS array */
4085     nisdofs = pcbddc->n_ISForDofsLocal;
4086     nisneu = !!pcbddc->NeumannBoundariesLocal;
4087     nis = nisdofs + nisneu;
4088     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4089     /* dofs splitting */
4090     for (i=0;i<nisdofs;i++) {
4091       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4092       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4093       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4094       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4095       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4096       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4097       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4098       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4099     }
4100     /* neumann boundaries */
4101     if (pcbddc->NeumannBoundariesLocal) {
4102       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4103       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4104       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4105       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4106       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4107       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4108       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4109       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4110     }
4111     /* free memory */
4112     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4113     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4114     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4115   } else {
4116     nis = 0;
4117     nisdofs = 0;
4118     nisneu = 0;
4119     isarray = NULL;
4120   }
4121   /* destroy no longer needed map */
4122   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4123 
4124   /* restrict on coarse candidates (if needed) */
4125   coarse_mat_is = NULL;
4126   if (csin) {
4127     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4128       if (redist) {
4129         PetscMPIInt rank;
4130         PetscInt    spc,n_spc_p1,dest[1],destsize;
4131 
4132         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4133         spc = active_procs/ncoarse;
4134         n_spc_p1 = active_procs%ncoarse;
4135         if (im_active) {
4136           destsize = 1;
4137           if (rank > n_spc_p1*(spc+1)-1) {
4138             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4139           } else {
4140             dest[0] = rank/(spc+1);
4141           }
4142         } else {
4143           destsize = 0;
4144         }
4145         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4146       } else if (csin_type_simple) {
4147         PetscMPIInt rank;
4148         PetscInt    issize,isidx;
4149 
4150         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4151         if (im_active) {
4152           issize = 1;
4153           isidx = (PetscInt)rank;
4154         } else {
4155           issize = 0;
4156           isidx = -1;
4157         }
4158         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4159       } else { /* get a suitable subassembling pattern from MATIS code */
4160         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4161       }
4162 
4163       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4164       if (!redist || ncoarse <= void_procs) {
4165         PetscInt ncoarse_cand,tissize,*nisindices;
4166         PetscInt *coarse_candidates;
4167         const PetscInt* tisindices;
4168 
4169         /* get coarse candidates' ranks in pc communicator */
4170         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4171         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4172         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4173           if (!coarse_candidates[i]) {
4174             coarse_candidates[ncoarse_cand++]=i;
4175           }
4176         }
4177         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4178 
4179 
4180         if (pcbddc->dbg_flag) {
4181           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4182           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4183           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4184           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4185           for (i=0;i<ncoarse_cand;i++) {
4186             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4187           }
4188           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4189           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4190         }
4191         /* shift the pattern on coarse candidates */
4192         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4193         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4194         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4195         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4196         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4197         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4198         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4199       }
4200       if (pcbddc->dbg_flag) {
4201         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4202         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4203         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4204         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4205       }
4206     }
4207     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4208     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4209   } else {
4210     if (pcbddc->dbg_flag) {
4211       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4212       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4213       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4214     }
4215     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4216     coarse_mat_is = t_coarse_mat_is;
4217   }
4218 
4219   /* create local to global scatters for coarse problem */
4220   if (compute_vecs) {
4221     PetscInt lrows;
4222     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4223     if (coarse_mat_is) {
4224       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4225     } else {
4226       lrows = 0;
4227     }
4228     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4229     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4230     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4231     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4232     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4233   }
4234   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4235   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4236 
4237   /* set defaults for coarse KSP and PC */
4238   if (multilevel_allowed) {
4239     coarse_ksp_type = KSPRICHARDSON;
4240     coarse_pc_type = PCBDDC;
4241   } else {
4242     coarse_ksp_type = KSPPREONLY;
4243     coarse_pc_type = PCREDUNDANT;
4244   }
4245 
4246   /* print some info if requested */
4247   if (pcbddc->dbg_flag) {
4248     if (!multilevel_allowed) {
4249       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4250       if (multilevel_requested) {
4251         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4252       } else if (pcbddc->max_levels) {
4253         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4254       }
4255       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4256     }
4257   }
4258 
4259   /* create the coarse KSP object only once with defaults */
4260   if (coarse_mat_is) {
4261     MatReuse coarse_mat_reuse;
4262     PetscViewer dbg_viewer = NULL;
4263     if (pcbddc->dbg_flag) {
4264       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4265       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4266     }
4267     if (!pcbddc->coarse_ksp) {
4268       char prefix[256],str_level[16];
4269       size_t len;
4270       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4271       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4272       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4273       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4274       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4275       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4276       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4277       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4278       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4279       /* prefix */
4280       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4281       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4282       if (!pcbddc->current_level) {
4283         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4284         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4285       } else {
4286         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4287         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4288         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4289         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4290         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4291         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4292       }
4293       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4294       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4295       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4296       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4297       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4298       /* allow user customization */
4299       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4300     }
4301     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4302     if (nisdofs) {
4303       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4304       for (i=0;i<nisdofs;i++) {
4305         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4306       }
4307     }
4308     if (nisneu) {
4309       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4310       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4311     }
4312 
4313     /* get some info after set from options */
4314     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4315     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4316     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4317     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4318     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4319       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4320       isbddc = PETSC_FALSE;
4321     }
4322     if (isredundant) {
4323       KSP inner_ksp;
4324       PC inner_pc;
4325       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4326       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4327       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4328     }
4329 
4330     /* assemble coarse matrix */
4331     if (coarse_reuse) {
4332       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4333       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4334       coarse_mat_reuse = MAT_REUSE_MATRIX;
4335     } else {
4336       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4337     }
4338     if (isbddc || isnn) {
4339       if (pcbddc->coarsening_ratio > 1) {
4340         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4341           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4342           if (pcbddc->dbg_flag) {
4343             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4344             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4345             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4346             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4347           }
4348         }
4349         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4350       } else {
4351         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4352         coarse_mat = coarse_mat_is;
4353       }
4354     } else {
4355       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4356     }
4357     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4358 
4359     /* propagate symmetry info of coarse matrix */
4360     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4361     if (pc->pmat->symmetric_set) {
4362       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4363     }
4364     if (pc->pmat->hermitian_set) {
4365       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4366     }
4367     if (pc->pmat->spd_set) {
4368       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4369     }
4370     /* set operators */
4371     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4372     if (pcbddc->dbg_flag) {
4373       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4374     }
4375   } else { /* processes non partecipating to coarse solver (if any) */
4376     coarse_mat = 0;
4377   }
4378   ierr = PetscFree(isarray);CHKERRQ(ierr);
4379 #if 0
4380   {
4381     PetscViewer viewer;
4382     char filename[256];
4383     sprintf(filename,"coarse_mat.m");
4384     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4385     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4386     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4387     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4388   }
4389 #endif
4390 
4391   /* Compute coarse null space (special handling by BDDC only) */
4392   if (pcbddc->NullSpace) {
4393     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4394   }
4395 
4396   if (pcbddc->coarse_ksp) {
4397     Vec crhs,csol;
4398     PetscBool ispreonly;
4399     if (CoarseNullSpace) {
4400       if (isbddc) {
4401         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4402       } else {
4403         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4404       }
4405     }
4406     /* setup coarse ksp */
4407     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4408     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4409     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4410     /* hack */
4411     if (!csol) {
4412       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4413     }
4414     if (!crhs) {
4415       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4416     }
4417     /* Check coarse problem if in debug mode or if solving with an iterative method */
4418     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4419     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4420       KSP       check_ksp;
4421       KSPType   check_ksp_type;
4422       PC        check_pc;
4423       Vec       check_vec,coarse_vec;
4424       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4425       PetscInt  its;
4426       PetscBool compute_eigs;
4427       PetscReal *eigs_r,*eigs_c;
4428       PetscInt  neigs;
4429       const char *prefix;
4430 
4431       /* Create ksp object suitable for estimation of extreme eigenvalues */
4432       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4433       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4434       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4435       if (ispreonly) {
4436         check_ksp_type = KSPPREONLY;
4437         compute_eigs = PETSC_FALSE;
4438       } else {
4439         check_ksp_type = KSPGMRES;
4440         compute_eigs = PETSC_TRUE;
4441       }
4442       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4443       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4444       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4445       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4446       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4447       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4448       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4449       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4450       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4451       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4452       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4453       /* create random vec */
4454       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4455       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4456       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4457       if (CoarseNullSpace) {
4458         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4459       }
4460       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4461       /* solve coarse problem */
4462       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4463       if (CoarseNullSpace) {
4464         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4465       }
4466       /* set eigenvalue estimation if preonly has not been requested */
4467       if (compute_eigs) {
4468         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4469         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4470         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4471         lambda_max = eigs_r[neigs-1];
4472         lambda_min = eigs_r[0];
4473         if (pcbddc->use_coarse_estimates) {
4474           if (lambda_max>lambda_min) {
4475             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4476             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4477           }
4478         }
4479       }
4480 
4481       /* check coarse problem residual error */
4482       if (pcbddc->dbg_flag) {
4483         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4484         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4485         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4486         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4487         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4488         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4489         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4490         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4491         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4492         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4493         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4494         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4495         if (compute_eigs) {
4496           PetscReal lambda_max_s,lambda_min_s;
4497           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4498           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4499           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4500           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4501           for (i=0;i<neigs;i++) {
4502             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4503           }
4504         }
4505         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4506         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4507       }
4508       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4509       if (compute_eigs) {
4510         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4511         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4512       }
4513     }
4514   }
4515   /* print additional info */
4516   if (pcbddc->dbg_flag) {
4517     /* waits until all processes reaches this point */
4518     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4519     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4520     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4521   }
4522 
4523   /* free memory */
4524   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4525   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4526   PetscFunctionReturn(0);
4527 }
4528 
4529 #undef __FUNCT__
4530 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4531 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4532 {
4533   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4534   PC_IS*         pcis = (PC_IS*)pc->data;
4535   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4536   PetscInt       i,local_size,coarse_size=0;
4537   PetscInt       *local_primal_indices=NULL;
4538   PetscInt       *t_local_primal_indices=NULL;
4539   PetscErrorCode ierr;
4540 
4541   PetscFunctionBegin;
4542   /* Compute global number of coarse dofs */
4543   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4545   }
4546   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4547   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4548   local_size = 0;
4549   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4550     PetscInt j;
4551     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4552   }
4553   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4554   if (local_size != pcbddc->local_primal_size) {
4555     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4556   }
4557 
4558   /* check numbering */
4559   if (pcbddc->dbg_flag) {
4560     PetscScalar coarsesum,*array;
4561     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4562 
4563     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4564     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4565     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4566     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4567     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4568     for (i=0;i<pcbddc->local_primal_size;i++) {
4569       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4570     }
4571     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4572     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4573     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4574     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4575     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4576     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4577     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4578     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4579     for (i=0;i<pcis->n;i++) {
4580       if (array[i] == 1.0) {
4581         set_error = PETSC_TRUE;
4582         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4583       }
4584     }
4585     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4586     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4587     for (i=0;i<pcis->n;i++) {
4588       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4589     }
4590     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4591     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4592     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4593     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4594     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4595     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4596     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4597       PetscInt *gidxs;
4598 
4599       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4600       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4601       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4602       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4603       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4604       for (i=0;i<pcbddc->local_primal_size;i++) {
4605         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4606       }
4607       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4608       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4609     }
4610     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4611     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4612   }
4613   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4614   /* get back data */
4615   *coarse_size_n = coarse_size;
4616   *local_primal_indices_n = local_primal_indices;
4617   PetscFunctionReturn(0);
4618 }
4619 
4620 #undef __FUNCT__
4621 #define __FUNCT__ "PCBDDCGlobalToLocal"
4622 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4623 {
4624   IS             localis_t;
4625   PetscInt       i,lsize,*idxs,n;
4626   PetscScalar    *vals;
4627   PetscErrorCode ierr;
4628 
4629   PetscFunctionBegin;
4630   /* get indices in local ordering exploiting local to global map */
4631   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4632   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4633   for (i=0;i<lsize;i++) vals[i] = 1.0;
4634   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4635   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4636   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4637   if (idxs) { /* multilevel guard */
4638     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4639   }
4640   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4641   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4642   ierr = PetscFree(vals);CHKERRQ(ierr);
4643   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4644   /* now compute set in local ordering */
4645   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4646   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4647   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4648   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4649   for (i=0,lsize=0;i<n;i++) {
4650     if (PetscRealPart(vals[i]) > 0.5) {
4651       lsize++;
4652     }
4653   }
4654   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4655   for (i=0,lsize=0;i<n;i++) {
4656     if (PetscRealPart(vals[i]) > 0.5) {
4657       idxs[lsize++] = i;
4658     }
4659   }
4660   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4661   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4662   *localis = localis_t;
4663   PetscFunctionReturn(0);
4664 }
4665 
4666 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4667 #undef __FUNCT__
4668 #define __FUNCT__ "PCBDDCMatMult_Private"
4669 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4670 {
4671   PCBDDCChange_ctx change_ctx;
4672   PetscErrorCode   ierr;
4673 
4674   PetscFunctionBegin;
4675   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4676   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4677   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4678   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4679   PetscFunctionReturn(0);
4680 }
4681 
4682 #undef __FUNCT__
4683 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4684 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4685 {
4686   PCBDDCChange_ctx change_ctx;
4687   PetscErrorCode   ierr;
4688 
4689   PetscFunctionBegin;
4690   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4691   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4692   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4693   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4694   PetscFunctionReturn(0);
4695 }
4696 
4697 #undef __FUNCT__
4698 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4699 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4700 {
4701   PC_IS               *pcis=(PC_IS*)pc->data;
4702   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4703   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4704   Mat                 S_j;
4705   PetscInt            *used_xadj,*used_adjncy;
4706   PetscBool           free_used_adj;
4707   PetscErrorCode      ierr;
4708 
4709   PetscFunctionBegin;
4710   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4711   free_used_adj = PETSC_FALSE;
4712   if (pcbddc->sub_schurs_layers == -1) {
4713     used_xadj = NULL;
4714     used_adjncy = NULL;
4715   } else {
4716     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4717       used_xadj = pcbddc->mat_graph->xadj;
4718       used_adjncy = pcbddc->mat_graph->adjncy;
4719     } else if (pcbddc->computed_rowadj) {
4720       used_xadj = pcbddc->mat_graph->xadj;
4721       used_adjncy = pcbddc->mat_graph->adjncy;
4722     } else {
4723       PetscBool      flg_row=PETSC_FALSE;
4724       const PetscInt *xadj,*adjncy;
4725       PetscInt       nvtxs;
4726 
4727       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4728       if (flg_row) {
4729         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4730         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4731         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4732         free_used_adj = PETSC_TRUE;
4733       } else {
4734         pcbddc->sub_schurs_layers = -1;
4735         used_xadj = NULL;
4736         used_adjncy = NULL;
4737       }
4738       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4739     }
4740   }
4741 
4742   /* setup sub_schurs data */
4743   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4744   if (!sub_schurs->use_mumps) {
4745     /* pcbddc->ksp_D up to date only if not using MUMPS */
4746     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4747     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr);
4748   } else {
4749     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4750     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr);
4751   }
4752   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4753 
4754   /* free adjacency */
4755   if (free_used_adj) {
4756     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4757   }
4758   PetscFunctionReturn(0);
4759 }
4760 
4761 #undef __FUNCT__
4762 #define __FUNCT__ "PCBDDCInitSubSchurs"
4763 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4764 {
4765   PC_IS               *pcis=(PC_IS*)pc->data;
4766   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4767   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4768   PCBDDCGraph         graph;
4769   PetscErrorCode      ierr;
4770 
4771   PetscFunctionBegin;
4772   /* attach interface graph for determining subsets */
4773   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4774     IS       verticesIS,verticescomm;
4775     PetscInt vsize,*idxs;
4776 
4777     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4778     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
4779     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4780     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
4781     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4782     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4783     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4784     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4785     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
4786     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
4787     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4788 /*
4789     if (pcbddc->dbg_flag) {
4790       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4791     }
4792 */
4793   } else {
4794     graph = pcbddc->mat_graph;
4795   }
4796 
4797   /* sub_schurs init */
4798   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4799 
4800   /* free graph struct */
4801   if (pcbddc->sub_schurs_rebuild) {
4802     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4803   }
4804   PetscFunctionReturn(0);
4805 }
4806